From 03116a9f8c2fc98577e153083aaf9b6a701ab8f9 Mon Sep 17 00:00:00 2001
From: Joachim Protze
Date: Thu, 30 Jul 2020 09:28:17 +0200
Subject: [PATCH 001/600] [OpenMP] Use weak attribute in interface only for
 static library

This is to address the issue reported at:
https://bugs.llvm.org/show_bug.cgi?id=46863

Since weak is meaningless for a shared library interface function, this
patch disables the attribute when the OpenMP library is built as a shared
library.

ompt_start_tool is not an interface function, but an internally called
function possibly implemented by an OMPT tool. This function needs to be
weak if possible to allow overwriting ompt_start_tool with a function
implementation built into the application.

Differential Revision: https://reviews.llvm.org/D84871
---
 openmp/runtime/src/kmp_ftn_entry.h   |  8 ++++----
 openmp/runtime/src/kmp_os.h          | 10 ++++++++--
 openmp/runtime/src/ompt-specific.cpp |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index ab57907e088e3..b4b0dea0d1afa 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -939,7 +939,7 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_DEFAULT_DEVICE)(int KMP_DEREF arg) {

 // Get number of NON-HOST devices.
 // libomptarget, if loaded, provides this function in api.cpp.
-int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {
 #if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
   return 0;
@@ -957,13 +957,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {

 // This function always returns true when called on host device.
 // Compiler/libomptarget should handle when it is called inside target region.
-int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) {
   return 1; // This is the host
 }

 // libomptarget, if loaded, provides this function
-int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) {
 #if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
   return KMP_HOST_DEVICE;
@@ -1318,7 +1318,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) {
 // This function will be defined in libomptarget. When libomptarget is not
 // loaded, we assume we are on the host and return KMP_HOST_DEVICE.
 // Compiler/libomptarget will handle this if called inside target.
-int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE;
+int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
 int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_HOST_DEVICE; }

 // Compiler will ensure that this is only called from host in sequential region
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
index 33735cf455c7e..c658f90975489 100644
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -338,10 +338,16 @@ extern "C" {
 #define KMP_ALIAS(alias_of) __attribute__((alias(alias_of)))
 #endif

+#if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB
+#define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak))
+#else
+#define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */
+#endif
+
 #if KMP_HAVE_WEAK_ATTRIBUTE
-#define KMP_WEAK_ATTRIBUTE __attribute__((weak))
+#define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak))
 #else
-#define KMP_WEAK_ATTRIBUTE /* Nothing */
+#define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */
 #endif

 // Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index a7288f08a6614..9be699110fc6b 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -27,7 +27,7 @@
 #define THREAD_LOCAL __thread
 #endif

-#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE
+#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE_INTERNAL

 //******************************************************************************
 // macros

From eef1bfb2d219191cee16ee24efbf2d204488696c Mon Sep 17 00:00:00 2001
From: Jakub Lichman
Date: Fri, 31 Jul 2020 13:18:11 +0200
Subject: [PATCH 002/600] [mlir][Linalg] Conv {1,2,3}D ops defined with TC
 syntax

Replaced the definitions of the named N-D ConvOps with tensor comprehension
syntax, which reduces boilerplate code significantly. Furthermore, new ops
were added to support TF convolutions (without strides and dilations).
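For reference, each named op is now declared in a few lines of TC syntax.
A sketch mirroring the conv_1d spec added below (the ConvWOp class name in
the ods_def header is inferred from the fold hooks this patch adds in
LinalgOps.cpp):

```
ods_def<ConvWOp>:
def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) {
  O(w) = std_addf(O(w), std_mulf(I(w + kw), K(kw)));
}
```

Each such definition expands to a full ODS op with iterator types and
indexing maps, which is what allows the hand-written ConvOpBase hierarchy
to be removed.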
Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D84628 --- .../Linalg/IR/LinalgNamedStructuredOpsSpec.tc | 52 ++++++++ .../mlir/Dialect/Linalg/IR/LinalgOps.h | 8 -- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 125 ------------------ .../LinalgToStandard/LinalgToStandard.cpp | 3 - mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 80 +++++------ mlir/lib/Dialect/Linalg/Transforms/Loops.cpp | 73 +++------- mlir/test/Dialect/Linalg/invalid.mlir | 8 -- mlir/test/Dialect/Linalg/loops.mlir | 82 ++++++------ 8 files changed, 146 insertions(+), 285 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc index 056f0723e92dd..27d4330a54d5f 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -17,3 +17,55 @@ ods_def: def batch_matmul(A: f32(Batch, M, K), B: f32(Batch, K, N)) -> (C: f32(Batch, M, N)) { C(b, m, n) = std_addf(std_mulf(A(b, m, k), B(b, k, n))); } + +ods_def: +def conv_1d(I: f32(W), K: f32(KW)) -> (O: f32(W)) { + O(w) = std_addf(O(w), std_mulf(I(w + kw), K(kw))); +} + +ods_def: +def conv_1d_nwc(I: f32(N, W, C), K: f32(F, KW, C)) -> (O: f32(N, W, F)) { + O(n, w, f) = std_addf(O(n, w, f), + std_mulf(I(n, w + kw, c), K(f, kw, c))); +} + +ods_def: +def conv_1d_ncw(I: f32(N, C, W), K: f32(F, C, KW)) -> (O: f32(N, F, W)) { + O(n, f, w) = std_addf(O(n, f, w), + std_mulf(I(n, c, w + kw), K(f, c, kw))); +} + +ods_def: +def conv_2d(I: f32(H, W), K: f32(KH, KW)) -> (O: f32(H, W)) { + O(h, w) = std_addf(O(h, w), std_mulf(I(h + kh, w + kw), K(kh, kw))); +} + +ods_def: +def conv_2d_nhwc(I: f32(N, H, W, C), K: f32(F, KH, KW, C)) -> (O: f32(N, H, W, F)) { + O(n, h, w, f) = std_addf(O(n, h, w, f), + std_mulf(I(n, h + kh, w + kw, c), K(f, kh, kw, c))); +} + +ods_def: +def conv_2d_nchw(I: f32(N, C, H, W), K: f32(F, C, KH, KW)) -> (O: f32(N, F, H, W)) { + O(n, f, h, w) = std_addf(O(n, f, h, w), + std_mulf(I(n, c, h + kh, w + kw), K(f, c, kh, kw))); +} + +ods_def: +def conv_3d(I: f32(D, H, W), K: f32(KD, KH, KW)) -> (O: f32(D, H, W)) { + O(d, h, w) = std_addf(O(d, h, w), + std_mulf(I(d + kd, h + kh, w + kw), K(kd, kh, kw))); +} + +ods_def: +def conv_3d_ndhwc(I: f32(N, D, H, W, C), K: f32(F, KD, KH, KW, C)) -> (O: f32(N, D, H, W, F)) { + O(n, d, h, w, f) = std_addf(O(n, d, h, w, f), + std_mulf(I(n, d + kd, h + kh, w + kw, c), K(f, kd, kh, kw, c))); +} + +ods_def: +def conv_3d_ncdhw(I: f32(N, C, D, H, W), K: f32(F, C, KD, KH, KW)) -> (O: f32(N, F, D, H, W)) { + O(n, f, d, h, w) = std_addf(O(n, f, d, h, w), + std_mulf(I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); +} \ No newline at end of file diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h index 75e6599bf9fee..21bff4185abf8 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.h @@ -85,14 +85,6 @@ AffineMap extractOrIdentityMap(Optional maybeMap, unsigned rank, SmallVector concat(ArrayRef a, ArrayRef b); -/// Generates indexing maps for convolution with the following structure: -/// input: (m_1, ..., m_r, n_1, ..., n_r) -> (m_1 + n_1, ..., m_r + n_r) -/// kernel: (m_1, ..., m_r, n_1, ..., n_r) -> (n_1, ..., n_r) -/// output: (m_1, ..., m_r, n_1, ..., n_r) -> (m_1, ..., m_r) -/// where r is the rank of the input, kernel and output -llvm::Optional> -createConvNDIndexingMaps(MLIRContext *context, 
unsigned rank); - #include "mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterfaces.h.inc" #define GET_OP_CLASSES diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index 84ae8e440bee6..1e3321af981e6 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -180,131 +180,6 @@ def FillOp : LinalgStructured_Op<"fill", [NInputs<0>, NOutputs<1>]> { let hasFolder = 1; } -class ConvOpBase - : LinalgStructured_Op, NOutputs<1>]> { - let description = [{ - Base operation for any N-D Convolution implemented as a linalg.generic op. - - Usage: - - ```mlir - linalg.convD(%in, %filter, %out) : memref<(?x)+f32>, - memref<(?x)+f32>, - memref<(?x)+f32> - ``` - - where %in: input array - %filter: kernel or filter that will be applied on the input array - %out: output array - - and rank of the operands is *N*. - - Every child convolution is expressed as: - - ```mlir - #conv_trait = { - args_in = 2, - args_out = 1, - indexing_maps = #conv_accesses, - library_call = "linalg_conv", - iterator_types = [("parallel", "parallel")+], // `2 * rank` iterators - } - - linalg.generic #conv_trait %in, %filter, %out { - ^bb0(%a: f32, %b: f32, %c: f32) : - %d = mulf %a, %b : f32 - %e = addf %c, %d : f32 - linalg.yield %e : f32 - } : memref<(?x)+f32>, - memref<(?x)+f32>, - memref<(?x)+f32> - ``` - - where #conv_accesses depend on the rank of the operands and thus - can be found in the documentation of each N-D case. - Please note that the input array is expected to be right-padded i.e. - the size of the input is greater than or equal to the size of the output - + size of the kernel - 1. If it is not padded the behavior of the op - is undefined. - }]; - - let arguments = (ins AnyStridedMemRefOfRank, - AnyStridedMemRefOfRank, - AnyStridedMemRefOfRank); - - let extraClassDeclaration = libraryCallName # [{ - llvm::Optional> referenceIterators() { - // There are always 2 loops for each dimension of the convolution. First - // iterates output and second kernel. Since ranks of all 3 operands must - // be the same it does not matter which operand is picked to get the rank. - // Loops iterating the output can be parallelized and thus are marked as - // "parallel" while loops iterating the kernel are accumulating the - // products and therefore are marked as "reduction". 
- unsigned rank = getInputShapedType(0).getRank(); - SmallVector parallel(rank, getParallelIteratorTypeName()); - SmallVector reduction(rank, getReductionIteratorTypeName()); - parallel.insert(parallel.end(), reduction.begin(), reduction.end()); - return parallel; - } - - // Generates indexing maps with the following structure: - // input: (m_1, ..., m_r, n_1, ..., n_r) -> (m_1 + n_1, ..., m_r + n_r) - // kernel: (m_1, ..., m_r, n_1, ..., n_r) -> (n_1, ..., n_r) - // output: (m_1, ..., m_r, n_1, ..., n_r) -> (m_1, ..., m_r) - // where r is the rank of the input, kernel and output - llvm::Optional> referenceIndexingMaps() { - MLIRContext *context = getContext(); - unsigned rank = getInputShapedType(0).getRank(); - return createConvNDIndexingMaps(context, rank); - } - }]; - - let hasFolder = 1; - let verifier = [{ return ::verify(*this); }]; -} - -def Conv1DOp : ConvOpBase<"conv1D", 1> { - let description = [{ - *1D* convolution which uses following affine maps to access operands: - - ```mlir - #conv_accesses = [ - affine_map<(m, n) -> (m + n)>, // in - affine_map<(m, n) -> (n)>, // kernel - affine_map<(m, n) -> (m)> // out - ] - ``` - }]; -} - -def Conv2DOp : ConvOpBase<"conv2D", 2> { - let description = [{ - *2D* convolution which uses following affine maps to access operands: - - ```mlir - #conv_accesses = [ - affine_map<(m1, m2, n1, n2) -> (m1 + n1, m2 + n2)>, // in - affine_map<(m1, m2, n1, n2) -> (n1, n2)>, // kernel - affine_map<(m1, m2, n1, n2) -> (m1, m2) // out - ] - ``` - }]; -} - -def Conv3DOp : ConvOpBase<"conv3D", 3> { - let description = [{ - *3D* convolution which uses following affine maps to access operands: - - ```mlir - #conv_accesses = [ - affine_map<(m1, m2, m3, n1, n2, n3) -> (m1 + n1, m2 + n2, m3 + n3)>, // in - affine_map<(m1, m2, m3, n1, n2, n3) -> (n1, n2, n3)>, // kernel - affine_map<(m1, m2, m3, n1, n2, n3) -> (m1, m2, m3)> // out - ] - ``` - }]; -} - /// A base class for pooling operation such as conv. 
The arguments must contain /// optional arguments `strides`, `dilations` and `padding` with following type: /// OptionalAttr:$strides diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp index 921445bd03b1f..55ffa3f8b6e61 100644 --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -236,9 +236,6 @@ void mlir::populateLinalgToStandardConversionPatterns( LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, - LinalgOpConversion, - LinalgOpConversion, - LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, LinalgOpConversion>(ctx); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index e67adf8c20420..03bd71f177169 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -986,17 +986,6 @@ static LogicalResult verifyStrideOrDilation(LinalgPoolingOp op, return success(); } -template -static LogicalResult verify(ConvNDOp op) { - auto outputType = op.getOutputShapedType(0).getElementType(); - auto inputType = op.getInputShapedType(0).getElementType(); - auto kernelType = op.getInputShapedType(1).getElementType(); - if (outputType != inputType || inputType != kernelType) - return op.emitOpError("expected all element types of operands to match"); - - return success(); -} - static LogicalResult verify(ConvOp op) { auto oType = op.output().getType().cast(); auto fType = op.filter().getType().cast(); @@ -1107,27 +1096,6 @@ mlir::linalg::weightedPoolingInputIndex(PoolingOp op, return res; } -llvm::Optional> -mlir::linalg::createConvNDIndexingMaps(MLIRContext *context, unsigned rank) { - unsigned numDims = rank * 2, idx = 0; - - SmallVector dims, in, kernel, out; - dims = makeAffineDimExprs(numDims, idx, context); - in.reserve(rank); - kernel.reserve(rank); - out.reserve(rank); - - for (unsigned i = 0; i < rank; i++) { - in.push_back(dims[i] + dims[rank + i]); - kernel.push_back(dims[rank + i]); - out.push_back(dims[i]); - } - - return SmallVector{AffineMap::get(numDims, 0, in, context), - AffineMap::get(numDims, 0, kernel, context), - AffineMap::get(numDims, 0, out, context)}; -} - #define INSTANTIATE_WEIGHTED_POOLING_INPUT_INDEX(OP_TYPE) \ template SmallVector \ mlir::linalg::weightedPoolingInputIndex( \ @@ -1209,18 +1177,6 @@ LogicalResult FillOp::fold(ArrayRef, SmallVectorImpl &) { return foldMemRefCast(*this); } -LogicalResult Conv1DOp::fold(ArrayRef, - SmallVectorImpl &) { - return foldMemRefCast(*this); -} -LogicalResult Conv2DOp::fold(ArrayRef, - SmallVectorImpl &) { - return foldMemRefCast(*this); -} -LogicalResult Conv3DOp::fold(ArrayRef, - SmallVectorImpl &) { - return foldMemRefCast(*this); -} LogicalResult GenericOp::fold(ArrayRef, SmallVectorImpl &) { return foldMemRefCast(*this); @@ -1362,3 +1318,39 @@ LogicalResult MatvecOp::fold(ArrayRef, SmallVectorImpl &) { return foldMemRefCast(*this); } +LogicalResult ConvWOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNWCOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNCWOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvHWOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNHWCOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNCHWOp::fold(ArrayRef, + 
SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvDHWOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNDHWCOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} +LogicalResult ConvNCDHWOp::fold(ArrayRef, + SmallVectorImpl &) { + return foldMemRefCast(*this); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp index db29835e2caa7..281edd9a91f64 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Loops.cpp @@ -295,61 +295,6 @@ void emitScalarImplementation(ArrayRef allIvs, FillOp fillOp) { nPar > 0 ? O(ivs) = fillOp.value() : O() = fillOp.value(); } -/// Following functions emit scalar part of the N-D convolution op. -/// N-D convolution has 2N loops: -/// 1-N: Iterate over the output array *O* with iterators *m1, ..., mN*. -/// N-2N:. Iterate over the kernel *K* with iterators *n1, ..., nN*. -/// -/// The scalar part accumulates products of input array *I* values with kernel -/// ones. The accumulation expression therefore looks like: -/// O[m1, ..., mN] += I[m1 + n1, ..., mN + nN] * K[n1, ..., nN]. -/// Note that the input array has to be padded in order to prevent -/// out of bounds accesses. -template -void emitScalarImplementation(ArrayRef allIvs, Conv1DOp convOp) { - assert(convOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - assert(allIvs.size() == 2); - Value m1(allIvs[0]); - Value n1(allIvs[1]); - IndexedValueType I(convOp.getInput(0)), K(convOp.getInput(1)), - O(convOp.getOutputBuffer(0)); - // Emit scalar form for the 1D conv case. - Value i1 = m1 + n1; - O(m1) = O(m1) + I(i1) * K(n1); -} - -template -void emitScalarImplementation(ArrayRef allIvs, Conv2DOp convOp) { - assert(convOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - assert(allIvs.size() == 4); - Value m1(allIvs[0]), m2(allIvs[1]); - Value n1(allIvs[2]), n2(allIvs[3]); - IndexedValueType I(convOp.getInput(0)), K(convOp.getInput(1)), - O(convOp.getOutputBuffer(0)); - // Emit scalar form for the 2D conv case. - Value i1 = m1 + n1; - Value i2 = m2 + n2; - O(m1, m2) = O(m1, m2) + I(i1, i2) * K(n1, n2); -} - -template -void emitScalarImplementation(ArrayRef allIvs, Conv3DOp convOp) { - assert(convOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - assert(allIvs.size() == 6); - Value m1(allIvs[0]), m2(allIvs[1]), m3(allIvs[2]); - Value n1(allIvs[3]), n2(allIvs[4]), n3(allIvs[5]); - IndexedValueType I(convOp.getInput(0)), K(convOp.getInput(1)), - O(convOp.getOutputBuffer(0)); - // Emit scalar form for the 3D conv case. 
- Value i1 = m1 + n1; - Value i2 = m2 + n2; - Value i3 = m3 + n3; - O(m1, m2, m3) = O(m1, m2, m3) + I(i1, i2, i3) * K(n1, n2, n3); -} - template Value getConvOpInput(ConvOp convOp, StdIndexedValue im, MutableArrayRef imIdx) { @@ -738,6 +683,24 @@ static Optional linalgOpToLoopsImplSwitch(Operation *op, return linalgOpToLoopsImpl(op, builder); if (isa(op)) return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); + if (isa(op)) + return linalgOpToLoopsImpl(op, builder); llvm_unreachable("Unexpected op in linalgOpToLoopsImpl"); } diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index a5a6e9bee34f8..ca59ecd387ec3 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -507,11 +507,3 @@ func @named_ops(%a3: memref, %b3: memref, %c3: memref, memref, memref) -> () return } - -// ----- - -func @conv_type_mismatch(%in: memref, %filter: memref, %out: memref) { - // expected-error @+1 {{expected all element types of operands to match}} - linalg.conv1D(%in, %filter, %out) : memref, memref, memref - return -} diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir index ee63d59ca8c46..6af53a2b8d222 100644 --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -1288,7 +1288,7 @@ func @conv4d(%in : memref, %filter : memref, %out : m // CHECKPARALLEL: store %[[res]], %[[arg2]][%[[i0]], %[[i1]], %[[i2]], %[[i3]]] : memref func @conv1d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { - linalg.conv1D(%in, %filter, %out) : memref, memref, memref + linalg.conv_1d %in, %filter, %out : (memref, memref, memref) return } @@ -1303,10 +1303,10 @@ func @conv1d_no_symbols(%in : memref, %filter : memref, %out : mem // CHECKLOOP: scf.for %[[b:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { // CHECKLOOP: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref // CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref -// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref // CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref +// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[arg2]][%[[b]]] : memref @@ -1318,19 +1318,18 @@ func @conv1d_no_symbols(%in : memref, %filter : memref, %out : mem // CHECKPARALLEL: %[[c1:.*]] = constant 1 : index // CHECKPARALLEL: %[[dim0:.*]] = dim %[[arg1]], %[[c0]] : memref // CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg2]], %[[c0]] : memref -// CHECKPARALLEL: scf.parallel (%[[b:.*]]) = (%[[c0]]) to (%[[dim1]]) step (%[[c1]]) { -// CHECKPARALLEL: scf.for %[[m:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : 
memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref +// CHECKPARALLEL: scf.parallel (%[[b:.*]], %[[m:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim1]], %[[dim0]]) step (%[[c1]], %[[c1]]) { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[b]], %[[m]]) +// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[m]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[b]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[b]]] : memref func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { - linalg.conv2D(%in, %filter, %out) : memref, memref, memref + linalg.conv_2d %in, %filter, %out : (memref, memref, memref) return } // CHECKLOOP-LABEL: @conv2d_no_symbols @@ -1349,10 +1348,12 @@ func @conv2d_no_symbols(%in : memref, %filter : memref, %out : // CHECKLOOP: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) // CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref // CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref -// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 + +// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref // CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref + +// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref @@ -1366,21 +1367,19 @@ func @conv2d_no_symbols(%in : memref, %filter : memref, %out : // CHECKPARALLEL: %[[dim1:.*]] = dim %[[arg1]], %[[c1]] : memref // CHECKPARALLEL: %[[dim2:.*]] = dim %[[arg2]], %[[c0]] : memref // CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c1]] : memref -// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]]) = (%[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]]) step (%[[c1]], %[[c1]]) { -// CHECKPARALLEL: scf.for %[[arg5:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { -// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) -// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg6]]) -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]], %[[arg6:.*]]) = (%[[c0]], %[[c0]], %[[c0]], %[[c0]]) to (%[[dim2]], %[[dim3]], %[[dim0]], %[[dim1]]) step (%[[c1]], %[[c1]], %[[c1]], %[[c1]]) { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg5]]) +// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], 
%[[arg6]]) +// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg5]], %[[arg6]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]]] : memref func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { - linalg.conv3D(%in, %filter, %out) : memref, memref, memref + linalg.conv_3d %in, %filter, %out : (memref, memref, memref) return } @@ -1406,10 +1405,12 @@ func @conv3d_no_symbols(%in : memref, %filter : memref, %o // CHECKLOOP: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) // CHECKLOOP: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) // CHECKLOOP: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) -// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref // CHECKLOOP: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref -// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 + +// CHECKLOOP: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref // CHECKLOOP: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref + +// CHECKLOOP: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 // CHECKLOOP: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 // CHECKLOOP: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref @@ -1426,16 +1427,13 @@ func @conv3d_no_symbols(%in : memref, %filter : memref, %o // CHECKPARALLEL: %[[dim3:.*]] = dim %[[arg2]], %[[c0]] : memref // CHECKPARALLEL: %[[dim4:.*]] = dim %[[arg2]], %[[c1]] : memref // CHECKPARALLEL: %[[dim5:.*]] = dim %[[arg2]], %[[c2]] : memref -// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]]) step (%[[c1]], %[[c1]], %[[c1]]) { -// CHECKPARALLEL: scf.for %[[arg6:.*]] = %[[c0]] to %[[dim0]] step %[[c1]] { -// CHECKPARALLEL: scf.for %[[arg7:.*]] = %[[c0]] to %[[dim1]] step %[[c1]] { -// CHECKPARALLEL: scf.for %[[arg8:.*]] = %[[c0]] to %[[dim2]] step %[[c1]] { -// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) -// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) -// CHECKPARALLEL: %[[aff3:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) -// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref -// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref -// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 -// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref -// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKPARALLEL: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]], %[[arg6:.*]], %[[arg7:.*]], %[[arg8:.*]]) = (%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]) to (%[[dim3]], %[[dim4]], %[[dim5]], %[[dim0]], %[[dim1]], %[[dim2]]) step (%[[c1]], %[[c1]], %[[c1]], %[[c1]], %[[c1]], %[[c1]]) { +// CHECKPARALLEL: %[[aff:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg3]], %[[arg6]]) +// CHECKPARALLEL: %[[aff2:.*]] = affine.apply #[[$stride1Dilation1]](%[[arg4]], %[[arg7]]) +// CHECKPARALLEL: %[[aff3:.*]] = 
affine.apply #[[$stride1Dilation1]](%[[arg5]], %[[arg8]]) +// CHECKPARALLEL: %[[vb:.*]] = load %[[arg0]][%[[aff]], %[[aff2]], %[[aff3]]] : memref +// CHECKPARALLEL: %[[va:.*]] = load %[[arg1]][%[[arg6]], %[[arg7]], %[[arg8]]] : memref +// CHECKPARALLEL: %[[vc:.*]] = load %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref +// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32 +// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref From acb3b8dce1cd7ed25a137c718678449dd2d77f79 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Fri, 31 Jul 2020 06:19:47 -0500 Subject: [PATCH 003/600] [Sanitizers] Fix lint failure with Python 3.6 There are some files in compiler-rt that use UTF-8 characters in some of the comments. This causes lint failures with some versions of Python. This patch just makes the encoding explicit in the call to open. --- compiler-rt/lib/sanitizer_common/scripts/litlint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/scripts/litlint.py b/compiler-rt/lib/sanitizer_common/scripts/litlint.py index 81b89c2144381..c443efb915a9c 100755 --- a/compiler-rt/lib/sanitizer_common/scripts/litlint.py +++ b/compiler-rt/lib/sanitizer_common/scripts/litlint.py @@ -45,7 +45,7 @@ def LintFile(p): The number of errors detected. """ errs = 0 - with open(p, 'r') as f: + with open(p, 'r', encoding='utf-8') as f: for i, s in enumerate(f.readlines(), start=1): msg, col = LintLine(s) if msg != None: From 2978b10aa164f692d48041327e27b2811649a233 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Fri, 31 Jul 2020 14:02:19 +0200 Subject: [PATCH 004/600] [clangd] Fix remote index build on macOS macOS builds suddenly started failing: https://github.com/kirillbobyrev/indexing-tools/runs/925090879 This patch makes use of imported libraries and fixes builds for macOS. Landing this without a review since the patch is quite straightforward and I've been testing it on my local macOS machine for a while. Differential Revision: https://reviews.llvm.org/D84928 --- llvm/cmake/modules/FindGRPC.cmake | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake index 8a0ca593b2f4a..70d67ceb791d3 100644 --- a/llvm/cmake/modules/FindGRPC.cmake +++ b/llvm/cmake/modules/FindGRPC.cmake @@ -45,11 +45,25 @@ else() # system path. if (GRPC_HOMEBREW_RETURN_CODE EQUAL "0") include_directories(${GRPC_HOMEBREW_PATH}/include) - link_directories(${GRPC_HOMEBREW_PATH}/lib) + find_library(GRPC_LIBRARY + grpc++ + PATHS ${GRPC_HOMEBREW_PATH}/lib + NO_DEFAULT_PATH + REQUIRED) + add_library(grpc++ UNKNOWN IMPORTED GLOBAL) + set_target_properties(grpc++ PROPERTIES + IMPORTED_LOCATION ${GRPC_LIBRARY}) endif() if (PROTOBUF_HOMEBREW_RETURN_CODE EQUAL "0") include_directories(${PROTOBUF_HOMEBREW_PATH}/include) - link_directories(${PROTOBUF_HOMEBREW_PATH}/lib) + find_library(PROTOBUF_LIBRARY + protobuf + PATHS ${PROTOBUF_HOMEBREW_PATH}/lib + NO_DEFAULT_PATH + REQUIRED) + add_library(protobuf UNKNOWN IMPORTED GLOBAL) + set_target_properties(protobuf PROPERTIES + IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) endif() endif() endif() From cbf5bf513b93cc5bfa360f4be8a57e50988e22f1 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Fri, 31 Jul 2020 20:02:20 +0800 Subject: [PATCH 005/600] [DWARFYAML] Add emitDebug[GNU]Pub[names/types] functions. NFC. 
In this patch, emitDebugPubnames(), emitDebugPubtypes(), emitDebugGNUPubnames(), emitDebugGNUPubtypes() are added. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D85003 --- llvm/include/llvm/ObjectYAML/DWARFEmitter.h | 6 +++-- llvm/lib/ObjectYAML/DWARFEmitter.cpp | 27 ++++++++++++++++++--- llvm/lib/ObjectYAML/ELFEmitter.cpp | 10 +++----- llvm/lib/ObjectYAML/MachOEmitter.cpp | 6 ++--- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h index 5837c69ed59f9..5c29b0e757246 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h @@ -33,8 +33,10 @@ Error emitDebugStr(raw_ostream &OS, const Data &DI); Error emitDebugAranges(raw_ostream &OS, const Data &DI); Error emitDebugRanges(raw_ostream &OS, const Data &DI); -Error emitPubSection(raw_ostream &OS, const PubSection &Sect, - bool IsLittleEndian, bool IsGNUPubSec = false); +Error emitDebugPubnames(raw_ostream &OS, const Data &DI); +Error emitDebugPubtypes(raw_ostream &OS, const Data &DI); +Error emitDebugGNUPubnames(raw_ostream &OS, const Data &DI); +Error emitDebugGNUPubtypes(raw_ostream &OS, const Data &DI); Error emitDebugInfo(raw_ostream &OS, const Data &DI); Error emitDebugLine(raw_ostream &OS, const Data &DI); Error emitDebugAddr(raw_ostream &OS, const Data &DI); diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index be9581fa38083..9655b05317abf 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -209,9 +209,8 @@ Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) { return Error::success(); } -Error DWARFYAML::emitPubSection(raw_ostream &OS, - const DWARFYAML::PubSection &Sect, - bool IsLittleEndian, bool IsGNUPubSec) { +static Error emitPubSection(raw_ostream &OS, const DWARFYAML::PubSection &Sect, + bool IsLittleEndian, bool IsGNUPubSec = false) { writeInitialLength(Sect.Length, OS, IsLittleEndian); writeInteger((uint16_t)Sect.Version, OS, IsLittleEndian); writeInteger((uint32_t)Sect.UnitOffset, OS, IsLittleEndian); @@ -227,6 +226,28 @@ Error DWARFYAML::emitPubSection(raw_ostream &OS, return Error::success(); } +Error DWARFYAML::emitDebugPubnames(raw_ostream &OS, const Data &DI) { + assert(DI.PubNames && "unexpected emitDebugPubnames() call"); + return emitPubSection(OS, *DI.PubNames, DI.IsLittleEndian); +} + +Error DWARFYAML::emitDebugPubtypes(raw_ostream &OS, const Data &DI) { + assert(DI.PubTypes && "unexpected emitDebugPubtypes() call"); + return emitPubSection(OS, *DI.PubTypes, DI.IsLittleEndian); +} + +Error DWARFYAML::emitDebugGNUPubnames(raw_ostream &OS, const Data &DI) { + assert(DI.GNUPubNames && "unexpected emitDebugGNUPubnames() call"); + return emitPubSection(OS, *DI.GNUPubNames, DI.IsLittleEndian, + /*IsGNUStyle=*/true); +} + +Error DWARFYAML::emitDebugGNUPubtypes(raw_ostream &OS, const Data &DI) { + assert(DI.GNUPubTypes && "unexpected emitDebugGNUPubtypes() call"); + return emitPubSection(OS, *DI.GNUPubTypes, DI.IsLittleEndian, + /*IsGNUStyle=*/true); +} + static Expected writeDIE(ArrayRef AbbrevDecls, const DWARFYAML::Unit &Unit, const DWARFYAML::Entry &Entry, diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index bc27c03cb6877..9fefd8c4c9965 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -967,15 +967,13 @@ Expected emitDWARF(typename ELFT::Shdr &SHeader, 
StringRef Name,
   else if (Name == ".debug_info")
     Err = DWARFYAML::emitDebugInfo(*OS, DWARF);
   else if (Name == ".debug_pubnames")
-    Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubNames, DWARF.IsLittleEndian);
+    Err = DWARFYAML::emitDebugPubnames(*OS, DWARF);
   else if (Name == ".debug_pubtypes")
-    Err = DWARFYAML::emitPubSection(*OS, *DWARF.PubTypes, DWARF.IsLittleEndian);
+    Err = DWARFYAML::emitDebugPubtypes(*OS, DWARF);
   else if (Name == ".debug_gnu_pubnames")
-    Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubNames,
-                                    DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
+    Err = DWARFYAML::emitDebugGNUPubnames(*OS, DWARF);
   else if (Name == ".debug_gnu_pubtypes")
-    Err = DWARFYAML::emitPubSection(*OS, *DWARF.GNUPubTypes,
-                                    DWARF.IsLittleEndian, /*IsGNUStyle=*/true);
+    Err = DWARFYAML::emitDebugGNUPubtypes(*OS, DWARF);
   else if (Name == ".debug_str_offsets")
     Err = DWARFYAML::emitDebugStrOffsets(*OS, DWARF);
   else if (Name == ".debug_rnglists")
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 680264484704b..619572a7532c4 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -299,12 +299,10 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
       Err = DWARFYAML::emitDebugRanges(OS, Obj.DWARF);
     else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) {
       if (Obj.DWARF.PubNames)
-        Err = DWARFYAML::emitPubSection(OS, *Obj.DWARF.PubNames,
-                                        Obj.IsLittleEndian);
+        Err = DWARFYAML::emitDebugPubnames(OS, Obj.DWARF);
     } else if (0 == strncmp(&Sec.sectname[0], "__debug_pubtypes", 16)) {
       if (Obj.DWARF.PubTypes)
-        Err = DWARFYAML::emitPubSection(OS, *Obj.DWARF.PubTypes,
-                                        Obj.IsLittleEndian);
+        Err = DWARFYAML::emitDebugPubtypes(OS, Obj.DWARF);
     } else if (0 == strncmp(&Sec.sectname[0], "__debug_info", 16))
       Err = DWARFYAML::emitDebugInfo(OS, Obj.DWARF);
     else if (0 == strncmp(&Sec.sectname[0], "__debug_line", 16))

From 760e4f22027ca1d609d21d220a28661ab091eb29 Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Fri, 31 Jul 2020 20:06:30 +0800
Subject: [PATCH 006/600] [DWARFYAML] Add helper function
 getDWARFEmitterByName(). NFC.

In this patch, we add a helper function getDWARFEmitterByName(). This
function returns the proper DWARF section emitting method for a given
section name.
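A sketch of the intended usage (mirroring the updated emitDWARF() in
ELFEmitter.cpp below; note the section name is passed without the leading
dot):

```cpp
// Look up the emitter for a DWARF section; unknown names return an
// emitter that reports a "<name> is not supported" error.
auto EmitFunc = DWARFYAML::getDWARFEmitterByName(Name.substr(1));
if (Error Err = EmitFunc(*OS, DWARF))
  return std::move(Err);
```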
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D84952 --- llvm/include/llvm/ObjectYAML/DWARFEmitter.h | 2 + llvm/lib/ObjectYAML/DWARFEmitter.cpp | 56 +++++++++++++-------- llvm/lib/ObjectYAML/ELFEmitter.cpp | 34 +------------ 3 files changed, 40 insertions(+), 52 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h index 5c29b0e757246..89d01cecb9b7b 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h @@ -43,6 +43,8 @@ Error emitDebugAddr(raw_ostream &OS, const Data &DI); Error emitDebugStrOffsets(raw_ostream &OS, const Data &DI); Error emitDebugRnglists(raw_ostream &OS, const Data &DI); +std::function +getDWARFEmitterByName(StringRef SecName); Expected>> emitDebugSections(StringRef YAMLString, bool IsLittleEndian = sys::IsLittleEndianHost); diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index 9655b05317abf..dbf417780cfc7 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/Errc.h" @@ -762,14 +763,40 @@ Error DWARFYAML::emitDebugRnglists(raw_ostream &OS, const Data &DI) { OS, *DI.DebugRnglists, DI.IsLittleEndian, DI.Is64BitAddrSize); } -using EmitFuncType = Error (*)(raw_ostream &, const DWARFYAML::Data &); +std::function +DWARFYAML::getDWARFEmitterByName(StringRef SecName) { + auto EmitFunc = + StringSwitch< + std::function>(SecName) + .Case("debug_abbrev", DWARFYAML::emitDebugAbbrev) + .Case("debug_addr", DWARFYAML::emitDebugAddr) + .Case("debug_aranges", DWARFYAML::emitDebugAranges) + .Case("debug_gnu_pubnames", DWARFYAML::emitDebugGNUPubnames) + .Case("debug_gnu_pubtypes", DWARFYAML::emitDebugGNUPubtypes) + .Case("debug_info", DWARFYAML::emitDebugInfo) + .Case("debug_line", DWARFYAML::emitDebugLine) + .Case("debug_pubnames", DWARFYAML::emitDebugPubnames) + .Case("debug_pubtypes", DWARFYAML::emitDebugPubtypes) + .Case("debug_ranges", DWARFYAML::emitDebugRanges) + .Case("debug_rnglists", DWARFYAML::emitDebugRnglists) + .Case("debug_str", DWARFYAML::emitDebugStr) + .Case("debug_str_offsets", DWARFYAML::emitDebugStrOffsets) + .Default([&](raw_ostream &, const DWARFYAML::Data &) { + return createStringError(errc::not_supported, + SecName + " is not supported"); + }); + + return EmitFunc; +} static Error -emitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc, - StringRef Sec, +emitDebugSectionImpl(const DWARFYAML::Data &DI, StringRef Sec, StringMap> &OutputBuffers) { std::string Data; raw_string_ostream DebugInfoStream(Data); + + auto EmitFunc = DWARFYAML::getDWARFEmitterByName(Sec); + if (Error Err = EmitFunc(DebugInfoStream, DI)) return Err; DebugInfoStream.flush(); @@ -796,23 +823,12 @@ DWARFYAML::emitDebugSections(StringRef YAMLString, bool IsLittleEndian) { return createStringError(YIn.error(), GeneratedDiag.getMessage()); StringMap> DebugSections; - Error Err = emitDebugSectionImpl(DI, &DWARFYAML::emitDebugInfo, "debug_info", - DebugSections); - Err = joinErrors(std::move(Err), - emitDebugSectionImpl(DI, &DWARFYAML::emitDebugLine, - "debug_line", DebugSections)); - Err = joinErrors(std::move(Err), - emitDebugSectionImpl(DI, &DWARFYAML::emitDebugStr, - "debug_str", DebugSections)); - Err = 
joinErrors(std::move(Err), - emitDebugSectionImpl(DI, &DWARFYAML::emitDebugAbbrev, - "debug_abbrev", DebugSections)); - Err = joinErrors(std::move(Err), - emitDebugSectionImpl(DI, &DWARFYAML::emitDebugAranges, - "debug_aranges", DebugSections)); - Err = joinErrors(std::move(Err), - emitDebugSectionImpl(DI, &DWARFYAML::emitDebugRanges, - "debug_ranges", DebugSections)); + Error Err = Error::success(); + cantFail(std::move(Err)); + + for (StringRef SecName : DI.getNonEmptySectionNames()) + Err = joinErrors(std::move(Err), + emitDebugSectionImpl(DI, SecName, DebugSections)); if (Err) return std::move(Err); diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 9fefd8c4c9965..8cb7af91c56b0 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -949,39 +949,9 @@ Expected emitDWARF(typename ELFT::Shdr &SHeader, StringRef Name, return 0; uint64_t BeginOffset = CBA.tell(); - Error Err = Error::success(); - cantFail(std::move(Err)); - - if (Name == ".debug_str") - Err = DWARFYAML::emitDebugStr(*OS, DWARF); - else if (Name == ".debug_aranges") - Err = DWARFYAML::emitDebugAranges(*OS, DWARF); - else if (Name == ".debug_ranges") - Err = DWARFYAML::emitDebugRanges(*OS, DWARF); - else if (Name == ".debug_line") - Err = DWARFYAML::emitDebugLine(*OS, DWARF); - else if (Name == ".debug_addr") - Err = DWARFYAML::emitDebugAddr(*OS, DWARF); - else if (Name == ".debug_abbrev") - Err = DWARFYAML::emitDebugAbbrev(*OS, DWARF); - else if (Name == ".debug_info") - Err = DWARFYAML::emitDebugInfo(*OS, DWARF); - else if (Name == ".debug_pubnames") - Err = DWARFYAML::emitDebugPubnames(*OS, DWARF); - else if (Name == ".debug_pubtypes") - Err = DWARFYAML::emitDebugPubtypes(*OS, DWARF); - else if (Name == ".debug_gnu_pubnames") - Err = DWARFYAML::emitDebugGNUPubnames(*OS, DWARF); - else if (Name == ".debug_gnu_pubtypes") - Err = DWARFYAML::emitDebugGNUPubtypes(*OS, DWARF); - else if (Name == ".debug_str_offsets") - Err = DWARFYAML::emitDebugStrOffsets(*OS, DWARF); - else if (Name == ".debug_rnglists") - Err = DWARFYAML::emitDebugRnglists(*OS, DWARF); - else - llvm_unreachable("unexpected emitDWARF() call"); - if (Err) + auto EmitFunc = DWARFYAML::getDWARFEmitterByName(Name.substr(1)); + if (Error Err = EmitFunc(*OS, DWARF)) return std::move(Err); return CBA.tell() - BeginOffset; From 74b02d73e34278e081dcb4946d66b9562f0986fe Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Fri, 31 Jul 2020 12:56:10 +0800 Subject: [PATCH 007/600] [DWARFYAML] Make the debug_aranges entry optional. This patch makes the 'debug_aranges' entry optional. If the entry is empty, yaml2obj will only emit the header for it. 
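For example, the following description (taken verbatim from the new tests
below) declares an empty entry, so yaml2obj emits only the header of the
.debug_aranges section:

```yaml
DWARF:
  debug_aranges: []
```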
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D84921 --- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/lib/ObjectYAML/DWARFEmitter.cpp | 3 +- llvm/lib/ObjectYAML/DWARFYAML.cpp | 7 +- llvm/lib/ObjectYAML/MachOEmitter.cpp | 7 +- .../ObjectYAML/MachO/DWARF-debug_aranges.yaml | 67 ++++++++++++++++++- .../yaml2obj/ELF/DWARF/debug-aranges.yaml | 19 ++++++ llvm/tools/obj2yaml/dwarf2yaml.cpp | 6 +- 7 files changed, 99 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 32382da6de504..5737ceccc0a40 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -211,7 +211,7 @@ struct Data { std::vector AbbrevDecls; std::vector DebugStrings; Optional> DebugStrOffsets; - std::vector ARanges; + Optional> DebugAranges; std::vector DebugRanges; std::vector DebugAddr; Optional PubNames; diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index dbf417780cfc7..ab3cd05a6495d 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -126,7 +126,8 @@ Error DWARFYAML::emitDebugAbbrev(raw_ostream &OS, const DWARFYAML::Data &DI) { } Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) { - for (auto Range : DI.ARanges) { + assert(DI.DebugAranges && "unexpected emitDebugAranges() call"); + for (auto Range : *DI.DebugAranges) { uint8_t AddrSize; if (Range.AddrSize) AddrSize = *Range.AddrSize; diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 7dd289ff6973b..04dd185ad954e 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -17,7 +17,7 @@ namespace llvm { bool DWARFYAML::Data::isEmpty() const { - return DebugStrings.empty() && AbbrevDecls.empty() && ARanges.empty() && + return DebugStrings.empty() && AbbrevDecls.empty() && DebugAranges && DebugRanges.empty() && !PubNames && !PubTypes && !GNUPubNames && !GNUPubTypes && CompileUnits.empty() && DebugLines.empty(); } @@ -26,7 +26,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SetVector SecNames; if (!DebugStrings.empty()) SecNames.insert("debug_str"); - if (!ARanges.empty()) + if (DebugAranges) SecNames.insert("debug_aranges"); if (!DebugRanges.empty()) SecNames.insert("debug_ranges"); @@ -61,8 +61,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.setContext(&DWARFCtx); IO.mapOptional("debug_str", DWARF.DebugStrings); IO.mapOptional("debug_abbrev", DWARF.AbbrevDecls); - if (!DWARF.ARanges.empty() || !IO.outputting()) - IO.mapOptional("debug_aranges", DWARF.ARanges); + IO.mapOptional("debug_aranges", DWARF.DebugAranges); if (!DWARF.DebugRanges.empty() || !IO.outputting()) IO.mapOptional("debug_ranges", DWARF.DebugRanges); IO.mapOptional("debug_pubnames", DWARF.PubNames); diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index 619572a7532c4..9b454c528a7e2 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -293,9 +293,10 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) { Err = DWARFYAML::emitDebugStr(OS, Obj.DWARF); else if (0 == strncmp(&Sec.sectname[0], "__debug_abbrev", 16)) Err = DWARFYAML::emitDebugAbbrev(OS, Obj.DWARF); - else if (0 == strncmp(&Sec.sectname[0], "__debug_aranges", 16)) - Err = DWARFYAML::emitDebugAranges(OS, Obj.DWARF); - else if (0 == strncmp(&Sec.sectname[0], "__debug_ranges", 16)) + else if (0 == 
strncmp(&Sec.sectname[0], "__debug_aranges", 16)) { + if (Obj.DWARF.DebugAranges) + Err = DWARFYAML::emitDebugAranges(OS, Obj.DWARF); + } else if (0 == strncmp(&Sec.sectname[0], "__debug_ranges", 16)) Err = DWARFYAML::emitDebugRanges(OS, Obj.DWARF); else if (0 == strncmp(&Sec.sectname[0], "__debug_pubnames", 16)) { if (Obj.DWARF.PubNames) diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml index ecdd88a68dd0c..1e9b880c3cd3d 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml @@ -1,4 +1,7 @@ -# RUN: yaml2obj %s | obj2yaml | FileCheck %s +## a) Test that yaml2macho is able to emit the .debug_aranges section and obj2yaml +## is able to convert it back. + +# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s --- !mach-o FileHeader: @@ -331,3 +334,65 @@ DWARF: # CHECK-NEXT: Descriptors: # CHECK-NEXT: - Address: 0x0000000100000F50 # CHECK-NEXT: Length: 52 + +## b) Test that if the "debug_aranges" entry is empty, yaml2macho will only emit the +## section header. + +# RUN: yaml2obj --docnum=2 %s -o %t2.o +# RUN: llvm-readobj --sections --section-data %t2.o | FileCheck %s --check-prefix=EMPTY-SECTION + +# EMPTY-SECTION: Index: 0 +# EMPTY-SECTION-NEXT: Name: __debug_aranges (5F 5F 64 65 62 75 67 5F 61 72 61 6E 67 65 73 00) +# EMPTY-SECTION-NEXT: Segment: __DWARF (5F 5F 44 57 41 52 46 00 00 00 00 00 00 00 00 00) +# EMPTY-SECTION-NEXT: Address: 0x0 +# EMPTY-SECTION-NEXT: Size: 0xC +# EMPTY-SECTION-NEXT: Offset: 528 +# EMPTY-SECTION-NEXT: Alignment: 0 +# EMPTY-SECTION-NEXT: RelocationOffset: 0x0 +# EMPTY-SECTION-NEXT: RelocationCount: 0 +# EMPTY-SECTION-NEXT: Type: Regular (0x0) +# EMPTY-SECTION-NEXT: Attributes [ (0x0) +# EMPTY-SECTION-NEXT: ] +# EMPTY-SECTION-NEXT: Reserved1: 0x0 +# EMPTY-SECTION-NEXT: Reserved2: 0x0 +# EMPTY-SECTION-NEXT: Reserved3: 0x0 +# EMPTY-SECTION-NEXT: SectionData ( +# EMPTY-SECTION-NEXT: ) + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_aranges + segname: __DWARF + addr: 0x00 + size: 12 + offset: 528 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 +DWARF: + debug_aranges: [] diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-aranges.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-aranges.yaml index 9fa86449053d0..4fa924c33ad13 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-aranges.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-aranges.yaml @@ -595,3 +595,22 @@ DWARF: Descriptors: - Address: 0x1234 Length: 0x4321 + +## l) Test that the .debug_aranges section header is emitted if the "debug_aranges" +## entry is empty. 
+
+# RUN: yaml2obj --docnum=12 %s -o %t12.o
+# RUN: llvm-readobj --sections --section-data %t12.o | \
+# RUN:   FileCheck -DSIZE=0 -DADDRALIGN=1 %s --check-prefixes=DWARF-HEADER,EMPTY-CONTENT
+
+# EMPTY-CONTENT-NEXT: SectionData (
+# EMPTY-CONTENT-NEXT: )
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_aranges: []
diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp
index 66264a46068a6..4c1742cf922be 100644
--- a/llvm/tools/obj2yaml/dwarf2yaml.cpp
+++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp
@@ -64,7 +64,7 @@ Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
                          DCtx.isLittleEndian(), 0);
   uint64_t Offset = 0;
   DWARFDebugArangeSet Set;
-
+  std::vector DebugAranges;
   while (ArangesData.isValidOffset(Offset)) {
     if (Error E = Set.extract(ArangesData, &Offset))
       return E;
@@ -81,8 +81,10 @@ Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
       Desc.Length = Descriptor.Length;
       Range.Descriptors.push_back(Desc);
     }
-    Y.ARanges.push_back(Range);
+    DebugAranges.push_back(Range);
   }
+
+  Y.DebugAranges = DebugAranges;
   return ErrorSuccess();
 }

From 0d25d3b7e3e3acb86d93acb2291c1d26e056746b Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson
Date: Fri, 31 Jul 2020 14:28:49 +0200
Subject: [PATCH 008/600] [clang-tidy] Fix build problem after commit
 45a720a864320bbbeb596a

When building with LLVM8.0 on RHEL7.8 I got failures like this after
commit 45a720a864320bbbe:

/app/llvm/8.0/bin/../lib/gcc/x86_64-unknown-linux-gnu/5.4.0/../../../../include/c++/5.4.0/ext/new_allocator.h:120:23: error: no matching constructor for initialization of 'std::pair, std::__cxx11::basic_string >'
{ ::new((void *)__p) _Up(std::forward<_Args>(__args)...); }
...
../../clang-tools-extra/clang-tidy/ClangTidyOptions.cpp:73:15: note: in instantiation of function template specialization 'std::vector, std::__cxx11::basic_string >, std::allocator, std::__cxx11::basic_string > > >::emplace_back &>' requested here
Options.emplace_back(KeyValue.getKey(), KeyValue.getValue().Value);

This is an attempt to avoid such build problems.
---
 clang-tools-extra/clang-tidy/ClangTidyOptions.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp
index 19ba47f005dc8..6b28cb2bdd13d 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp
@@ -70,7 +70,7 @@ struct NOptionMap {
   NOptionMap(IO &, const ClangTidyOptions::OptionMap &OptionMap) {
     Options.reserve(OptionMap.size());
     for (const auto &KeyValue : OptionMap)
-      Options.emplace_back(KeyValue.getKey(), KeyValue.getValue().Value);
+      Options.emplace_back(std::string(KeyValue.getKey()), KeyValue.getValue().Value);
   }
   ClangTidyOptions::OptionMap denormalize(IO &) {
     ClangTidyOptions::OptionMap Map;

From 638f0cf565f2121151c32d7eb52a1de0e333d5f6 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Fri, 31 Jul 2020 14:32:18 +0200
Subject: [PATCH 009/600] [clangd] Be more explicit about testing the optional
 DefLoc in LocatedSymbol.

Also fix a bug where we might return a meaningless location.
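The bug: in locateSymbolTextually(), Located.Definition was assigned
unconditionally from a possibly default-constructed Location (see the
XRefs.cpp hunk below). A simplified before/after sketch:

```cpp
// Before: DefLoc is default-constructed; when Sym.Definition is unset it
// is still copied into the result, yielding a meaningless location.
Location DefLoc;
if (Sym.Definition)
  DefLoc = *MaybeDefLoc;
Located.Definition = DefLoc;

// After: the preferred declaration and Definition are only overwritten
// when the index actually knows a definition.
if (Sym.Definition) {
  Located.PreferredDeclaration = *MaybeDefLoc;
  Located.Definition = *MaybeDefLoc;
}
```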
Differential Revision: https://reviews.llvm.org/D84919 --- clang-tools-extra/clangd/XRefs.cpp | 13 ++-- .../clangd/unittests/XRefsTests.cpp | 74 ++++++++++++------- 2 files changed, 51 insertions(+), 36 deletions(-) diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index cf747b607f4a1..1fc89f3e08472 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -405,15 +405,17 @@ locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError()); return; } - Location DeclLoc = *MaybeDeclLoc; - Location DefLoc; + LocatedSymbol Located; + Located.PreferredDeclaration = *MaybeDeclLoc; + Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str(); if (Sym.Definition) { auto MaybeDefLoc = indexToLSPLocation(Sym.Definition, MainFilePath); if (!MaybeDefLoc) { log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError()); return; } - DefLoc = *MaybeDefLoc; + Located.PreferredDeclaration = *MaybeDefLoc; + Located.Definition = *MaybeDefLoc; } if (ScoredResults.size() >= 3) { @@ -424,11 +426,6 @@ locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, return; } - LocatedSymbol Located; - Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str(); - Located.PreferredDeclaration = bool(Sym.Definition) ? DefLoc : DeclLoc; - Located.Definition = DefLoc; - SymbolQualitySignals Quality; Quality.merge(Sym); SymbolRelevanceSignals Relevance; diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 0a8f85ed53176..c9c115fd19d83 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -41,6 +41,7 @@ using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Matcher; +using ::testing::UnorderedElementsAre; using ::testing::UnorderedElementsAreArray; MATCHER_P2(FileRange, File, Range, "") { @@ -264,19 +265,23 @@ MATCHER_P3(Sym, Name, Decl, DefOrNone, "") { << llvm::to_string(arg.PreferredDeclaration); return false; } + if (!Def && !arg.Definition) + return true; if (Def && !arg.Definition) { *result_listener << "Has no definition"; return false; } - if (Def && arg.Definition->range != *Def) { + if (!Def && arg.Definition) { + *result_listener << "Definition is " << llvm::to_string(arg.Definition); + return false; + } + if (arg.Definition->range != *Def) { *result_listener << "Definition is " << llvm::to_string(arg.Definition); return false; } return true; } -::testing::Matcher Sym(std::string Name, Range Decl) { - return Sym(Name, Decl, llvm::None); -} + MATCHER_P(Sym, Name, "") { return arg.Name == Name; } MATCHER_P(RangeIs, R, "") { return arg.range == R; } @@ -771,7 +776,7 @@ TEST(LocateSymbol, TextualSmoke) { auto AST = TU.build(); auto Index = TU.index(); EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()), - ElementsAre(Sym("MyClass", T.range()))); + ElementsAre(Sym("MyClass", T.range(), T.range()))); } TEST(LocateSymbol, Textual) { @@ -891,18 +896,20 @@ TEST(LocateSymbol, Ambiguous) { // FIXME: Target the constructor as well. 
EXPECT_THAT(locateSymbolAt(AST, T.point("9")), ElementsAre(Sym("Foo"))); EXPECT_THAT(locateSymbolAt(AST, T.point("10")), - ElementsAre(Sym("Foo", T.range("ConstructorLoc")))); + ElementsAre(Sym("Foo", T.range("ConstructorLoc"), llvm::None))); EXPECT_THAT(locateSymbolAt(AST, T.point("11")), - ElementsAre(Sym("Foo", T.range("ConstructorLoc")))); + ElementsAre(Sym("Foo", T.range("ConstructorLoc"), llvm::None))); // These assertions are unordered because the order comes from // CXXRecordDecl::lookupDependentName() which doesn't appear to provide // an order guarantee. EXPECT_THAT(locateSymbolAt(AST, T.point("12")), - UnorderedElementsAre(Sym("bar", T.range("NonstaticOverload1")), - Sym("bar", T.range("NonstaticOverload2")))); - EXPECT_THAT(locateSymbolAt(AST, T.point("13")), - UnorderedElementsAre(Sym("baz", T.range("StaticOverload1")), - Sym("baz", T.range("StaticOverload2")))); + UnorderedElementsAre( + Sym("bar", T.range("NonstaticOverload1"), llvm::None), + Sym("bar", T.range("NonstaticOverload2"), llvm::None))); + EXPECT_THAT( + locateSymbolAt(AST, T.point("13")), + UnorderedElementsAre(Sym("baz", T.range("StaticOverload1"), llvm::None), + Sym("baz", T.range("StaticOverload2"), llvm::None))); } TEST(LocateSymbol, TextualDependent) { @@ -932,9 +939,10 @@ TEST(LocateSymbol, TextualDependent) { // interaction between locateASTReferent() and // locateSymbolNamedTextuallyAt(). auto Results = locateSymbolAt(AST, Source.point(), Index.get()); - EXPECT_THAT(Results, UnorderedElementsAre( - Sym("uniqueMethodName", Header.range("FooLoc")), - Sym("uniqueMethodName", Header.range("BarLoc")))); + EXPECT_THAT(Results, + UnorderedElementsAre( + Sym("uniqueMethodName", Header.range("FooLoc"), llvm::None), + Sym("uniqueMethodName", Header.range("BarLoc"), llvm::None))); } TEST(LocateSymbol, TemplateTypedefs) { @@ -992,20 +1000,23 @@ int [[bar_not_preamble]]; auto Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("p1")); EXPECT_TRUE(bool(Locations)) << "findDefinitions returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo", SourceAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo", SourceAnnotations.range(), + SourceAnnotations.range()))); // Go to a definition in header_in_preamble.h. Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("p2")); EXPECT_TRUE(bool(Locations)) << "findDefinitions returned an error"; EXPECT_THAT( *Locations, - ElementsAre(Sym("bar_preamble", HeaderInPreambleAnnotations.range()))); + ElementsAre(Sym("bar_preamble", HeaderInPreambleAnnotations.range(), + HeaderInPreambleAnnotations.range()))); // Go to a definition in header_not_in_preamble.h. Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("p3")); EXPECT_TRUE(bool(Locations)) << "findDefinitions returned an error"; EXPECT_THAT(*Locations, ElementsAre(Sym("bar_not_preamble", + HeaderNotInPreambleAnnotations.range(), HeaderNotInPreambleAnnotations.range()))); } @@ -1039,21 +1050,25 @@ TEST(GoToInclude, All) { // Test include in preamble. auto Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point()); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); // Test include in preamble, last char. 
Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("2")); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("3")); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); // Test include outside of preamble. Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("6")); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); // Test a few positions that do not result in Locations. Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("4")); @@ -1062,11 +1077,13 @@ TEST(GoToInclude, All) { Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("5")); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); Locations = runLocateSymbolAt(Server, FooCpp, SourceAnnotations.point("7")); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); // Objective C #import directive. Annotations ObjC(R"objc( @@ -1078,7 +1095,8 @@ TEST(GoToInclude, All) { Server.addDocument(FooM, ObjC.code()); Locations = runLocateSymbolAt(Server, FooM, ObjC.point()); ASSERT_TRUE(bool(Locations)) << "locateSymbolAt returned an error"; - EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range()))); + EXPECT_THAT(*Locations, ElementsAre(Sym("foo.h", HeaderAnnotations.range(), + HeaderAnnotations.range()))); } TEST(LocateSymbol, WithPreamble) { @@ -1103,7 +1121,7 @@ TEST(LocateSymbol, WithPreamble) { // LocateSymbol goes to a #include file: the result comes from the preamble. EXPECT_THAT( cantFail(runLocateSymbolAt(Server, FooCpp, FooWithHeader.point())), - ElementsAre(Sym("foo.h", FooHeader.range()))); + ElementsAre(Sym("foo.h", FooHeader.range(), FooHeader.range()))); // Only preamble is built, and no AST is built in this request. Server.addDocument(FooCpp, FooWithoutHeader.code(), "null", @@ -1112,7 +1130,7 @@ TEST(LocateSymbol, WithPreamble) { // stale one. EXPECT_THAT( cantFail(runLocateSymbolAt(Server, FooCpp, FooWithoutHeader.point())), - ElementsAre(Sym("foo", FooWithoutHeader.range()))); + ElementsAre(Sym("foo", FooWithoutHeader.range(), llvm::None))); // Reset test environment. runAddDocument(Server, FooCpp, FooWithHeader.code()); @@ -1122,7 +1140,7 @@ TEST(LocateSymbol, WithPreamble) { // Use the AST being built in above request. 
EXPECT_THAT( cantFail(runLocateSymbolAt(Server, FooCpp, FooWithoutHeader.point())), - ElementsAre(Sym("foo", FooWithoutHeader.range()))); + ElementsAre(Sym("foo", FooWithoutHeader.range(), llvm::None))); } TEST(LocateSymbol, NearbyTokenSmoke) { @@ -1133,7 +1151,7 @@ TEST(LocateSymbol, NearbyTokenSmoke) { auto AST = TestTU::withCode(T.code()).build(); // We don't pass an index, so can't hit index-based fallback. EXPECT_THAT(locateSymbolAt(AST, T.point()), - ElementsAre(Sym("err", T.range()))); + ElementsAre(Sym("err", T.range(), T.range()))); } TEST(LocateSymbol, NearbyIdentifier) { From e704aa4f254a26505d4bb9dc38bdee0ff4efa4ba Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Fri, 17 Jul 2020 09:48:01 -0700 Subject: [PATCH 010/600] DR2303: Prefer 'nearer' base classes during template deduction. DR2303 fixes the case where the derived-base match for template deduction is ambiguous if a base-of-base ALSO matches. The canonical example (as shown in the test) is just like the MSVC implementation of std::tuple. This fixes a fairly sizable issue, where if a user inherits from std::tuple on Windows (with the MS STL), they cannot use that type to call a function that takes std::tuple. Differential Revision: https://reviews.llvm.org/D84048 --- clang/lib/Sema/SemaTemplateDeduction.cpp | 187 ++++++++++++++--------- clang/test/CXX/drs/dr23xx.cpp | 32 ++++ clang/www/cxx_dr_status.html | 2 +- 3 files changed, 152 insertions(+), 69 deletions(-) diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 1f7d0f0e8d973..7aa94502fa846 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1201,6 +1201,120 @@ static bool isForwardingReference(QualType Param, unsigned FirstInnerIndex) { return false; } +/// Attempt to deduce the template arguments by checking the base types +/// according to (C++20 [temp.deduct.call] p4b3. +/// +/// \param S the semantic analysis object within which we are deducing. +/// +/// \param RecordT the top level record object we are deducing against. +/// +/// \param TemplateParams the template parameters that we are deducing. +/// +/// \param SpecParam the template specialization parameter type. +/// +/// \param Info information about the template argument deduction itself. +/// +/// \param Deduced the deduced template arguments. +/// +/// \returns the result of template argument deduction with the bases. "invalid" +/// means no matches, "success" found a single item, and the +/// "MiscellaneousDeductionFailure" result happens when the match is ambiguous. +static Sema::TemplateDeductionResult DeduceTemplateBases( + Sema &S, const RecordType *RecordT, TemplateParameterList *TemplateParams, + const TemplateSpecializationType *SpecParam, TemplateDeductionInfo &Info, + SmallVectorImpl &Deduced) { + // C++14 [temp.deduct.call] p4b3: + // If P is a class and P has the form simple-template-id, then the + // transformed A can be a derived class of the deduced A. Likewise if + // P is a pointer to a class of the form simple-template-id, the + // transformed A can be a pointer to a derived class pointed to by the + // deduced A. However, if there is a class C that is a (direct or + // indirect) base class of D and derived (directly or indirectly) from a + // class B and that would be a valid deduced A, the deduced A cannot be + // B or pointer to B, respectively. + // + // These alternatives are considered only if type deduction would + // otherwise fail. 
If they yield more than one possible deduced A, the + // type deduction fails. + + // Use a breadth-first search through the bases to collect the set of + // successful matches. Visited contains the set of nodes we have already + // visited, while ToVisit is our stack of records that we still need to + // visit. Matches contains a list of matches that have yet to be + // disqualified. + llvm::SmallPtrSet Visited; + SmallVector ToVisit; + // We iterate over this later, so we have to use MapVector to ensure + // determinism. + llvm::MapVector> + Matches; + + auto AddBases = [&Visited, &ToVisit](const RecordType *RT) { + CXXRecordDecl *RD = cast(RT->getDecl()); + for (const auto &Base : RD->bases()) { + assert(Base.getType()->isRecordType() && + "Base class that isn't a record?"); + const RecordType *RT = Base.getType()->getAs(); + if (Visited.insert(RT).second) + ToVisit.push_back(Base.getType()->getAs()); + } + }; + + // Set up the loop by adding all the bases. + AddBases(RecordT); + + // Search each path of bases until we either run into a successful match + // (where all bases of it are invalid), or we run out of bases. + while (!ToVisit.empty()) { + const RecordType *NextT = ToVisit.pop_back_val(); + + SmallVector DeducedCopy(Deduced.begin(), + Deduced.end()); + TemplateDeductionInfo BaseInfo(TemplateDeductionInfo::ForBase, Info); + Sema::TemplateDeductionResult BaseResult = + DeduceTemplateArguments(S, TemplateParams, SpecParam, + QualType(NextT, 0), BaseInfo, DeducedCopy); + + // If this was a successful deduction, add it to the list of matches, + // otherwise we need to continue searching its bases. + if (BaseResult == Sema::TDK_Success) + Matches.insert({NextT, DeducedCopy}); + else + AddBases(NextT); + } + + // At this point, 'Matches' contains a list of seemingly valid bases, however + // in the event that we have more than 1 match, it is possible that the base + // of one of the matches might be disqualified for being a base of another + // valid match. We can count on cyclical instantiations being invalid to + // simplify the disqualifications. That is, if A & B are both matches, and B + // inherits from A (disqualifying A), we know that A cannot inherit from B. + if (Matches.size() > 1) { + Visited.clear(); + for (const auto &Match : Matches) + AddBases(Match.first); + + // We can give up once we have a single item (or have run out of things to + // search) since cyclical inheritence isn't valid. + while (Matches.size() > 1 && !ToVisit.empty()) { + const RecordType *NextT = ToVisit.pop_back_val(); + Matches.erase(NextT); + + // Always add all bases, since the inheritence tree can contain + // disqualifications for multiple matches. + AddBases(NextT); + } + } + + if (Matches.empty()) + return Sema::TDK_Invalid; + if (Matches.size() > 1) + return Sema::TDK_MiscellaneousDeductionFailure; + + std::swap(Matches.front().second, Deduced); + return Sema::TDK_Success; +} + /// Deduce the template arguments by comparing the parameter type and /// the argument type (C++ [temp.deduct.type]). /// @@ -1787,78 +1901,15 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, if (!S.isCompleteType(Info.getLocation(), Arg)) return Result; - // C++14 [temp.deduct.call] p4b3: - // If P is a class and P has the form simple-template-id, then the - // transformed A can be a derived class of the deduced A. Likewise if - // P is a pointer to a class of the form simple-template-id, the - // transformed A can be a pointer to a derived class pointed to by the - // deduced A. 
- // - // These alternatives are considered only if type deduction would - // otherwise fail. If they yield more than one possible deduced A, the - // type deduction fails. - // Reset the incorrectly deduced argument from above. Deduced = DeducedOrig; - // Use data recursion to crawl through the list of base classes. - // Visited contains the set of nodes we have already visited, while - // ToVisit is our stack of records that we still need to visit. - llvm::SmallPtrSet Visited; - SmallVector ToVisit; - ToVisit.push_back(RecordT); - bool Successful = false; - SmallVector SuccessfulDeduced; - while (!ToVisit.empty()) { - // Retrieve the next class in the inheritance hierarchy. - const RecordType *NextT = ToVisit.pop_back_val(); - - // If we have already seen this type, skip it. - if (!Visited.insert(NextT).second) - continue; - - // If this is a base class, try to perform template argument - // deduction from it. - if (NextT != RecordT) { - TemplateDeductionInfo BaseInfo(TemplateDeductionInfo::ForBase, Info); - Sema::TemplateDeductionResult BaseResult = - DeduceTemplateArguments(S, TemplateParams, SpecParam, - QualType(NextT, 0), BaseInfo, Deduced); - - // If template argument deduction for this base was successful, - // note that we had some success. Otherwise, ignore any deductions - // from this base class. - if (BaseResult == Sema::TDK_Success) { - // If we've already seen some success, then deduction fails due to - // an ambiguity (temp.deduct.call p5). - if (Successful) - return Sema::TDK_MiscellaneousDeductionFailure; - - Successful = true; - std::swap(SuccessfulDeduced, Deduced); - - Info.Param = BaseInfo.Param; - Info.FirstArg = BaseInfo.FirstArg; - Info.SecondArg = BaseInfo.SecondArg; - } - - Deduced = DeducedOrig; - } - - // Visit base classes - CXXRecordDecl *Next = cast(NextT->getDecl()); - for (const auto &Base : Next->bases()) { - assert(Base.getType()->isRecordType() && - "Base class that isn't a record?"); - ToVisit.push_back(Base.getType()->getAs()); - } - } - - if (Successful) { - std::swap(SuccessfulDeduced, Deduced); - return Sema::TDK_Success; - } + // Check bases according to C++14 [temp.deduct.call] p4b3: + Sema::TemplateDeductionResult BaseResult = DeduceTemplateBases( + S, RecordT, TemplateParams, SpecParam, Info, Deduced); + if (BaseResult != Sema::TDK_Invalid) + return BaseResult; return Result; } diff --git a/clang/test/CXX/drs/dr23xx.cpp b/clang/test/CXX/drs/dr23xx.cpp index 3268838ac6c85..c265ebbe359cb 100644 --- a/clang/test/CXX/drs/dr23xx.cpp +++ b/clang/test/CXX/drs/dr23xx.cpp @@ -113,3 +113,35 @@ namespace dr2387 { // dr2387: 9 extern template const int d; #endif } + +#if __cplusplus >= 201103L +namespace dr2303 { // dr2303: 12 +template +struct A; +template <> +struct A<> {}; +template +struct A : A {}; +struct B : A {}; +struct C : A, A {}; // expected-warning {{direct base 'A' is inaccessible}} +struct D : A, A {}; // expected-warning {{direct base 'A' is inaccessible}} +struct E : A {}; +struct F : B, E {}; + +template +void f(const A &) { + static_assert(sizeof...(T) == 2, "Should only match A"); +} +template +void f2(const A *); + +void g() { + f(B{}); // This is no longer ambiguous. + B b; + f2(&b); + f(C{}); + f(D{}); + f(F{}); // expected-error {{ambiguous conversion from derived class}} +} +} //namespace dr2303 +#endif diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index c7369525c36fb..74319b1389437 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -13633,7 +13633,7 @@

C++ defect report implementation status

2303 DRWP Partial ordering and recursive variadic inheritance - Unknown + Clang 12 2304 From 793c29a267ca85da84403e3bb032ea49eed9e5f7 Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Fri, 31 Jul 2020 18:52:12 +0530 Subject: [PATCH 011/600] [MLIR,OpenMP][NFCI] Removed loop for accessing regions of ParallelOp `ParallelOp` has only one region associated with it. Reviewed By: kiranchandramohan, ftynse Differential Revision: https://reviews.llvm.org/D85008 --- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 2c61f2a4ac11b..6b068660d98ff 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -399,13 +399,13 @@ ModuleTranslation::convertOmpParallel(Operation &opInst, llvm::Instruction *codeGenIPBBTI = codeGenIPBB->getTerminator(); builder.SetInsertPoint(codeGenIPBB); - - for (auto ®ion : opInst.getRegions()) { - for (auto &bb : region) { - auto *llvmBB = llvm::BasicBlock::Create( - llvmContext, "omp.par.region", codeGenIP.getBlock()->getParent()); - blockMapping[&bb] = llvmBB; - } + // ParallelOp has only `1` region associated with it. + auto ®ion = cast(opInst).getRegion(); + for (auto &bb : region) { + auto *llvmBB = llvm::BasicBlock::Create( + llvmContext, "omp.par.region", codeGenIP.getBlock()->getParent()); + blockMapping[&bb] = llvmBB; + } // Then, convert blocks one by one in topological order to ensure // defs are converted before uses. @@ -433,7 +433,6 @@ ModuleTranslation::convertOmpParallel(Operation &opInst, // Finally, after all blocks have been traversed and values mapped, // connect the PHI nodes to the results of preceding blocks. connectPHINodes(region, valueMapping, blockMapping); - } }; // TODO: Perform appropriate actions according to the data-sharing From 2da9b44415ce7958d09da53746ad46be631dcf1f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 31 Jul 2020 09:54:39 -0400 Subject: [PATCH 012/600] [gn build] (manually) merge 63d3aeb529 --- llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 94f0a66ecb182..c9e7c45fc118b 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -125,7 +125,6 @@ write_cmake_config("config") { "RETSIGTYPE=void", "LLVM_GISEL_COV_ENABLED=", "LLVM_GISEL_COV_PREFIX=", - "LLVM_WITH_Z3=", # FIXME: Set to 1 on mac once the 10.14 SDK is in common use. 
"LLVM_SUPPORT_XCODE_SIGNPOSTS=", @@ -315,6 +314,7 @@ write_cmake_config("llvm-config") { values = [ "LLVM_ENABLE_DUMP=", "LLVM_DEFAULT_TARGET_TRIPLE=$llvm_target_triple", + "LLVM_FORCE_ENABLE_STATS=", "LLVM_HAS_ATOMICS=1", "LLVM_HAVE_TF_AOT=", "LLVM_HAVE_TF_API=", @@ -332,8 +332,8 @@ write_cmake_config("llvm-config") { "LLVM_VERSION_MAJOR=$llvm_version_major", "LLVM_VERSION_MINOR=$llvm_version_minor", "LLVM_VERSION_PATCH=$llvm_version_patch", + "LLVM_WITH_Z3=", "PACKAGE_VERSION=${llvm_version}git", - "LLVM_FORCE_ENABLE_STATS=", ] if (current_os == "win") { From 6983cf3a57aa6d8619eb39e1625eed5340ba05c7 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Fri, 31 Jul 2020 14:17:31 +0000 Subject: [PATCH 013/600] [MLIR][Shape] Allow unsafe `shape.broadcast` In a context in which `shape.broadcast` is known not to produce an error value, we want it to operate solely on extent tensors. The operation's behavior is then undefined in the error case as the result type cannot hold this value. Differential Revision: https://reviews.llvm.org/D84933 --- .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 42 ++++++++++--------- mlir/test/Dialect/Shape/canonicalize.mlir | 25 +++++++++++ mlir/test/Dialect/Shape/invalid.mlir | 10 +++-- 3 files changed, 53 insertions(+), 24 deletions(-) diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index 72e392b256db3..bc7b6048e28f4 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -49,25 +49,24 @@ def Shape_AddOp : Shape_Op<"add", [Commutative, NoSideEffect]> { def Shape_BroadcastOp : Shape_Op<"broadcast", [Commutative]> { let summary = "Returns the broadcasted output shape of two inputs"; let description = [{ - Computes the broadcasted output shape following: - 1. If any inputs are unranked, output is unranked; - 2. Else the input array with number of dimensions smaller than the max - input dimension, has 1’s prepended to its shapes and the output shape is - calculated as follows: - - output[i] = lhs[i] if lhs[i] == rhs[i] or rhs[i] is unknown/undefined - = rhs[i] if lhs[i] is unknown/undefined - = lhs[i] if rhs[i] == 1 - = rhs[i] if lhs[i] == 1 - = error if lhs[i] != rhs[i] - - Op has an optional string attribute for the error case where there is no - broadcastable output shape possible for the given inputs. - - Op may also return an ExtentTensor, but this should only be done when this - is statically guaranteed to never fail, either because of a dependency on a - cstr_broadcastable operation or other details of the construction of the - program. + Returns the broadcasted shape for two input shapes or extent tensors. Both + operands can be of type `shape.shape` or `tensor`. The result is of + type `shape.shape` and, if both operands are tensors, may be of type + `tensor`. + + If the two operand shapes are of different rank the smaller one is padded + with 1's from the left. The resulting broadcasted shape is then defined as + + result[i] = lhs[i] if lhs[i] == rhs[i] + = lhs[i] if rhs[i] == 1 + = rhs[i] if lhs[i] == 1. + + In case the resulting shape is undefined, i.e. if corresponding extents are + different from each other but none is 1, the result is an error shape. + Likewise error values are propagated if any of the operands holds an error + value. If the result type is an extent tensor (and can therefore not hold + the error value) the behavior may be undefined. The optional string + attribute can be used to describe the error case. 
}]; let arguments = (ins Shape_ShapeOrExtentTensorType:$lhs, @@ -75,8 +74,11 @@ def Shape_BroadcastOp : Shape_Op<"broadcast", [Commutative]> { OptionalAttr:$error); let results = (outs Shape_ShapeOrExtentTensorType:$result); - let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result)"; + let assemblyFormat = [{ + $lhs `,` $rhs attr-dict `:` type($lhs) `,` type($rhs) `->` type($result) + }]; + let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }]; let hasFolder = 1; let verifier = [{ return ::verifyShapeOrExtentTensorOp(*this); }]; diff --git a/mlir/test/Dialect/Shape/canonicalize.mlir b/mlir/test/Dialect/Shape/canonicalize.mlir index e18ff14df304a..21c5a68c3adc4 100644 --- a/mlir/test/Dialect/Shape/canonicalize.mlir +++ b/mlir/test/Dialect/Shape/canonicalize.mlir @@ -60,6 +60,31 @@ func @f() -> !shape.shape { // ----- +// Basic case including extent tensors. +// CHECK-LABEL: @broadcast +func @broadcast() -> tensor { + // CHECK: shape.const_shape [7, 2] : tensor + %0 = shape.const_shape [1, 2] : tensor + %1 = shape.const_shape [7, 1] : tensor + %2 = shape.broadcast %0, %1 + : tensor, tensor -> tensor + return %2 : tensor +} + +// ----- + +// Basic case including extent tensors. +// CHECK-LABEL: @broadcast +func @broadcast() -> !shape.shape { + // CHECK: shape.const_shape [7, 2] : !shape.shape + %0 = shape.const_shape [1, 2] : tensor + %1 = shape.const_shape [7, 1] : tensor + %2 = shape.broadcast %0, %1 : tensor, tensor -> !shape.shape + return %2 : !shape.shape +} + +// ----- + // Rhs is a scalar. // CHECK-LABEL: func @f func @f(%arg0 : !shape.shape) -> !shape.shape { diff --git a/mlir/test/Dialect/Shape/invalid.mlir b/mlir/test/Dialect/Shape/invalid.mlir index 448bd84e754ee..eb0ae5ae05a9b 100644 --- a/mlir/test/Dialect/Shape/invalid.mlir +++ b/mlir/test/Dialect/Shape/invalid.mlir @@ -138,17 +138,19 @@ func @add(%lhs : !shape.size, %rhs : index) -> index { // ----- -func @broadcast_error_possible(%arg0 : !shape.shape, %arg1 : !shape.shape) -> tensor { +func @broadcast(%arg0 : !shape.shape, %arg1 : !shape.shape) -> tensor { // expected-error@+1 {{if at least one of the operands can hold error values then the result must be of type `shape` to propagate them}} - %result = shape.broadcast %arg0, %arg1 : !shape.shape, !shape.shape -> tensor + %result = shape.broadcast %arg0, %arg1 + : !shape.shape, !shape.shape -> tensor return %result : tensor } // ----- -func @broadcast_error_possible(%arg0 : !shape.shape, %arg1 : tensor) -> tensor { +func @broadcast(%arg0 : !shape.shape, %arg1 : tensor) -> tensor { // expected-error@+1 {{if at least one of the operands can hold error values then the result must be of type `shape` to propagate them}} - %result = shape.broadcast %arg0, %arg1 : !shape.shape, tensor -> tensor + %result = shape.broadcast %arg0, %arg1 + : !shape.shape, tensor -> tensor return %result : tensor } From 57bd64ff8434aa55aeb1c7a1035f4b5b7468b809 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 May 2016 15:50:12 -0700 Subject: [PATCH 014/600] Support addrspacecast initializers with isNoopAddrSpaceCast Moves isNoopAddrSpaceCast to the TargetMachine. It logically belongs with the DataLayout. 
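With the move, the query is available wherever a TargetMachine is at hand (e.g. AsmPrinter, which has no TargetLowering), which is what makes the addrspacecast constant-initializer folding below possible. A minimal sketch of the new call pattern; the helper name is illustrative, not part of the patch:

```
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetMachine.h"

// Decide whether an addrspacecast can be dropped: ask the TargetMachine
// directly instead of going through TargetLowering.
static bool isNoopCast(const llvm::TargetMachine &TM,
                       const llvm::AddrSpaceCastInst &ASC) {
  return TM.isNoopAddrSpaceCast(ASC.getSrcAddressSpace(),
                                ASC.getDestAddressSpace());
}
```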
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 2 +- llvm/include/llvm/CodeGen/TargetLowering.h | 9 +------ llvm/include/llvm/Target/TargetMachine.h | 5 ++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 10 +++++++ llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 ++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 6 ----- .../lib/Target/AArch64/AArch64TargetMachine.h | 6 +++++ llvm/lib/Target/AMDGPU/AMDGPU.h | 15 +++++++++-- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 ++--- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 5 ++-- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 +++++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 ++ .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 5 +++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 17 ++++++------ llvm/lib/Target/AMDGPU/SIISelLowering.h | 9 ------- llvm/lib/Target/ARM/ARMISelLowering.h | 6 ----- llvm/lib/Target/ARM/ARMTargetMachine.h | 6 +++++ llvm/lib/Target/Mips/MipsISelLowering.h | 8 ------ llvm/lib/Target/Mips/MipsTargetMachine.h | 8 ++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 ---- llvm/lib/Target/PowerPC/PPCTargetMachine.h | 5 ++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 11 -------- llvm/lib/Target/X86/X86ISelLowering.h | 2 -- llvm/lib/Target/X86/X86TargetMachine.cpp | 8 ++++++ llvm/lib/Target/X86/X86TargetMachine.h | 2 ++ .../addrspacecast-initializer-unsupported.ll | 7 +++++ .../AMDGPU/addrspacecast-initializer.ll | 27 +++++++++++++++++++ 30 files changed, 131 insertions(+), 78 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll create mode 100644 llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d8af891a4b762..9e5c45084c599 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -222,7 +222,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { - return getTLI()->isNoopAddrSpaceCast(FromAS, ToAS); + return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS); } Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a92761abd2f82..79ef2d06d38f4 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1759,17 +1759,10 @@ class TargetLoweringBase { return ""; } - /// Returns true if a cast between SrcAS and DestAS is a noop. - virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return false; - } - /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we /// are happy to sink it into basic blocks. A cast may be free, but not /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. - virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isNoopAddrSpaceCast(SrcAS, DestAS); - } + virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; /// Return true if the pointer arguments to CI should be aligned by aligning /// the object whose address is being passed. 
If so then MinSize is set to the diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index f59bc5e5bae59..2a422341fdc84 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -271,6 +271,11 @@ class TargetMachine { return Options.BBSectionsFuncListBuf.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return false; + } + /// Get a \c TargetIRAnalysis appropriate for the target. /// /// This is used to construct the new pass manager's target IR analysis pass, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b50cae89b3494..78f18ab8aff6e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2295,6 +2295,16 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } switch (CE->getOpcode()) { + case Instruction::AddrSpaceCast: { + const Constant *Op = CE->getOperand(0); + unsigned DstAS = CE->getType()->getPointerAddressSpace(); + unsigned SrcAS = Op->getType()->getPointerAddressSpace(); + if (TM.isNoopAddrSpaceCast(SrcAS, DstAS)) + return lowerConstant(Op); + + // Fallthrough to error. + LLVM_FALLTHROUGH; + } default: { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 42cffafbb1ce9..a85ac80ef3652 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4322,7 +4322,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); - if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) return matchAddr(AddrInst->getOperand(0), Depth); return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 87d2fa15d0377..7fdf8a82bae85 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6394,7 +6394,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, unsigned AS) { // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all // pointer operands can be losslessly bitcasted to pointers of address space 0 - if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) { report_fatal_error("cannot lower memory intrinsic in address space " + Twine(AS)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d152cf8c4792a..c2a284af592d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3425,7 +3425,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); - if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); diff --git 
a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index db4fcf7494c7f..4562e1e018c0b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -801,6 +801,11 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { } } +bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return TM.isNoopAddrSpaceCast(SrcAS, DestAS); +} + void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { // If the command-line option was specified, ignore this request. if (!JumpIsExpensiveOverride.getNumOccurrences()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 344ed96d79609..a793fb6bb4625 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -462,12 +462,6 @@ class AArch64TargetLowering : public TargetLowering { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - /// This method returns a target specific FastISel object, or null if the /// target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/llvm/lib/Target/AArch64/AArch64TargetMachine.h index 7738a42293919..25e6261343179 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -57,6 +57,12 @@ class AArch64TargetMachine : public LLVMTargetMachine { SMDiagnostic &Error, SMRange &SourceRange) const override; + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } + private: bool isLittle; }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 88c79665be60d..251e12ee09f25 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -281,8 +281,6 @@ enum TargetIndex { }; } -} // End namespace llvm - /// OpenCL uses address spaces to differentiate between /// various memory regions on the hardware. 
On the CPU /// all of the address spaces point to the same memory, @@ -339,4 +337,17 @@ namespace AMDGPUAS { }; } +namespace AMDGPU { + +// FIXME: Missing constant_32bit +inline bool isFlatGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS || + AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; +} +} + +} // End namespace llvm + #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4320151d5758e..c5d5f1675bc8d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1677,8 +1677,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( const AMDGPUTargetMachine &TM = static_cast(MF.getTarget()); - const GCNSubtarget &ST = MF.getSubtarget(); - if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) { + if (TM.isNoopAddrSpaceCast(SrcAS, DestAS)) { MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST)); return true; } @@ -2251,8 +2250,7 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg( Register CmpVal = MI.getOperand(2).getReg(); Register NewVal = MI.getOperand(3).getReg(); - assert(SITargetLowering::isFlatGlobalAddrSpace( - MRI.getType(PtrReg).getAddressSpace()) && + assert(AMDGPU::isFlatGlobalAddrSpace(MRI.getType(PtrReg).getAddressSpace()) && "this should not have been custom lowered"); LLT ValTy = MRI.getType(CmpVal); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 3fed45e84b8f7..9674474cd3cf9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3232,7 +3232,7 @@ AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI, LLT PtrTy = MRI.getType(PtrReg); unsigned Size = PtrTy.getSizeInBits(); if (Subtarget.useFlatForGlobal() || - !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace())) + !AMDGPU::isFlatGlobalAddrSpace(PtrTy.getAddressSpace())) return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); // If we're using MUBUF instructions for global memory, an SGPR base register @@ -3258,8 +3258,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); - if (PtrBank == &AMDGPU::SGPRRegBank && - SITargetLowering::isFlatGlobalAddrSpace(AS)) { + if (PtrBank == &AMDGPU::SGPRRegBank && AMDGPU::isFlatGlobalAddrSpace(AS)) { if (isScalarLoadLegal(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index b4b10835837cd..b49a417dfb09c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -526,6 +526,12 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl( return I.get(); } +bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return AMDGPU::isFlatGlobalAddrSpace(SrcAS) && + AMDGPU::isFlatGlobalAddrSpace(DestAS); +} + TargetTransformInfo R600TargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(R600TTIImpl(this, F)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index e223fecc88195..aedcaf3fe4149 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -62,6 +62,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0; } + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; }; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index ad15763e1a010..da00a993bd649 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -934,7 +934,10 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Type *MaskTy = MaskOp->getType(); bool DoTruncate = false; - if (!getTLI()->isNoopAddrSpaceCast(OldAS, NewAS)) { + + const GCNTargetMachine &TM = + static_cast(getTLI()->getTargetMachine()); + if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) { // All valid 64-bit to 32-bit casts work by chopping off the high // bits. Any masking only clearing the low bits will also apply in the new // address space. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4ea44373b976a..59f45f9daf428 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1478,11 +1478,6 @@ EVT SITargetLowering::getOptimalMemOpType( return MVT::Other; } -bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, - unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); -} - bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { const MemSDNode *MemNode = cast(N); const Value *Ptr = MemNode->getMemOperand()->getValue(); @@ -1497,7 +1492,9 @@ bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS, if (SrcAS == AMDGPUAS::FLAT_ADDRESS) return true; - return isNoopAddrSpaceCast(SrcAS, DestAS); + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + return TM.isNoopAddrSpaceCast(SrcAS, DestAS); } bool SITargetLowering::isMemOpUniform(const SDNode *N) const { @@ -2285,8 +2282,10 @@ SDValue SITargetLowering::LowerFormalArguments( if (Arg.Flags.isByRef()) { SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset); - if (!isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS, - Arg.Flags.getPointerAddrSpace())) { + const GCNTargetMachine &TM = + static_cast(getTargetMachine()); + if (!TM.isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS, + Arg.Flags.getPointerAddrSpace())) { Ptr = DAG.getAddrSpaceCast(DL, VT, Ptr, AMDGPUAS::CONSTANT_ADDRESS, Arg.Flags.getPointerAddrSpace()); } @@ -8506,7 +8505,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!AMDGPU::isFlatGlobalAddrSpace(AS)) return Op; // Non-local address space requires custom lowering for atomic compare diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index f4c0764640575..19dea37ad410d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -275,15 +275,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { AS == AMDGPUAS::PRIVATE_ADDRESS; } - // FIXME: Missing constant_32bit - static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - 
AS == AMDGPUAS::CONSTANT_ADDRESS || - AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; - } - - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; TargetLoweringBase::LegalizeTypeAction diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 1428600ca5240..f5bb097062aff 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -528,12 +528,6 @@ class VectorType; const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent = false) const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override; diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.h b/llvm/lib/Target/ARM/ARMTargetMachine.h index ac55d2bdcc2b3..8428092bf1794 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -72,6 +72,12 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { } bool targetSchedulesPostRAScheduling() const override { return true; }; + + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; /// ARM/Thumb little endian target machine. diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 16b4d51d3ca63..0c5df4ba1bade 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -365,14 +365,6 @@ class TargetRegisterClass; return ABI.IsN64() ? Mips::A1_64 : Mips::A1; } - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Mips doesn't have any special address spaces so we just reserve - // the first 256 for software use (e.g. OpenCL) and treat casts - // between them as noops. - return SrcAS < 256 && DestAS < 256; - } - bool isJumpTableRelative() const override { return getTargetMachine().isPositionIndependent(); } diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.h b/llvm/lib/Target/Mips/MipsTargetMachine.h index 25300504a02dc..e0de924be4fd1 100644 --- a/llvm/lib/Target/Mips/MipsTargetMachine.h +++ b/llvm/lib/Target/Mips/MipsTargetMachine.h @@ -63,6 +63,14 @@ class MipsTargetMachine : public LLVMTargetMachine { return TLOF.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Mips doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + bool isLittleEndian() const { return isLittle; } const MipsABIInfo &getABI() const { return ABI; } }; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 8cc42226d7f0b..80588a1bd4019 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1022,11 +1022,6 @@ namespace llvm { } }; - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. 
- return true; - } - bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h index fd1d14ae32d4a..21faa4e710e3e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -58,6 +58,11 @@ class PPCTargetMachine final : public LLVMTargetMachine { const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 493d934e5381a..cb1067a06239e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2537,17 +2537,6 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { return TargetLowering::getSafeStackPointerLocation(IRB); } -bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, - unsigned DestAS) const { - assert(SrcAS != DestAS && "Expected different address spaces!"); - - const TargetMachine &TM = getTargetMachine(); - if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS)) - return false; - - return SrcAS < 256 && DestAS < 256; -} - //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7f3dc90a2d735..2c22a62fb506f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1349,8 +1349,6 @@ namespace llvm { Align Alignment, SelectionDAG &DAG) const; - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; - /// Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index f660b99a4511d..685a8e8fa8774 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -311,6 +311,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } +bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + assert(SrcAS != DestAS && "Expected different address spaces!"); + if (getPointerSize(SrcAS) != getPointerSize(DestAS)) + return false; + return SrcAS < 256 && DestAS < 256; +} + //===----------------------------------------------------------------------===// // X86 TTI query. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86TargetMachine.h b/llvm/lib/Target/X86/X86TargetMachine.h index 8d98474a39c06..69d7e48b89778 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.h +++ b/llvm/lib/Target/X86/X86TargetMachine.h @@ -54,6 +54,8 @@ class X86TargetMachine final : public LLVMTargetMachine { } bool isJIT() const { return IsJIT; } + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll new file mode 100644 index 0000000000000..223efcc738188 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll @@ -0,0 +1,7 @@ +; RUN: not --crash llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*) + +@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 + +@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll new file mode 100644 index 0000000000000..4f5082f9bd08a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; CHECK: global.arr: +; CHECK: .zero 1024 +; CHECK: .size global.arr, 1024 + +; CHECK: gv_flatptr_from_global: +; CHECK: .quad global.arr+32 +; CHECK: .size gv_flatptr_from_global, 8 + +; CHECK: gv_global_ptr: +; CHECK: .quad global.arr+32 +; CHECK: .size gv_global_ptr, 8 + +; CHECK: gv_flatptr_from_constant: +; CHECK: .quad constant.arr+32 +; CHECK: .size gv_flatptr_from_constant, 8 + +@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 +@constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4 + +@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 + + +@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4 + +@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 From dd5ea5674b86bade4904fab4c66a1156b3df033e Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Fri, 17 Jul 2020 09:15:21 +0100 Subject: [PATCH 015/600] [flang] Make interactive behaviour more obvious When flang is invoked with no files it waits for input on stdin. Make it print a message saying this to prevent the user being surprised. 
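For illustration, a hypothetical session showing the new behaviour; the two message lines are exactly the strings added in the diff below:

```
$ f18
Enter Fortran source
Use EOF character (^D) to end file
```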
Differential Revision: https://reviews.llvm.org/D84855
---
 flang/test/Driver/Inputs/hello.f90 |  3 +++
 flang/test/Driver/no_files.f90     | 10 ++++++++++
 flang/tools/f18/f18.cpp            |  2 ++
 3 files changed, 15 insertions(+)
 create mode 100644 flang/test/Driver/Inputs/hello.f90
 create mode 100644 flang/test/Driver/no_files.f90

diff --git a/flang/test/Driver/Inputs/hello.f90 b/flang/test/Driver/Inputs/hello.f90
new file mode 100644
index 0000000000000..d0c7eb94f53c8
--- /dev/null
+++ b/flang/test/Driver/Inputs/hello.f90
@@ -0,0 +1,3 @@
+program hello
+  write (*,*), "hello world"
+end program hello
diff --git a/flang/test/Driver/no_files.f90 b/flang/test/Driver/no_files.f90
new file mode 100644
index 0000000000000..718985dce4ca5
--- /dev/null
+++ b/flang/test/Driver/no_files.f90
@@ -0,0 +1,10 @@
+! RUN: %f18 < %S/Inputs/hello.f90 | FileCheck %s
+
+
+! CHECK: Enter Fortran source
+! CHECK: Use EOF character (^D) to end file
+
+! CHECK: Parse tree comprises {{.*}} objects and occupies {{.*}} total bytes
+! CHECK: PROGRAM hello
+! CHECK: WRITE (*, *) "hello world"
+! CHECK: END PROGRAM hello
diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp
index dc5ddd12295f6..5df78a467f2b1 100644
--- a/flang/tools/f18/f18.cpp
+++ b/flang/tools/f18/f18.cpp
@@ -686,6 +686,8 @@ int main(int argc, char *const argv[]) {
   if (!anyFiles) {
     driver.measureTree = true;
     driver.dumpUnparse = true;
+    llvm::outs() << "Enter Fortran source\n"
+                 << "Use EOF character (^D) to end file\n";
     CompileFortran("-", options, driver, defaultKinds);
     return exitStatus;
   }

From b068d19a151d9d3a73b0265df27836d9fd0ad1e3 Mon Sep 17 00:00:00 2001
From: Richard Barton
Date: Thu, 25 Jun 2020 16:01:56 +0100
Subject: [PATCH 016/600] [flang] Add details to --help screen on default behaviour

Add a usage string and a defaults section that clarifies:
 * If no input files are given, f18 reads from stdin
 * If no input files are given, f18 dumps the parse tree.
 * The default behaviour is to exec F18_FC.
 * The default F18_FC setting is 'gfortran'

Adds a simple regression test which tests the top and tail of the help
screen and the exit status.

Depends on D84855

Differential Revision: https://reviews.llvm.org/D84856
---
 flang/test/Driver/help.f90 |  9 +++++++++
 flang/tools/f18/f18.cpp    | 16 +++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Driver/help.f90

diff --git a/flang/test/Driver/help.f90 b/flang/test/Driver/help.f90
new file mode 100644
index 0000000000000..66dd14aa5a86a
--- /dev/null
+++ b/flang/test/Driver/help.f90
@@ -0,0 +1,9 @@
+! RUN: %f18 -help 2>&1 | FileCheck %s
+! RUN: %f18 --help 2>&1 | FileCheck %s
+! RUN: %f18 -? 2>&1 | FileCheck %s
+
+! CHECK: f18: LLVM Fortran compiler
+
+! CHECK: -help print this again
+! CHECK: Unrecognised options are passed through to the external compiler
+! CHECK: set by F18_FC (see defaults).
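For reference, a hypothetical session consistent with the CHECK lines above; the middle of the help text is elided here:

```
$ f18 --help
f18: LLVM Fortran compiler
...
Unrecognised options are passed through to the external compiler
set by F18_FC (see defaults).
$ echo $?
0
```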
diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 5df78a467f2b1..02bd5b26805ab 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -605,6 +605,19 @@ int main(int argc, char *const argv[]) { driver.byteswapio = true; // TODO: Pass to lowering, generate call } else if (arg == "-help" || arg == "--help" || arg == "-?") { llvm::errs() + << "f18: LLVM Fortran compiler\n" + << "\n" + << "Usage: f18 [options] \n" + << "\n" + << "Defaults:\n" + << " When invoked with input files, and no options to tell\n" + << " it otherwise, f18 will unparse its input and pass that on to an\n" + << " external compiler to continue the compilation.\n" + << " The external compiler is specified by the F18_FC environment\n" + << " variable. The default is 'gfortran'.\n" + << " If invoked with no input files, f18 reads source code from\n" + << " stdin and runs with -fdebug-measure-parse-tree -funparse.\n" + << "\n" << "f18 options:\n" << " -Mfixed | -Mfree | -ffixed-form | -ffree-form force the " "source form\n" @@ -638,7 +651,8 @@ int main(int argc, char *const argv[]) { << " -fget-symbols-sources\n" << " -v -c -o -I -D -U have their usual meanings\n" << " -help print this again\n" - << "Other options are passed through to the compiler.\n"; + << "Unrecognised options are passed through to the external compiler\n" + << "set by F18_FC (see defaults).\n"; return exitStatus; } else if (arg == "-V") { llvm::errs() << "\nf18 compiler (under development)\n"; From 30e45f339eb0841dc7fe27fad119cc5db0c052f3 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Sat, 18 Jul 2020 14:22:18 +0100 Subject: [PATCH 017/600] [flang] Add -h as a synonym for help As expected by user in http://lists.llvm.org/pipermail/flang-dev/2020-June/000404.html Depends on D84856 Differential Revision: https://reviews.llvm.org/D84857 --- flang/test/Driver/help.f90 | 1 + flang/tools/f18/f18.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/test/Driver/help.f90 b/flang/test/Driver/help.f90 index 66dd14aa5a86a..d6162954a8723 100644 --- a/flang/test/Driver/help.f90 +++ b/flang/test/Driver/help.f90 @@ -1,3 +1,4 @@ +! RUN: %f18 -h 2>&1 | FileCheck %s ! RUN: %f18 -help 2>&1 | FileCheck %s ! RUN: %f18 --help 2>&1 | FileCheck %s ! RUN: %f18 -? 2>&1 | FileCheck %s diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 02bd5b26805ab..338d04e7e8f5c 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -603,7 +603,7 @@ int main(int argc, char *const argv[]) { driver.getSymbolsSources = true; } else if (arg == "-byteswapio") { driver.byteswapio = true; // TODO: Pass to lowering, generate call - } else if (arg == "-help" || arg == "--help" || arg == "-?") { + } else if (arg == "-h" || arg == "-help" || arg == "--help" || arg == "-?") { llvm::errs() << "f18: LLVM Fortran compiler\n" << "\n" From cfb955ac370cb724c51423a05694aaf5b70903a4 Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Fri, 31 Jul 2020 08:02:21 -0700 Subject: [PATCH 018/600] [mlir][spirv] Relax restriction on pointer type for CooperativeMatrix load/store This change allow CooperativeMatrix Load/Store operations to use pointer type that may not match the matrix element type. This allow us to declare buffer with a larger type size than the matrix element type. This follows SPIR-V spec and this is needed to be able to use cooperative matrix in combination with shared local memory efficiently. 
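For illustration, the relaxed form admits assembly like the following sketch; the element types, storage class, and scope here are placeholders, and the point is that the i32 pointee no longer has to match the f16 matrix element type:

```
%m = spv.CooperativeMatrixLoadNV %ptr, %stride, %colMajor
     : !spv.ptr<i32, StorageBuffer> as !spv.coopmatrix<16x16xf16, Subgroup>
spv.CooperativeMatrixStoreNV %ptr, %m, %stride, %colMajor
     : !spv.ptr<i32, StorageBuffer>, !spv.coopmatrix<16x16xf16, Subgroup>
```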
Differential Revision: https://reviews.llvm.org/D84993 --- .../SPIRV/SPIRVCooperativeMatrixOps.td | 16 ++--- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 61 ++++++++----------- .../Serialization/cooperative-matrix.mlir | 16 ++--- .../Dialect/SPIRV/cooperative-matrix.mlir | 38 +++++++++--- 4 files changed, 74 insertions(+), 57 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVCooperativeMatrixOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVCooperativeMatrixOps.td index 9c3462a2e5bf1..720cfd697c24e 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVCooperativeMatrixOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVCooperativeMatrixOps.td @@ -101,16 +101,17 @@ def SPV_CooperativeMatrixLoadNVOp : SPV_Op<"CooperativeMatrixLoadNV", []> { ``` {.ebnf} cooperative-matrixload-op ::= ssa-id `=` `spv.CooperativeMatrixLoadNV` - storage-class ssa-use `,` ssa-use `,` ssa-use + ssa-use `,` ssa-use `,` ssa-use (`[` memory-access `]`)? ` : ` + pointer-type `as` cooperative-matrix-type ``` For example: ``` - %0 = spv.CooperativeMatrixLoadNV "StorageBuffer" %ptr, %stride, %colMajor - : !spv.coopmatrix + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %colMajor + : !spv.ptr as !spv.coopmatrix ``` }]; @@ -243,16 +244,17 @@ def SPV_CooperativeMatrixStoreNVOp : SPV_Op<"CooperativeMatrixStoreNV", []> { ``` {.ebnf} coop-matrix-store-op ::= `spv.CooperativeMatrixStoreNV ` - storage-class ssa-use `, ` ssa-use `, ` ssa-use `, ` ssa-use `, ` - (`[` memory-access `]`)? `:` spirv-element-type + ssa-use `, ` ssa-use `, ` + (`[` memory-access `]`)? `:` + pointer-type `,` spirv-element-type ``` For example: ``` - spv.CooperativeMatrixStoreNV "StorageBuffer" %arg0, %arg2, %arg1, %arg3 : - !spv.coopmatrix + spv.CooperativeMatrixStoreNV %arg0, %arg2, %arg1, %arg3 : + !spv.ptr, !spv.coopmatrix ``` }]; diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index b0235d419ebe4..bac65a02f63de 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -2793,21 +2793,16 @@ static LogicalResult verify(spirv::VariableOp varOp) { static ParseResult parseCooperativeMatrixLoadNVOp(OpAsmParser &parser, OperationState &state) { - spirv::StorageClass storageClass; SmallVector operandInfo; Type strideType = parser.getBuilder().getIntegerType(32); Type columnMajorType = parser.getBuilder().getIntegerType(1); + Type ptrType; Type elementType; - if (parseEnumStrAttr(storageClass, parser) || - parser.parseOperandList(operandInfo, 3) || + if (parser.parseOperandList(operandInfo, 3) || parseMemoryAccessAttributes(parser, state) || parser.parseColon() || - parser.parseType(elementType)) { + parser.parseType(ptrType) || parser.parseKeywordType("as", elementType)) { return failure(); } - - auto ptrType = spirv::PointerType::get( - elementType.cast().getElementType(), - storageClass); SmallVector OperandType = {ptrType, strideType, columnMajorType}; if (parser.resolveOperands(operandInfo, OperandType, parser.getNameLoc(), state.operands)) { @@ -2819,25 +2814,30 @@ static ParseResult parseCooperativeMatrixLoadNVOp(OpAsmParser &parser, } static void print(spirv::CooperativeMatrixLoadNVOp M, OpAsmPrinter &printer) { - StringRef sc = stringifyStorageClass( - M.pointer().getType().cast().getStorageClass()); - printer << spirv::CooperativeMatrixLoadNVOp::getOperationName() << " \"" << sc - << "\" " << M.pointer() << ", " << M.stride() << ", " - << M.columnmajor(); + printer << spirv::CooperativeMatrixLoadNVOp::getOperationName() << " " + << M.pointer() << ", " << M.stride() << ", " << 
M.columnmajor(); // Print optional memory access attribute. if (auto memAccess = M.memory_access()) printer << " [\"" << stringifyMemoryAccess(*memAccess) << "\"]"; - printer << " : " << M.getType(); + printer << " : " << M.pointer().getType() << " as " << M.getType(); } static LogicalResult verifyPointerAndCoopMatrixType(Operation *op, Type pointer, Type coopMatrix) { - if (pointer.cast().getPointeeType() != - coopMatrix.cast().getElementType()) + Type pointeeType = pointer.cast().getPointeeType(); + if (!pointeeType.isa() && !pointeeType.isa()) return op->emitError( - "expected the same type for pointer and the cooperative matrix" - "element, bu provided ") - << pointer << " and " << coopMatrix; + "Pointer must point to a scalar or vector type but provided ") + << pointeeType; + spirv::StorageClass storage = + pointer.cast().getStorageClass(); + if (storage != spirv::StorageClass::Workgroup && + storage != spirv::StorageClass::StorageBuffer && + storage != spirv::StorageClass::PhysicalStorageBuffer) + return op->emitError( + "Pointer storage class must be Workgroup, StorageBuffer or " + "PhysicalStorageBufferEXT but provided ") + << stringifyStorageClass(storage); return success(); } @@ -2847,21 +2847,17 @@ static LogicalResult verifyPointerAndCoopMatrixType(Operation *op, Type pointer, static ParseResult parseCooperativeMatrixStoreNVOp(OpAsmParser &parser, OperationState &state) { - spirv::StorageClass storageClass; SmallVector operandInfo; Type strideType = parser.getBuilder().getIntegerType(32); Type columnMajorType = parser.getBuilder().getIntegerType(1); + Type ptrType; Type elementType; - if (parseEnumStrAttr(storageClass, parser) || - parser.parseOperandList(operandInfo, 4) || + if (parser.parseOperandList(operandInfo, 4) || parseMemoryAccessAttributes(parser, state) || parser.parseColon() || + parser.parseType(ptrType) || parser.parseComma() || parser.parseType(elementType)) { return failure(); } - - auto ptrType = spirv::PointerType::get( - elementType.cast().getElementType(), - storageClass); SmallVector OperandType = {ptrType, elementType, strideType, columnMajorType}; if (parser.resolveOperands(operandInfo, OperandType, parser.getNameLoc(), @@ -2874,17 +2870,14 @@ static ParseResult parseCooperativeMatrixStoreNVOp(OpAsmParser &parser, static void print(spirv::CooperativeMatrixStoreNVOp coopMatrix, OpAsmPrinter &printer) { - StringRef sc = stringifyStorageClass(coopMatrix.pointer() - .getType() - .cast() - .getStorageClass()); - printer << spirv::CooperativeMatrixStoreNVOp::getOperationName() << " \"" - << sc << "\" " << coopMatrix.pointer() << ", " << coopMatrix.object() - << ", " << coopMatrix.stride() << ", " << coopMatrix.columnmajor(); + printer << spirv::CooperativeMatrixStoreNVOp::getOperationName() << " " + << coopMatrix.pointer() << ", " << coopMatrix.object() << ", " + << coopMatrix.stride() << ", " << coopMatrix.columnmajor(); // Print optional memory access attribute. 
if (auto memAccess = coopMatrix.memory_access()) printer << " [\"" << stringifyMemoryAccess(*memAccess) << "\"]"; - printer << " : " << coopMatrix.getOperand(1).getType(); + printer << " : " << coopMatrix.pointer().getType() << ", " + << coopMatrix.getOperand(1).getType(); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Serialization/cooperative-matrix.mlir b/mlir/test/Dialect/SPIRV/Serialization/cooperative-matrix.mlir index ad913dfb1624c..3f8c4bff47387 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/cooperative-matrix.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/cooperative-matrix.mlir @@ -3,29 +3,29 @@ spv.module Logical GLSL450 requires #spv.vce { // CHECK-LABEL: @cooperative_matrix_load spv.func @cooperative_matrix_load(%ptr : !spv.ptr, %stride : i32, %b : i1) "None" { - // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} : !spv.coopmatrix<16x8xi32, Workgroup> - %0 = spv.CooperativeMatrixLoadNV "StorageBuffer" %ptr, %stride, %b : !spv.coopmatrix<16x8xi32, Workgroup> + // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV {{%.*}}, {{%.*}}, {{%.*}} : !spv.ptr as !spv.coopmatrix<16x8xi32, Workgroup> + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b : !spv.ptr as !spv.coopmatrix<16x8xi32, Workgroup> spv.Return } // CHECK-LABEL: @cooperative_matrix_load_memaccess spv.func @cooperative_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32, %b : i1) "None" { - // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> - %0 = spv.CooperativeMatrixLoadNV "StorageBuffer" %ptr, %stride, %b ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> + // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.ptr as !spv.coopmatrix<8x16xi32, Subgroup> + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b ["Volatile"] : !spv.ptr as !spv.coopmatrix<8x16xi32, Subgroup> spv.Return } // CHECK-LABEL: @cooperative_matrix_store spv.func @cooperative_matrix_store(%ptr : !spv.ptr, %stride : i32, %m : !spv.coopmatrix<16x8xi32, Workgroup>, %b : i1) "None" { - // CHECK: spv.CooperativeMatrixStoreNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} : !spv.coopmatrix<16x8xi32, Workgroup> - spv.CooperativeMatrixStoreNV "StorageBuffer" %ptr, %m, %stride, %b : !spv.coopmatrix<16x8xi32, Workgroup> + // CHECK: spv.CooperativeMatrixStoreNV {{%.*}}, {{%.*}}, {{%.*}} : !spv.ptr, !spv.coopmatrix<16x8xi32, Workgroup> + spv.CooperativeMatrixStoreNV %ptr, %m, %stride, %b : !spv.ptr, !spv.coopmatrix<16x8xi32, Workgroup> spv.Return } // CHECK-LABEL: @cooperative_matrix_store_memaccess spv.func @cooperative_matrix_store_memaccess(%ptr : !spv.ptr, %m : !spv.coopmatrix<8x16xi32, Subgroup>, %stride : i32, %b : i1) "None" { - // CHECK: spv.CooperativeMatrixStoreNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> - spv.CooperativeMatrixStoreNV "StorageBuffer" %ptr, %m, %stride, %b ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> + // CHECK: spv.CooperativeMatrixStoreNV {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.ptr, !spv.coopmatrix<8x16xi32, Subgroup> + spv.CooperativeMatrixStoreNV %ptr, %m, %stride, %b ["Volatile"] : !spv.ptr, !spv.coopmatrix<8x16xi32, Subgroup> spv.Return } diff --git a/mlir/test/Dialect/SPIRV/cooperative-matrix.mlir b/mlir/test/Dialect/SPIRV/cooperative-matrix.mlir index 523bd6bfb0309..f0bb50d10f58e 100644 --- 
a/mlir/test/Dialect/SPIRV/cooperative-matrix.mlir +++ b/mlir/test/Dialect/SPIRV/cooperative-matrix.mlir @@ -2,30 +2,37 @@ // CHECK-LABEL: @cooperative_matrix_load spv.func @cooperative_matrix_load(%ptr : !spv.ptr, %stride : i32, %b : i1) "None" { - // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} : !spv.coopmatrix<16x8xi32, Workgroup> - %0 = spv.CooperativeMatrixLoadNV "StorageBuffer" %ptr, %stride, %b : !spv.coopmatrix<16x8xi32, Workgroup> + // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV {{%.*}}, {{%.*}}, {{%.*}} : !spv.ptr as !spv.coopmatrix<16x8xi32, Workgroup> + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b : !spv.ptr as !spv.coopmatrix<16x8xi32, Workgroup> spv.Return } // ----- // CHECK-LABEL: @cooperative_matrix_load_memaccess spv.func @cooperative_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32, %b : i1) "None" { - // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> - %0 = spv.CooperativeMatrixLoadNV "StorageBuffer" %ptr, %stride, %b ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> + // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.ptr as !spv.coopmatrix<8x16xi32, Subgroup> + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b ["Volatile"] : !spv.ptr as !spv.coopmatrix<8x16xi32, Subgroup> + spv.Return +} + +// CHECK-LABEL: @cooperative_matrix_load_diff_ptr_type +spv.func @cooperative_matrix_load_diff_ptr_type(%ptr : !spv.ptr, StorageBuffer>, %stride : i32, %b : i1) "None" { + // CHECK: {{%.*}} = spv.CooperativeMatrixLoadNV {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.ptr, StorageBuffer> as !spv.coopmatrix<8x16xi32, Subgroup> + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b ["Volatile"] : !spv.ptr, StorageBuffer> as !spv.coopmatrix<8x16xi32, Subgroup> spv.Return } // CHECK-LABEL: @cooperative_matrix_store spv.func @cooperative_matrix_store(%ptr : !spv.ptr, %stride : i32, %m : !spv.coopmatrix<8x16xi32, Workgroup>, %b : i1) "None" { - // CHECK: spv.CooperativeMatrixStoreNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} : !spv.coopmatrix<8x16xi32, Workgroup> - spv.CooperativeMatrixStoreNV "StorageBuffer" %ptr, %m, %stride, %b : !spv.coopmatrix<8x16xi32, Workgroup> + // CHECK: spv.CooperativeMatrixStoreNV {{%.*}}, {{%.*}}, {{%.*}} : !spv.ptr, !spv.coopmatrix<8x16xi32, Workgroup> + spv.CooperativeMatrixStoreNV %ptr, %m, %stride, %b : !spv.ptr, !spv.coopmatrix<8x16xi32, Workgroup> spv.Return } // CHECK-LABEL: @cooperative_matrix_store_memaccess spv.func @cooperative_matrix_store_memaccess(%ptr : !spv.ptr, %m : !spv.coopmatrix<8x16xi32, Subgroup>, %stride : i32, %b : i1) "None" { - // CHECK: spv.CooperativeMatrixStoreNV "StorageBuffer" {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> - spv.CooperativeMatrixStoreNV "StorageBuffer" %ptr, %m, %stride, %b ["Volatile"] : !spv.coopmatrix<8x16xi32, Subgroup> + // CHECK: spv.CooperativeMatrixStoreNV {{%.*}}, {{%.*}}, {{%.*}} ["Volatile"] : !spv.ptr, !spv.coopmatrix<8x16xi32, Subgroup> + spv.CooperativeMatrixStoreNV %ptr, %m, %stride, %b ["Volatile"] : !spv.ptr, !spv.coopmatrix<8x16xi32, Subgroup> spv.Return } @@ -134,3 +141,18 @@ spv.func @cooperative_matrix_muladd(%a : !spv.coopmatrix<8x16xf32, Subgroup>, %b spv.Return } +// ----- + +spv.func @cooperative_matrix_load_memaccess(%ptr : !spv.ptr, StorageBuffer>, %stride : i32, %b : i1) "None" { + // expected-error @+1 {{Pointer must point to a scalar or vector type}} + %0 = 
spv.CooperativeMatrixLoadNV %ptr, %stride, %b : !spv.ptr, StorageBuffer> as !spv.coopmatrix<8x16xi32, Subgroup> + spv.Return +} + +// ----- + +spv.func @cooperative_matrix_load_memaccess(%ptr : !spv.ptr, %stride : i32, %b : i1) "None" { + // expected-error @+1 {{Pointer storage class must be Workgroup, StorageBuffer or PhysicalStorageBufferEXT}} + %0 = spv.CooperativeMatrixLoadNV %ptr, %stride, %b : !spv.ptr as !spv.coopmatrix<8x16xi32, Subgroup> + spv.Return +} From d275da17e4f0a17615b24c352aab0d34f647bfa7 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 31 Jul 2020 11:18:01 -0400 Subject: [PATCH 019/600] [libc++] Fix eager generator expression in DefineLinkerScript As explained in https://gitlab.kitware.com/cmake/cmake/-/issues/21045, both branches of an $<IF> generator expression are evaluated eagerly by CMake. As a result, if the non-selected branch contains an invalid generator expression (such as getting the OUTPUT_NAME property of a non-existent target), a hard error will occur. This broke builds using the cxxrt ABI library, which doesn't create a CMake target currently. --- libcxx/cmake/Modules/DefineLinkerScript.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libcxx/cmake/Modules/DefineLinkerScript.cmake b/libcxx/cmake/Modules/DefineLinkerScript.cmake index 41426bf067149..be7f026af7e87 100644 --- a/libcxx/cmake/Modules/DefineLinkerScript.cmake +++ b/libcxx/cmake/Modules/DefineLinkerScript.cmake @@ -34,7 +34,13 @@ function(define_linker_script target) if ("${lib}" STREQUAL "cxx-headers") continue() endif() - set(libname "$<IF:$<TARGET_EXISTS:${lib}>,$<TARGET_PROPERTY:${lib},OUTPUT_NAME>,${lib}>") + # If ${lib} is not a target, we use a dummy target which we know will + # have an OUTPUT_NAME property so that CMake doesn't fail when evaluating + # the non-selected branch of the `IF`. It doesn't matter what it evaluates + # to because it's not selected, but it must not cause an error. + # See https://gitlab.kitware.com/cmake/cmake/-/issues/21045. + set(output_name_tgt "$<IF:$<TARGET_EXISTS:${lib}>,${lib},${target}>") + set(libname "$<IF:$<TARGET_EXISTS:${lib}>,$<TARGET_PROPERTY:${output_name_tgt},OUTPUT_NAME>,${lib}>") list(APPEND link_libraries "${CMAKE_LINK_LIBRARY_FLAG}${libname}") endforeach() endif() From 9853786ce39b9510eeb2688baaef7a364d58e113 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 31 Jul 2020 17:22:49 +0200 Subject: [PATCH 020/600] Add flang to export.sh so that it gets source tarballs in releases --- llvm/utils/release/export.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/release/export.sh b/llvm/utils/release/export.sh index 02a77afd0533f..c3277de38b53a 100755 --- a/llvm/utils/release/export.sh +++ b/llvm/utils/release/export.sh @@ -13,7 +13,7 @@ set -e -projects="llvm clang test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind" +projects="llvm clang test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind flang" release="" rc="" From c6f08b14d4895928232fac38d266bb53aafa6b29 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 31 Jul 2020 17:27:44 +0200 Subject: [PATCH 021/600] Hide some internal symbols. NFC.
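The two idioms applied below are standard; a minimal toy illustration (not code from the tree, names are hypothetical):

// Either form gives the symbol internal linkage, so it is no longer
// visible to other translation units and cannot clash at link time.
static int fileLocalHelper(int x) { return x + 1; } // static: the usual choice for functions

namespace {
struct FileLocalPass { // anonymous namespace: the usual choice for types
  int run(int x) const { return 2 * x; }
};
} // namespace

Accordingly, the free functions in the AMDGPU and ARM files below become static, and DataFlowSanitizerLegacyPass is wrapped in an anonymous namespace.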
--- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 8 ++++---- llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp | 7 +++---- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 2 ++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 58f251fec5e98..372c5154acef0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -723,10 +723,10 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { /// /// Note: This only supports non-TFE/LWE image intrinsic calls; those have /// struct returns. -Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, - IntrinsicInst &II, - APInt DemandedElts, - int DMaskIdx = -1) { +static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, + IntrinsicInst &II, + APInt DemandedElts, + int DMaskIdx = -1) { auto *IIVTy = cast(II.getType()); unsigned VWidth = IIVTy->getNumElements(); diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 2746d4d456e4f..d037fe7537d20 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -172,7 +172,7 @@ bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements, return false; } -bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) { +static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) { // Offsets that are not of type are sign extended by the // getelementptr instruction, and MVE gathers/scatters treat the offset as // unsigned. Thus, if the element size is smaller than 32, we can only allow @@ -1030,9 +1030,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, return true; } -Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, - IRBuilder<> &Builder) { - +static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, + IRBuilder<> &Builder) { // Splat the non-vector value to a vector of the given type - if the value is // a constant (and its value isn't too big), we can even use this opportunity // to scale it to the size of the vector elements diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 0af71e9b72b83..523e7b19ecb1d 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1859,6 +1859,7 @@ void DFSanVisitor::visitPHINode(PHINode &PN) { DFSF.setShadow(&PN, ShadowPN); } +namespace { class DataFlowSanitizerLegacyPass : public ModulePass { private: std::vector ABIListFiles; @@ -1874,6 +1875,7 @@ class DataFlowSanitizerLegacyPass : public ModulePass { return DataFlowSanitizer(ABIListFiles).runImpl(M); } }; +} // namespace char DataFlowSanitizerLegacyPass::ID; From 7ad6ea520fe49e9320bd15a4daf88e36259efedc Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Fri, 31 Jul 2020 23:20:44 +0800 Subject: [PATCH 022/600] [DWARFYAML][debug_aranges] Use yaml::Hex64 rather than uint64_t as length. NFC. It's better to use yaml::Hex64 as length in the tuples of the address range table. 
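To illustrate the effect, a minimal sketch (the Desc struct and its values are hypothetical; the real traits live under ObjectYAML):

#include "llvm/Support/YAMLTraits.h"

// yaml::Hex64 is a strong typedef over uint64_t whose scalar traits
// emit a 0x-prefixed, zero-padded hex string instead of a decimal one.
struct Desc {
  llvm::yaml::Hex64 Address;
  llvm::yaml::Hex64 Length; // as uint64_t this would print as plain "52"
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Desc> {
  static void mapping(IO &Io, Desc &D) {
    Io.mapRequired("Address", D.Address);
    Io.mapRequired("Length", D.Length);
  }
};
} // namespace yaml
} // namespace llvm

// Emitting Desc{0x0000000100000F50, 52} through llvm::yaml::Output then
// prints "Length: 0x0000000000000034", as the updated CHECK line expects.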
--- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 5737ceccc0a40..127a529139786 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -61,7 +61,7 @@ struct Abbrev { struct ARangeDescriptor { llvm::yaml::Hex64 Address; - uint64_t Length; + yaml::Hex64 Length; }; struct ARange { diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml index 1e9b880c3cd3d..762fcbfa2d22c 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_aranges.yaml @@ -333,7 +333,7 @@ DWARF: # CHECK-NEXT: AddressSize: 0x08 # CHECK-NEXT: Descriptors: # CHECK-NEXT: - Address: 0x0000000100000F50 -# CHECK-NEXT: Length: 52 +# CHECK-NEXT: Length: 0x0000000000000034 ## b) Test that if the "debug_aranges" entry is empty, yaml2macho will only emit the ## section header. From c4e574323210feda1a3988e85fdd93b90a63d1b1 Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Mon, 27 Jul 2020 23:45:54 -0500 Subject: [PATCH 023/600] [PowerPC] Implement low-order Vector Modulus Builtins, and add Vector Multiply/Divide/Modulus Builtins Tests Power10 introduces new instructions for vector multiply, divide and modulus. These instructions can be exploited by the builtin functions: vec_mul, vec_div, and vec_mod, respectively. This patch adds the function prototype, vec_mod, as vec_mul and vec_div have previously been implemented in altivec.h. This patch also adds the following front end tests: vec_mul for v2i64 vec_div for v4i32 and v2i64 vec_mod for v4i32 and v2i64 Differential Revision: https://reviews.llvm.org/D82576 --- clang/lib/Headers/altivec.h | 22 ++++++++ clang/test/CodeGen/builtins-ppc-p10vector.c | 60 +++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 4e25ec118072b..f42200f5bd4e7 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16933,6 +16933,28 @@ vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vctzdm(__a, __b); } +/* vec_mod */ + +static __inline__ vector signed int __ATTRS_o_ai +vec_mod(vector signed int __a, vector signed int __b) { + return __a % __b; +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_mod(vector unsigned int __a, vector unsigned int __b) { + return __a % __b; +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_mod(vector signed long long __a, vector signed long long __b) { + return __a % __b; +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_mod(vector unsigned long long __a, vector unsigned long long __b) { + return __a % __b; +} + /* vec_sldbi */ #define vec_sldb(__a, __b, __c) __builtin_altivec_vsldbi(__a, __b, (__c & 0x7)) diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index e67018b062141..571d33d34a220 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -25,6 +25,66 @@ unsigned char uca; unsigned short usa; unsigned long long ulla; +vector signed long long test_vec_mul_sll(void) { + // CHECK: mul <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_mul(vslla, vsllb); +} + +vector unsigned long long test_vec_mul_ull(void) { +
// CHECK: mul <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_mul(vulla, vullb); +} + +vector signed int test_vec_div_si(void) { + // CHECK: sdiv <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_div(vsia, vsib); +} + +vector unsigned int test_vec_div_ui(void) { + // CHECK: udiv <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_div(vuia, vuib); +} + +vector signed long long test_vec_div_sll(void) { + // CHECK: sdiv <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_div(vslla, vsllb); +} + +vector unsigned long long test_vec_div_ull(void) { + // CHECK: udiv <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_div(vulla, vullb); +} + +vector signed int test_vec_mod_si(void) { + // CHECK: srem <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_mod(vsia, vsib); +} + +vector unsigned int test_vec_mod_ui(void) { + // CHECK: urem <4 x i32> + // CHECK-NEXT: ret <4 x i32> + return vec_mod(vuia, vuib); +} + +vector signed long long test_vec_mod_sll(void) { + // CHECK: srem <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_mod(vslla, vsllb); +} + +vector unsigned long long test_vec_mod_ull(void) { + // CHECK: urem <2 x i64> + // CHECK-NEXT: ret <2 x i64> + return vec_mod(vulla, vullb); +} + vector unsigned long long test_vpdepd(void) { // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64> // CHECK-NEXT: ret <2 x i64> From df69492cdfa82ff6453c887cd45b3a5903b79afb Mon Sep 17 00:00:00 2001 From: Sameer Arora Date: Thu, 23 Jul 2020 16:26:42 -0700 Subject: [PATCH 024/600] [llvm-libtool-darwin] Refactor Slice and writeUniversalBinary Refactoring `Slice` class and function `createUniversalBinary` from `llvm-lipo` into MachOUniversalWriter. This refactoring is necessary so as to use the refactored code for creating universal binaries under llvm-libtool-darwin. Reviewed by alexshap, smeenai Differential Revision: https://reviews.llvm.org/D84662 --- .../llvm/Object/MachOUniversalWriter.h | 84 ++++++ llvm/lib/Object/CMakeLists.txt | 1 + llvm/lib/Object/MachOUniversalWriter.cpp | 220 ++++++++++++++ llvm/tools/llvm-lipo/llvm-lipo.cpp | 274 +++--------------- 4 files changed, 343 insertions(+), 236 deletions(-) create mode 100644 llvm/include/llvm/Object/MachOUniversalWriter.h create mode 100644 llvm/lib/Object/MachOUniversalWriter.cpp diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h new file mode 100644 index 0000000000000..c860495ddd6f0 --- /dev/null +++ b/llvm/include/llvm/Object/MachOUniversalWriter.h @@ -0,0 +1,84 @@ +//===- MachOUniversalWriter.h - MachO universal binary writer----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declares the Slice class and writeUniversalBinary function for writing a +// MachO universal binary file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOUNIVERSALWRITER_H +#define LLVM_OBJECT_MACHOUNIVERSALWRITER_H + +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/MachO.h" + +namespace llvm { +namespace object { + +class Slice { + const Binary *B; + uint32_t CPUType; + uint32_t CPUSubType; + std::string ArchName; + + // P2Alignment field stores slice alignment values from universal + // binaries. 
This is also needed to order the slices so the total + // file size can be calculated before creating the output buffer. + uint32_t P2Alignment; + +public: + explicit Slice(const MachOObjectFile &O); + + Slice(const MachOObjectFile &O, uint32_t Align); + + static Expected<Slice> create(const Archive *A); + + void setP2Alignment(uint32_t Align) { P2Alignment = Align; } + + const Binary *getBinary() const { return B; } + + uint32_t getCPUType() const { return CPUType; } + + uint32_t getCPUSubType() const { return CPUSubType; } + + uint32_t getP2Alignment() const { return P2Alignment; } + + uint64_t getCPUID() const { + return static_cast<uint64_t>(CPUType) << 32 | CPUSubType; + } + + std::string getArchString() const { + if (!ArchName.empty()) + return ArchName; + return ("unknown(" + Twine(CPUType) + "," + + Twine(CPUSubType & ~MachO::CPU_SUBTYPE_MASK) + ")") + .str(); + } + + friend bool operator<(const Slice &Lhs, const Slice &Rhs) { + if (Lhs.CPUType == Rhs.CPUType) + return Lhs.CPUSubType < Rhs.CPUSubType; + // force arm64-family to follow after all other slices for + // compatibility with cctools lipo + if (Lhs.CPUType == MachO::CPU_TYPE_ARM64) + return false; + if (Rhs.CPUType == MachO::CPU_TYPE_ARM64) + return true; + // Sort by alignment to minimize file size + return Lhs.P2Alignment < Rhs.P2Alignment; + } +}; + +Error writeUniversalBinary(ArrayRef<Slice> Slices, StringRef OutputFileName); + +} // end namespace object + +} // end namespace llvm + +#endif // LLVM_OBJECT_MACHOUNIVERSALWRITER_H diff --git a/llvm/lib/Object/CMakeLists.txt b/llvm/lib/Object/CMakeLists.txt index 61888cbe46f73..9f912ccdff1f1 100644 --- a/llvm/lib/Object/CMakeLists.txt +++ b/llvm/lib/Object/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_component_library(LLVMObject SymbolSize.cpp TapiFile.cpp TapiUniversal.cpp + MachOUniversalWriter.cpp WasmObjectFile.cpp WindowsMachineFlag.cpp WindowsResource.cpp diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp new file mode 100644 index 0000000000000..169d64430284e --- /dev/null +++ b/llvm/lib/Object/MachOUniversalWriter.cpp @@ -0,0 +1,220 @@ +//===- MachOUniversalWriter.cpp - MachO universal binary writer---*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the Slice class and writeUniversalBinary function for writing a MachO +// universal binary file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversalWriter.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Support/FileOutputBuffer.h" + +using namespace llvm; +using namespace object; + +// For compatibility with cctools lipo, a file's alignment is calculated as the +// minimum alignment of all segments. For object files, the file's alignment is +// the maximum alignment of its sections. +static uint32_t calculateFileAlignment(const MachOObjectFile &O) { + uint32_t P2CurrentAlignment; + uint32_t P2MinAlignment = MachOUniversalBinary::MaxSectionAlignment; + const bool Is64Bit = O.is64Bit(); + + for (const auto &LC : O.load_commands()) { + if (LC.C.cmd != (Is64Bit ?
MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT)) + continue; + if (O.getHeader().filetype == MachO::MH_OBJECT) { + unsigned NumberOfSections = + (Is64Bit ? O.getSegment64LoadCommand(LC).nsects + : O.getSegmentLoadCommand(LC).nsects); + P2CurrentAlignment = NumberOfSections ? 2 : P2MinAlignment; + for (unsigned SI = 0; SI < NumberOfSections; ++SI) { + P2CurrentAlignment = std::max(P2CurrentAlignment, + (Is64Bit ? O.getSection64(LC, SI).align + : O.getSection(LC, SI).align)); + } + } else { + P2CurrentAlignment = + countTrailingZeros(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr + : O.getSegmentLoadCommand(LC).vmaddr); + } + P2MinAlignment = std::min(P2MinAlignment, P2CurrentAlignment); + } + // return a value >= 4 byte aligned, and less than MachO MaxSectionAlignment + return std::max( + static_cast(2), + std::min(P2MinAlignment, static_cast( + MachOUniversalBinary::MaxSectionAlignment))); +} + +static uint32_t calculateAlignment(const MachOObjectFile &ObjectFile) { + switch (ObjectFile.getHeader().cputype) { + case MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC64: + return 12; // log2 value of page size(4k) for x86 and PPC + case MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64_32: + return 14; // log2 value of page size(16k) for Darwin ARM + default: + return calculateFileAlignment(ObjectFile); + } +} + +Slice::Slice(const MachOObjectFile &O, uint32_t Align) + : B(&O), CPUType(O.getHeader().cputype), + CPUSubType(O.getHeader().cpusubtype), + ArchName(std::string(O.getArchTriple().getArchName())), + P2Alignment(Align) {} + +Slice::Slice(const MachOObjectFile &O) : Slice(O, calculateAlignment(O)) {} + +Expected Slice::create(const Archive *A) { + Error Err = Error::success(); + std::unique_ptr FO = nullptr; + for (const Archive::Child &Child : A->children(Err)) { + Expected> ChildOrErr = Child.getAsBinary(); + if (!ChildOrErr) + return createFileError(A->getFileName(), ChildOrErr.takeError()); + Binary *Bin = ChildOrErr.get().get(); + if (Bin->isMachOUniversalBinary()) + return createStringError(std::errc::invalid_argument, + ("archive member " + Bin->getFileName() + + " is a fat file (not allowed in an archive)") + .str() + .c_str()); + if (!Bin->isMachO()) + return createStringError( + std::errc::invalid_argument, + ("archive member " + Bin->getFileName() + + " is not a MachO file (not allowed in an archive)") + .str() + .c_str()); + MachOObjectFile *O = cast(Bin); + if (FO && std::tie(FO->getHeader().cputype, FO->getHeader().cpusubtype) != + std::tie(O->getHeader().cputype, O->getHeader().cpusubtype)) { + return createStringError( + std::errc::invalid_argument, + ("archive member " + O->getFileName() + " cputype (" + + Twine(O->getHeader().cputype) + ") and cpusubtype(" + + Twine(O->getHeader().cpusubtype) + + ") does not match previous archive members cputype (" + + Twine(FO->getHeader().cputype) + ") and cpusubtype(" + + Twine(FO->getHeader().cpusubtype) + ") (all members must match) " + + FO->getFileName()) + .str() + .c_str()); + } + if (!FO) { + ChildOrErr.get().release(); + FO.reset(O); + } + } + if (Err) + return createFileError(A->getFileName(), std::move(Err)); + if (!FO) + return createStringError( + std::errc::invalid_argument, + ("empty archive with no architecture specification: " + + A->getFileName() + " (can't determine architecture for it)") + .str() + .c_str()); + + Slice ArchiveSlice = Slice(*(FO.get()), FO->is64Bit() ? 
3 : 2); + ArchiveSlice.B = A; + return ArchiveSlice; +} + +static Expected> +buildFatArchList(ArrayRef Slices) { + SmallVector FatArchList; + uint64_t Offset = + sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch); + + for (const auto &S : Slices) { + Offset = alignTo(Offset, 1ull << S.getP2Alignment()); + if (Offset > UINT32_MAX) + return createStringError( + std::errc::invalid_argument, + ("fat file too large to be created because the offset " + "field in struct fat_arch is only 32-bits and the offset " + + Twine(Offset) + " for " + S.getBinary()->getFileName() + + " for architecture " + S.getArchString() + "exceeds that.") + .str() + .c_str()); + + MachO::fat_arch FatArch; + FatArch.cputype = S.getCPUType(); + FatArch.cpusubtype = S.getCPUSubType(); + FatArch.offset = Offset; + FatArch.size = S.getBinary()->getMemoryBufferRef().getBufferSize(); + FatArch.align = S.getP2Alignment(); + Offset += FatArch.size; + FatArchList.push_back(FatArch); + } + return FatArchList; +} + +Error object::writeUniversalBinary(ArrayRef Slices, + StringRef OutputFileName) { + MachO::fat_header FatHeader; + FatHeader.magic = MachO::FAT_MAGIC; + FatHeader.nfat_arch = Slices.size(); + + Expected> FatArchListOrErr = + buildFatArchList(Slices); + if (!FatArchListOrErr) + return FatArchListOrErr.takeError(); + SmallVector FatArchList = *FatArchListOrErr; + + const bool IsExecutable = any_of(Slices, [](Slice S) { + return sys::fs::can_execute(S.getBinary()->getFileName()); + }); + const uint64_t OutputFileSize = + static_cast(FatArchList.back().offset) + + FatArchList.back().size; + Expected> OutFileOrError = + FileOutputBuffer::create(OutputFileName, OutputFileSize, + IsExecutable ? FileOutputBuffer::F_executable + : 0); + if (!OutFileOrError) + return createFileError(OutputFileName, OutFileOrError.takeError()); + std::unique_ptr OutFile = std::move(OutFileOrError.get()); + std::memset(OutFile->getBufferStart(), 0, OutputFileSize); + + if (sys::IsLittleEndianHost) + MachO::swapStruct(FatHeader); + std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header)); + + for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { + MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef(); + std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(), + OutFile->getBufferStart() + FatArchList[Index].offset); + } + + // FatArchs written after Slices in order to reduce the number of swaps for + // the LittleEndian case + if (sys::IsLittleEndianHost) + for (MachO::fat_arch &FA : FatArchList) + MachO::swapStruct(FA); + std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header), + FatArchList.begin(), + sizeof(MachO::fat_arch) * FatArchList.size()); + + if (Error E = OutFile->commit()) + return createFileError(OutputFileName, std::move(E)); + + return Error::success(); +} diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 8c2740d8c94d1..f6d2202579024 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -15,6 +15,7 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/MachOUniversalWriter.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" @@ -36,6 +37,15 @@ LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Message) { exit(EXIT_FAILURE); } +LLVM_ATTRIBUTE_NORETURN static void reportError(Error E) { + assert(E); + std::string Buf; + raw_string_ostream 
OS(Buf); + logAllUnhandledErrors(std::move(E), OS); + OS.flush(); + reportError(Buf); +} + LLVM_ATTRIBUTE_NORETURN static void reportError(StringRef File, Error E) { assert(E); std::string Buf; @@ -103,159 +113,13 @@ struct Config { LipoAction ActionToPerform; }; -// For compatibility with cctools lipo, a file's alignment is calculated as the -// minimum aligment of all segments. For object files, the file's alignment is -// the maximum alignment of its sections. -static uint32_t calculateFileAlignment(const MachOObjectFile &O) { - uint32_t P2CurrentAlignment; - uint32_t P2MinAlignment = MachOUniversalBinary::MaxSectionAlignment; - const bool Is64Bit = O.is64Bit(); - - for (const auto &LC : O.load_commands()) { - if (LC.C.cmd != (Is64Bit ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT)) - continue; - if (O.getHeader().filetype == MachO::MH_OBJECT) { - unsigned NumberOfSections = - (Is64Bit ? O.getSegment64LoadCommand(LC).nsects - : O.getSegmentLoadCommand(LC).nsects); - P2CurrentAlignment = NumberOfSections ? 2 : P2MinAlignment; - for (unsigned SI = 0; SI < NumberOfSections; ++SI) { - P2CurrentAlignment = std::max(P2CurrentAlignment, - (Is64Bit ? O.getSection64(LC, SI).align - : O.getSection(LC, SI).align)); - } - } else { - P2CurrentAlignment = - countTrailingZeros(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr - : O.getSegmentLoadCommand(LC).vmaddr); - } - P2MinAlignment = std::min(P2MinAlignment, P2CurrentAlignment); - } - // return a value >= 4 byte aligned, and less than MachO MaxSectionAlignment - return std::max( - static_cast(2), - std::min(P2MinAlignment, static_cast( - MachOUniversalBinary::MaxSectionAlignment))); -} - -static uint32_t calculateAlignment(const MachOObjectFile *ObjectFile) { - switch (ObjectFile->getHeader().cputype) { - case MachO::CPU_TYPE_I386: - case MachO::CPU_TYPE_X86_64: - case MachO::CPU_TYPE_POWERPC: - case MachO::CPU_TYPE_POWERPC64: - return 12; // log2 value of page size(4k) for x86 and PPC - case MachO::CPU_TYPE_ARM: - case MachO::CPU_TYPE_ARM64: - case MachO::CPU_TYPE_ARM64_32: - return 14; // log2 value of page size(16k) for Darwin ARM - default: - return calculateFileAlignment(*ObjectFile); - } +static Slice archiveSlice(const Archive *A, StringRef File) { + Expected ArchiveOrSlice = Slice::create(A); + if (!ArchiveOrSlice) + reportError(File, ArchiveOrSlice.takeError()); + return *ArchiveOrSlice; } -class Slice { - const Binary *B; - uint32_t CPUType; - uint32_t CPUSubType; - std::string ArchName; - - // P2Alignment field stores slice alignment values from universal - // binaries. This is also needed to order the slices so the total - // file size can be calculated before creating the output buffer. 
- uint32_t P2Alignment; - -public: - Slice(const MachOObjectFile *O, uint32_t Align) - : B(O), CPUType(O->getHeader().cputype), - CPUSubType(O->getHeader().cpusubtype), - ArchName(std::string(O->getArchTriple().getArchName())), - P2Alignment(Align) {} - - explicit Slice(const MachOObjectFile *O) : Slice(O, calculateAlignment(O)){}; - - explicit Slice(const Archive *A) : B(A) { - Error Err = Error::success(); - std::unique_ptr FO = nullptr; - for (const Archive::Child &Child : A->children(Err)) { - Expected> ChildOrErr = Child.getAsBinary(); - if (!ChildOrErr) - reportError(A->getFileName(), ChildOrErr.takeError()); - Binary *Bin = ChildOrErr.get().get(); - if (Bin->isMachOUniversalBinary()) - reportError(("archive member " + Bin->getFileName() + - " is a fat file (not allowed in an archive)") - .str()); - if (!Bin->isMachO()) - reportError(("archive member " + Bin->getFileName() + - " is not a MachO file (not allowed in an archive)")); - MachOObjectFile *O = cast(Bin); - if (FO && - std::tie(FO->getHeader().cputype, FO->getHeader().cpusubtype) != - std::tie(O->getHeader().cputype, O->getHeader().cpusubtype)) { - reportError(("archive member " + O->getFileName() + " cputype (" + - Twine(O->getHeader().cputype) + ") and cpusubtype(" + - Twine(O->getHeader().cpusubtype) + - ") does not match previous archive members cputype (" + - Twine(FO->getHeader().cputype) + ") and cpusubtype(" + - Twine(FO->getHeader().cpusubtype) + - ") (all members must match) " + FO->getFileName()) - .str()); - } - if (!FO) { - ChildOrErr.get().release(); - FO.reset(O); - } - } - if (Err) - reportError(A->getFileName(), std::move(Err)); - if (!FO) - reportError(("empty archive with no architecture specification: " + - A->getFileName() + " (can't determine architecture for it)") - .str()); - CPUType = FO->getHeader().cputype; - CPUSubType = FO->getHeader().cpusubtype; - ArchName = std::string(FO->getArchTriple().getArchName()); - // Replicate the behavior of cctools lipo. - P2Alignment = FO->is64Bit() ? 3 : 2; - } - - void setP2Alignment(uint32_t Align) { P2Alignment = Align; } - - const Binary *getBinary() const { return B; } - - uint32_t getCPUType() const { return CPUType; } - - uint32_t getCPUSubType() const { return CPUSubType; } - - uint32_t getP2Alignment() const { return P2Alignment; } - - uint64_t getCPUID() const { - return static_cast(CPUType) << 32 | CPUSubType; - } - - std::string getArchString() const { - if (!ArchName.empty()) - return ArchName; - return ("unknown(" + Twine(CPUType) + "," + - Twine(CPUSubType & ~MachO::CPU_SUBTYPE_MASK) + ")") - .str(); - } - - friend bool operator<(const Slice &Lhs, const Slice &Rhs) { - if (Lhs.CPUType == Rhs.CPUType) - return Lhs.CPUSubType < Rhs.CPUSubType; - // force arm64-family to follow after all other slices for - // compatibility with cctools lipo - if (Lhs.CPUType == MachO::CPU_TYPE_ARM64) - return false; - if (Rhs.CPUType == MachO::CPU_TYPE_ARM64) - return true; - // Sort by alignment to minimize file size - return Lhs.P2Alignment < Rhs.P2Alignment; - } -}; - } // end namespace static void validateArchitectureName(StringRef ArchitectureName) { @@ -450,8 +314,8 @@ readInputBinaries(ArrayRef InputFiles) { if (!B->isArchive() && !B->isMachO() && !B->isMachOUniversalBinary()) reportError("File " + IF.FileName + " has unsupported binary format"); if (IF.ArchType && (B->isMachO() || B->isArchive())) { - const auto S = B->isMachO() ? Slice(cast(B)) - : Slice(cast(B)); + const auto S = B->isMachO() ? 
Slice(*cast(B)) + : archiveSlice(cast(B), IF.FileName); const auto SpecifiedCPUType = MachO::getCPUTypeFromArchitecture( MachO::getArchitectureFromName( Triple(*IF.ArchType).getArchName())) @@ -506,13 +370,15 @@ static void printBinaryArchs(const Binary *Binary, raw_ostream &OS) { Expected> MachOObjOrError = O.getAsObjectFile(); if (MachOObjOrError) { - OS << Slice(MachOObjOrError->get()).getArchString() << " "; + OS << Slice(*(MachOObjOrError->get())).getArchString() << " "; continue; } Expected> ArchiveOrError = O.getAsArchive(); if (ArchiveOrError) { consumeError(MachOObjOrError.takeError()); - OS << Slice(ArchiveOrError->get()).getArchString() << " "; + OS << archiveSlice(ArchiveOrError->get(), Binary->getFileName()) + .getArchString() + << " "; continue; } consumeError(ArchiveOrError.takeError()); @@ -521,7 +387,7 @@ static void printBinaryArchs(const Binary *Binary, raw_ostream &OS) { OS << "\n"; return; } - OS << Slice(cast(Binary)).getArchString() << " \n"; + OS << Slice(*cast(Binary)).getArchString() << " \n"; } LLVM_ATTRIBUTE_NORETURN @@ -646,12 +512,12 @@ static SmallVector buildSlices( if (!BinaryOrError) reportError(InputBinary->getFileName(), BinaryOrError.takeError()); ExtractedObjects.push_back(std::move(BinaryOrError.get())); - Slices.emplace_back(ExtractedObjects.back().get(), O.getAlign()); + Slices.emplace_back(*(ExtractedObjects.back().get()), O.getAlign()); } } else if (auto O = dyn_cast(InputBinary)) { - Slices.emplace_back(O); + Slices.emplace_back(*O); } else if (auto A = dyn_cast(InputBinary)) { - Slices.emplace_back(A); + Slices.push_back(archiveSlice(A, InputBinary->getFileName())); } else { llvm_unreachable("Unexpected binary format"); } @@ -660,79 +526,6 @@ static SmallVector buildSlices( return Slices; } -static SmallVector -buildFatArchList(ArrayRef Slices) { - SmallVector FatArchList; - uint64_t Offset = - sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch); - - for (const auto &S : Slices) { - Offset = alignTo(Offset, 1ull << S.getP2Alignment()); - if (Offset > UINT32_MAX) - reportError("fat file too large to be created because the offset " - "field in struct fat_arch is only 32-bits and the offset " + - Twine(Offset) + " for " + S.getBinary()->getFileName() + - " for architecture " + S.getArchString() + "exceeds that."); - - MachO::fat_arch FatArch; - FatArch.cputype = S.getCPUType(); - FatArch.cpusubtype = S.getCPUSubType(); - FatArch.offset = Offset; - FatArch.size = S.getBinary()->getMemoryBufferRef().getBufferSize(); - FatArch.align = S.getP2Alignment(); - Offset += FatArch.size; - FatArchList.push_back(FatArch); - } - return FatArchList; -} - -static void createUniversalBinary(SmallVectorImpl &Slices, - StringRef OutputFileName) { - MachO::fat_header FatHeader; - FatHeader.magic = MachO::FAT_MAGIC; - FatHeader.nfat_arch = Slices.size(); - - stable_sort(Slices); - SmallVector FatArchList = buildFatArchList(Slices); - - const bool IsExecutable = any_of(Slices, [](Slice S) { - return sys::fs::can_execute(S.getBinary()->getFileName()); - }); - const uint64_t OutputFileSize = - static_cast(FatArchList.back().offset) + - FatArchList.back().size; - Expected> OutFileOrError = - FileOutputBuffer::create(OutputFileName, OutputFileSize, - IsExecutable ? 
FileOutputBuffer::F_executable - : 0); - if (!OutFileOrError) - reportError(OutputFileName, OutFileOrError.takeError()); - std::unique_ptr OutFile = std::move(OutFileOrError.get()); - std::memset(OutFile->getBufferStart(), 0, OutputFileSize); - - if (sys::IsLittleEndianHost) - MachO::swapStruct(FatHeader); - std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header)); - - for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { - MemoryBufferRef BufferRef = Slices[Index].getBinary()->getMemoryBufferRef(); - std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(), - OutFile->getBufferStart() + FatArchList[Index].offset); - } - - // FatArchs written after Slices in order to reduce the number of swaps for - // the LittleEndian case - if (sys::IsLittleEndianHost) - for (MachO::fat_arch &FA : FatArchList) - MachO::swapStruct(FA); - std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header), - FatArchList.begin(), - sizeof(MachO::fat_arch) * FatArchList.size()); - - if (Error E = OutFile->commit()) - reportError(OutputFileName, std::move(E)); -} - LLVM_ATTRIBUTE_NORETURN static void createUniversalBinary(ArrayRef> InputBinaries, const StringMap &Alignments, @@ -745,7 +538,10 @@ static void createUniversalBinary(ArrayRef> InputBinaries, buildSlices(InputBinaries, Alignments, ExtractedObjects); checkArchDuplicates(Slices); checkUnusedAlignments(Slices, Alignments); - createUniversalBinary(Slices, OutputFileName); + + llvm::stable_sort(Slices); + if (Error E = writeUniversalBinary(Slices, OutputFileName)) + reportError(std::move(E)); exit(EXIT_SUCCESS); } @@ -776,7 +572,10 @@ static void extractSlice(ArrayRef> InputBinaries, reportError( "fat input file " + InputBinaries.front().getBinary()->getFileName() + " does not contain the specified architecture " + ArchType); - createUniversalBinary(Slices, OutputFileName); + + llvm::stable_sort(Slices); + if (Error E = writeUniversalBinary(Slices, OutputFileName)) + reportError(std::move(E)); exit(EXIT_SUCCESS); } @@ -792,7 +591,7 @@ buildReplacementSlices(ArrayRef> ReplacementBinaries, if (!O) reportError("replacement file: " + ReplacementBinary->getFileName() + " is a fat file (must be a thin file)"); - Slice S(O); + Slice S(*O); auto Entry = Slices.try_emplace(S.getArchString(), S); if (!Entry.second) reportError("-replace " + S.getArchString() + @@ -843,7 +642,10 @@ static void replaceSlices(ArrayRef> InputBinaries, " does not contain that architecture"); checkUnusedAlignments(Slices, Alignments); - createUniversalBinary(Slices, OutputFileName); + + llvm::stable_sort(Slices); + if (Error E = writeUniversalBinary(Slices, OutputFileName)) + reportError(std::move(E)); exit(EXIT_SUCCESS); } From c75c7d51968d397e5521c8b6c1e906bde1245af6 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 31 Jul 2020 16:23:24 +0000 Subject: [PATCH 025/600] [gn build] Port df69492cdfa --- llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn index 4a3679900fe9f..42dde29a8f8a0 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn @@ -27,6 +27,7 @@ static_library("Object") { "IRSymtab.cpp", "MachOObjectFile.cpp", "MachOUniversal.cpp", + "MachOUniversalWriter.cpp", "Minidump.cpp", "ModuleSymbolTable.cpp", "Object.cpp", From d23c1d6a8dddf0e1b9b9fa64726941e402ede8af Mon Sep 17 00:00:00 2001 From: Hongtao Yu Date: Thu, 30 Jul 
2020 18:22:50 -0700 Subject: [PATCH 026/600] [AutoFDO] Avoid merging inlinee samples multiple times A function call can be replicated by optimizations like loop unrolling and jump threading, and the replicates end up sharing the nested callee sample profile. Therefore, when it comes to merging samples for uninlined callees in the sample profile inliner, a callee profile can be merged multiple times, which will cause an assert to fire. This change avoids merging the same callee profile for duplicate callsites by filtering out callee profiles with a non-zero head sample count. Reviewed By: wenlei, wmi Differential Revision: https://reviews.llvm.org/D84997 --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 30 ++++--- .../SampleProfile/inline-mergeprof-dup.ll | 80 +++++++++++++++++++ 2 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index ca60d35b8aafb..5978f2dc6d1dd 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1106,16 +1106,26 @@ bool SampleProfileLoader::inlineHotFunctions( } if (ProfileMergeInlinee) { - // Use entry samples as head samples during the merge, as inlinees - // don't have head samples. - assert(FS->getHeadSamples() == 0 && "Expect 0 head sample for inlinee"); - const_cast<FunctionSamples *>(FS)->addHeadSamples(FS->getEntrySamples()); - - // Note that we have to do the merge right after processing function. - // This allows OutlineFS's profile to be used for annotation during - // top-down processing of functions' annotation. - FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); - OutlineFS->merge(*FS); + // A function call can be replicated by optimizations like callsite + // splitting or jump threading and the replicates end up sharing the + // nested callee sample profile instead of slicing the original inlinee's + // profile. We want to do the merge exactly once by filtering out callee + // profiles with a non-zero head sample count. + if (FS->getHeadSamples() == 0) { + // Use entry samples as head samples during the merge, as inlinees + // don't have head samples. + const_cast<FunctionSamples *>(FS)->addHeadSamples( + FS->getEntrySamples()); + + // Note that we have to do the merge right after processing function. + // This allows OutlineFS's profile to be used for annotation during + // top-down processing of functions' annotation.
+ FunctionSamples *OutlineFS = Reader->getOrCreateSamplesFor(*Callee); + OutlineFS->merge(*FS); + } else + assert(FS->getHeadSamples() == FS->getEntrySamples() && + "Expect same head and entry sample counts for profiles already " + "merged."); } else { auto pair = notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0}); diff --git a/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll b/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll new file mode 100644 index 0000000000000..8d1379a582291 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-mergeprof-dup.ll @@ -0,0 +1,80 @@ +;; Test we merge non-inlined profile only once with '-sample-profile-merge-inlinee' +; RUN: opt < %s -passes='function(callsite-splitting),sample-profile' -sample-profile-file=%S/Inputs/inline-mergeprof.prof -sample-profile-merge-inlinee=true -S | FileCheck %s + +%struct.bitmap = type { i32, %struct.bitmap* } + +; CHECK-LABEL: @main +define void @main(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) #0 !dbg !6 { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, null + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, null + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %p = phi i1 [0, %Top], [%c, %NextCond] +;; The call site is replicated by callsite-splitting pass and they end up share the same sample profile +; CHECK: call void @_Z3sumii(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) +; CHECK: call void @_Z3sumii(%struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %a_elt, %struct.bitmap* nonnull %b_elt, i1 %c) + call void @_Z3sumii(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p), !dbg !8 + br label %End + +End: + ret void +} + +define void @_Z3sumii(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c) #0 !dbg !12 { +entry: + %tobool = icmp ne %struct.bitmap* %a_elt, null + %tobool1 = icmp ne %struct.bitmap* %b_elt, null + %or.cond = and i1 %tobool, %tobool1, !dbg !13 + br i1 %or.cond, label %Cond, label %Big + +Cond: + %cmp = icmp eq %struct.bitmap* %dst_elt, %a_elt, !dbg !14 + br i1 %cmp, label %Small, label %Big, !dbg !15 + +Small: + br label %End + +Big: + br label %End + +End: + ret void +} + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 10, scope: !9) +!9 = !DILexicalBlockFile(scope: !10, file: !1, discriminator: 2) +!10 = distinct !DILexicalBlock(scope: !6, file: !1, line: 10) +!11 = !DILocation(line: 12, scope: !6) +!12 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 4, scope: !12) +!14 = 
!DILocation(line: 5, scope: !12) +!15 = !DILocation(line: 6, scope: !12) + + +;; Check that the profile of function sum is only merged once even though the original callsite is replicated. +; CHECK: name: "sum" +; CHECK-NEXT: {!"function_entry_count", i64 46} +; CHECK: !{!"branch_weights", i32 11, i32 37} +; CHECK: !{!"branch_weights", i32 11, i32 1} From dcdc77619e1af61181e7214d292b7e5b59946186 Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Thu, 30 Jul 2020 11:22:25 -0700 Subject: [PATCH 027/600] [PGO][test] Add test to check that memops change the function hash Following up D84782. Differential Revision: https://reviews.llvm.org/D84953 --- llvm/test/Transforms/PGOProfile/memop_hash.ll | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 llvm/test/Transforms/PGOProfile/memop_hash.ll diff --git a/llvm/test/Transforms/PGOProfile/memop_hash.ll b/llvm/test/Transforms/PGOProfile/memop_hash.ll new file mode 100644 index 0000000000000..aca3b0c4dbeb3 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memop_hash.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) + +define i64 @foo1(i8* %a, i8* %b, i32 %s) { +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 %s, i1 false); + ret i64 0 +} + +define i64 @foo2(i8* %a, i8* %b, i32 %s) { +entry: + ret i64 0 +} + +; The two hashes should not be equal as the existence of the memcpy should change the hash. +; +; CHECK: @foo1 +; CHECK: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo1, i32 0, i32 0), i64 [[FOO1_HASH:[0-9]+]], i32 1, i32 0) +; CHECK: @foo2 +; CHECK-NOT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 [[FOO1_HASH]], i32 1, i32 0) From 86609b7af79fc9c71371ca255b271105f546c9c4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 31 Jul 2020 12:51:46 +0100 Subject: [PATCH 028/600] [X86][SSE] Cleanup bitwise reduction check prefixes.
NFC Add AVX512BW/AVX512BWVL prefixes for a future patch --- llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll | 4 ++-- llvm/test/CodeGen/X86/vector-reduce-and.ll | 12 ++++++------ llvm/test/CodeGen/X86/vector-reduce-or.ll | 12 ++++++------ llvm/test/CodeGen/X86/vector-reduce-xor.ll | 12 ++++++------ 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll index 91668148a9ac4..0df3238e6a2dc 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll @@ -3,8 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL ; ; vXi64 diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll index 088913ed69521..9545d29cee588 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL ; ; vXi64 diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll index 622f503ef6cbf..35193e9feadcd 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or.ll +++ 
b/llvm/test/CodeGen/X86/vector-reduce-or.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL ; ; vXi64 diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll index 3c09e25341ad3..4fd84897445c4 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL ; ; 
vXi64

From 6c75db8b4bc59eace18143ce086419d37da24746 Mon Sep 17 00:00:00 2001
From: Keith Randall
Date: Fri, 31 Jul 2020 10:04:04 -0700
Subject: [PATCH 029/600] Disable getauxval for Go

We want the Go build to not use getauxval, as we must support
glibc < 2.16 platforms.

Reviewed By: dvyukov

Differential Revision: https://reviews.llvm.org/D84859
---
 compiler-rt/lib/sanitizer_common/sanitizer_getauxval.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_getauxval.h b/compiler-rt/lib/sanitizer_common/sanitizer_getauxval.h
index 86ad3a5e2c2aa..38439e44f611e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_getauxval.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_getauxval.h
@@ -21,8 +21,9 @@

 #if SANITIZER_LINUX || SANITIZER_FUCHSIA

-# if __GLIBC_PREREQ(2, 16) || (SANITIZER_ANDROID && __ANDROID_API__ >= 21) || \
-     SANITIZER_FUCHSIA
+# if (__GLIBC_PREREQ(2, 16) || (SANITIZER_ANDROID && __ANDROID_API__ >= 21) || \
+      SANITIZER_FUCHSIA) && \
+     !SANITIZER_GO
 #  define SANITIZER_USE_GETAUXVAL 1
 # else
 #  define SANITIZER_USE_GETAUXVAL 0

From 2144a3bdbba40f0a78d30921088e2c8407ff9dd2 Mon Sep 17 00:00:00 2001
From: Aditya Nandakumar
Date: Fri, 31 Jul 2020 09:41:06 -0700
Subject: [PATCH 030/600] [GISel] Add combiners for G_INTTOPTR and G_PTRTOINT

https://reviews.llvm.org/D84909

Patch adds two new GICombinerRules, one for G_INTTOPTR and one for
G_PTRTOINT. The G_INTTOPTR combine elides int2ptr(ptr2int(x)) to a copy
of x, if the cast is within the same address space. The G_PTRTOINT
combine elides ptr2int(int2ptr(x)) to a copy of x.

Patch additionally adds new combiner tests for the AArch64 target to
test these new combiner rules.

Patch by mkitzan
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  8 +++++
 .../include/llvm/Target/GlobalISel/Combine.td | 21 +++++++++++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 33 +++++++++++++++++++
 .../GlobalISel/combine-inttoptr-ptrtoint.mir  | 33 +++++++++++++++++++
 .../AArch64/GlobalISel/combine-ptrtoint.mir   | 17 ++++++++++
 5 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-inttoptr-ptrtoint.mir
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-ptrtoint.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e694e7ad2c834..3847112d51836 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -200,6 +200,14 @@ class CombinerHelper {
   bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal);
   bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount);

+  /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
+  bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+  bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);
+
+  /// Transform PtrToInt(IntToPtr(x)) to x.
+  bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+  bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg);
+
  /// Return true if any explicit use operand on \p MI is defined by a
  /// G_IMPLICIT_DEF.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 40ed6be089ac8..ba19a4635570a 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -257,6 +257,24 @@ def simplify_add_to_sub: GICombineRule < (apply [{ return Helper.applySimplifyAddToSub(*${root}, ${info});}]) >; +// Fold int2ptr(ptr2int(x)) -> x +def p2i_to_i2p_matchinfo: GIDefMatchData<"Register">; +def p2i_to_i2p: GICombineRule< + (defs root:$root, p2i_to_i2p_matchinfo:$info), + (match (wip_match_opcode G_INTTOPTR):$root, + [{ return Helper.matchCombineI2PToP2I(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineI2PToP2I(*${root}, ${info}); }]) +>; + +// Fold ptr2int(int2ptr(x)) -> x +def i2p_to_p2i_matchinfo: GIDefMatchData<"Register">; +def i2p_to_p2i: GICombineRule< + (defs root:$root, i2p_to_p2i_matchinfo:$info), + (match (wip_match_opcode G_PTRTOINT):$root, + [{ return Helper.matchCombineP2IToI2P(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }]) +>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -267,7 +285,8 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, binop_same_val, binop_left_to_zero, - binop_right_to_zero]>; + binop_right_to_zero, p2i_to_i2p, + i2p_to_p2i]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>; def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 422b71b28b6fe..a9ec1d4365a34 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1545,6 +1545,39 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI, return false; } +bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, + m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg)))); +} + +bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildCopy(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register SrcReg = MI.getOperand(1).getReg(); + return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg))); +} + +bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) { + assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT"); + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInstr(MI); + Builder.buildZExtOrTrunc(DstReg, Reg); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) { return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) { return MO.isReg() && diff --git 
a/llvm/test/CodeGen/AArch64/GlobalISel/combine-inttoptr-ptrtoint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-inttoptr-ptrtoint.mir new file mode 100644 index 0000000000000..d99b38900383b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-inttoptr-ptrtoint.mir @@ -0,0 +1,33 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +--- +name: test_combine_inttoptr_same_addressspace +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_combine_inttoptr_same_addressspace + ; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x0 + ; CHECK: $x1 = COPY [[COPY]](p64) + %0:_(p64) = COPY $x0 + %1:_(s64) = G_PTRTOINT %0 + %2:_(p64) = G_INTTOPTR %1 + $x1 = COPY %2 +... +--- +name: test_combine_inttoptr_diff_addressspace +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_combine_inttoptr_diff_addressspace + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) + ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p64) = G_INTTOPTR [[PTRTOINT]](s64) + ; CHECK: $x1 = COPY [[INTTOPTR]](p64) + %0:_(p0) = COPY $x0 + %1:_(s64) = G_PTRTOINT %0 + %2:_(p64) = G_INTTOPTR %1 + $x1 = COPY %2 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptrtoint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptrtoint.mir new file mode 100644 index 0000000000000..8164b1bf872fc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptrtoint.mir @@ -0,0 +1,17 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +--- +name: test_combine_ptrtoint +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_combine_ptrtoint + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: $x1 = COPY [[COPY]](s64) + %0:_(s64) = COPY $x0 + %1:_(p64) = G_INTTOPTR %0 + %2:_(s64) = G_PTRTOINT %1 + $x1 = COPY %2 +... From 3b0d30ffd30a704d09a34031d8797e22b708caab Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 31 Jul 2020 18:16:01 +0100 Subject: [PATCH 031/600] [SCEVExpander] Name temporary instructions for LCSSA insertion (NFC). --- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 1a10e580c68c0..d31bf6791075a 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1771,10 +1771,12 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) { // instruction. 
      Instruction *Tmp;
       if (Inst->getType()->isIntegerTy())
-        Tmp = cast<Instruction>(Builder.CreateAdd(Inst, Inst));
+        Tmp =
+            cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
       else {
         assert(Inst->getType()->isPointerTy());
-        Tmp = cast<Instruction>(Builder.CreateGEP(Inst, Builder.getInt32(1)));
+        Tmp = cast<Instruction>(
+            Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
       }
       V = fixupLCSSAFormFor(Tmp, 0);

From 93fd8dbdc250330b84eeca3387e895407663d750 Mon Sep 17 00:00:00 2001
From: Albion Fung
Date: Fri, 31 Jul 2020 11:57:59 -0500
Subject: [PATCH 032/600] [PowerPC] Add Vector String Isolate instruction
 definitions and MC Tests

This patch implements the instruction definition and MC tests for the
vector string isolate instructions.

Differential Revision: https://reviews.llvm.org/D84197
---
 llvm/lib/Target/PowerPC/PPCInstrPrefix.td     | 41 +++++++++++++++++++
 .../PowerPC/ppc64-encoding-ISA31.txt          | 23 +++++++++++
 llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s   | 21 ++++++++++
 3 files changed, 85 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 8ff0d460f47fb..a92168789bbe0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -59,6 +59,39 @@ class PI<bits<2> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
   string BaseName = "";
 }

+// VX-Form: [ PO VT R VB RC XO ]
+class VXForm_VTB5_RC<bits<10> xo, bits<5> R, dag OOL, dag IOL, string asmstr,
+                     InstrItinClass itin, list<dag> pattern>
+  : I<4, OOL, IOL, asmstr, itin> {
+  bits<5> VT;
+  bits<5> VB;
+  bit RC = 0;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = VT;
+  let Inst{11-15} = R;
+  let Inst{16-20} = VB;
+  let Inst{21} = RC;
+  let Inst{22-31} = xo;
+}
+
+// Multiclass definition to account for record and non-record form
+// instructions of VXRForm.
+multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
+                           string asmbase, string asmstr,
+                           InstrItinClass itin, list<dag> pattern> {
+  let BaseName = asmbase in {
+    def NAME : VXForm_VTB5_RC<xo, R, OOL, IOL,
+                              !strconcat(asmbase, !strconcat(" ", asmstr)),
+                              itin, pattern>, RecFormRel;
+    let Defs = [CR6] in
+    def _rec : VXForm_VTB5_RC<xo, R, OOL, IOL,
+                              !strconcat(asmbase, !strconcat(". ", asmstr)),
+                              itin, []>, isRecordForm, RecFormRel;
+  }
+}
+
 class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL,
                                 string asmstr, InstrItinClass itin,
                                 list<dag> pattern>
   : PI<1, opcode, OOL, IOL, asmstr, itin> {
@@ -822,6 +855,14 @@ let Predicates = [IsISA3_1] in {
                     (int_ppc_altivec_vsrdbi v16i8:$VRA, v16i8:$VRB,
                                             i32:$SH))]>;
+  defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
+                                 "vstribr", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB),
+                                 "vstribl", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB),
+                                 "vstrihr", "$vT, $vB", IIC_VecGeneral, []>;
+  defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB),
+                                 "vstrihl", "$vT, $vB", IIC_VecGeneral, []>;
   def VINSW :
     VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB),
              "vinsw $vD, $rB, $UIM", IIC_VecGeneral,
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
index c8dae6a160a5b..569068ab21a7a 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -459,3 +459,26 @@
 # CHECK: xscvsqqp 8, 28
 0xfd 0xb 0xe6 0x88

+# CHECK: vstribr 2, 2
+0x10 0x41 0x10 0x0d
+
+# CHECK: vstribl 2, 2
+0x10 0x40 0x10 0x0d
+
+# CHECK: vstrihr 2, 2
+0x10 0x43 0x10 0x0d
+
+# CHECK: vstrihl 2, 2
+0x10 0x42 0x10 0x0d
+
+# CHECK: vstribr. 2, 2
+0x10 0x41 0x14 0x0d
+
+# CHECK: vstribl. 2, 2
+0x10 0x40 0x14 0x0d
+
+# CHECK: vstrihr. 2, 2
+0x10 0x43 0x14 0x0d
+
+# CHECK: vstrihl. 2, 2
+0x10 0x42 0x14 0x0d
diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
index bd1187f18ed8f..6968821a3c09b 100644
--- a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -585,3 +585,24 @@
 # CHECK-BE: xscvsqqp 8, 28                 # encoding: [0xfd,0x0b,0xe6,0x88]
 # CHECK-LE: xscvsqqp 8, 28                 # encoding: [0x88,0xe6,0x0b,0xfd]
             xscvsqqp 8, 28
+# CHECK-BE: vstribr 2, 2                   # encoding: [0x10,0x41,0x10,0x0d]
+# CHECK-LE: vstribr 2, 2                   # encoding: [0x0d,0x10,0x41,0x10]
+            vstribr 2, 2
+# CHECK-BE: vstribl 2, 2                   # encoding: [0x10,0x40,0x10,0x0d]
+# CHECK-LE: vstribl 2, 2                   # encoding: [0x0d,0x10,0x40,0x10]
+            vstribl 2, 2
+# CHECK-BE: vstrihr 2, 2                   # encoding: [0x10,0x43,0x10,0x0d]
+# CHECK-LE: vstrihr 2, 2                   # encoding: [0x0d,0x10,0x43,0x10]
+            vstrihr 2, 2
+# CHECK-BE: vstribr. 2, 2                  # encoding: [0x10,0x41,0x14,0x0d]
+# CHECK-LE: vstribr. 2, 2                  # encoding: [0x0d,0x14,0x41,0x10]
+            vstribr. 2, 2
+# CHECK-BE: vstribl. 2, 2                  # encoding: [0x10,0x40,0x14,0x0d]
+# CHECK-LE: vstribl. 2, 2                  # encoding: [0x0d,0x14,0x40,0x10]
+            vstribl. 2, 2
+# CHECK-BE: vstrihr. 2, 2                  # encoding: [0x10,0x43,0x14,0x0d]
+# CHECK-LE: vstrihr. 2, 2                  # encoding: [0x0d,0x14,0x43,0x10]
+            vstrihr. 2, 2
+# CHECK-BE: vstrihl. 2, 2                  # encoding: [0x10,0x42,0x14,0x0d]
+# CHECK-LE: vstrihl. 2, 2                  # encoding: [0x0d,0x14,0x42,0x10]
+            vstrihl. 2, 2

From c068e9c8c123e7f8c8f3feb57245a012ccd09ccf Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Fri, 31 Jul 2020 10:46:27 -0700
Subject: [PATCH 033/600] [Support][CommandLine] Delete unused
 llvm::cl::ParseEnvironmentOptions

The function was added in 2003. It is not used and can be emulated with
ParseCommandLineOptions.
---
 llvm/docs/CommandLine.rst                  | 23 ----------------
 llvm/include/llvm/Support/CommandLine.h    |  7 -----
 llvm/lib/Support/CommandLine.cpp           | 30 --------------------
 llvm/unittests/Support/CommandLineTest.cpp | 32 ----------------------
 4 files changed, 92 deletions(-)

diff --git a/llvm/docs/CommandLine.rst b/llvm/docs/CommandLine.rst
index ab2826d789f2f..431ebc0e67e67 100644
--- a/llvm/docs/CommandLine.rst
+++ b/llvm/docs/CommandLine.rst
@@ -1369,29 +1369,6 @@ The ``cl::ParseCommandLineOptions`` function requires two parameters (``argc``
 and ``argv``), but may also take an optional third parameter which holds
 `additional extra text`_ to emit when the ``-help`` option is invoked.

-.. _cl::ParseEnvironmentOptions:
-
-The ``cl::ParseEnvironmentOptions`` function
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The ``cl::ParseEnvironmentOptions`` function has mostly the same effects as
-`cl::ParseCommandLineOptions`_, except that it is designed to take values for
-options from an environment variable, for those cases in which reading the
-command line is not convenient or desired. It fills in the values of all the
-command line option variables just like `cl::ParseCommandLineOptions`_ does.
-
-It takes four parameters: the name of the program (since ``argv`` may not be
-available, it can't just look in ``argv[0]``), the name of the environment
-variable to examine, and the optional `additional extra text`_ to emit when the
-``-help`` option is invoked.
-
-``cl::ParseEnvironmentOptions`` will break the environment variable's value up
-into words and then process them using `cl::ParseCommandLineOptions`_.
-**Note:** Currently ``cl::ParseEnvironmentOptions`` does not support quoting, so
-an environment variable containing ``-option "foo bar"`` will be parsed as three
-words, ``-option``, ``"foo``, and ``bar"``, which is different from what you
-would get from the shell with the same input.
-
 The ``cl::SetVersionPrinter`` function
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 466945e40a9ce..62e44aeefe9cf 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -71,13 +71,6 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
                              const char *EnvVar = nullptr,
                              bool LongOptionsUseDoubleDash = false);

-//===----------------------------------------------------------------------===//
-// ParseEnvironmentOptions - Environment variable option processing alternate
-// entry point.
-//
-void ParseEnvironmentOptions(const char *progName, const char *envvar,
-                             const char *Overview = "");
-
 // Function pointer type for printing version information.
 using VersionPrinterTy = std::function<void(raw_ostream &)>;

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 12ef0d511b147..4fba6a9ada2c0 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1271,36 +1271,6 @@ bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
                          /*MarkEOLs*/ false, /*RelativeNames*/ true);
 }

-/// ParseEnvironmentOptions - An alternative entry point to the
-/// CommandLine library, which allows you to read the program's name
-/// from the caller (as PROGNAME) and its command-line arguments from
-/// an environment variable (whose name is given in ENVVAR).
-///
-void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
-                                 const char *Overview) {
-  // Check args.
-  assert(progName && "Program name not specified");
-  assert(envVar && "Environment variable name missing");
-
-  // Get the environment variable they want us to parse options out of.
-  llvm::Optional<std::string> envValue = sys::Process::GetEnv(StringRef(envVar));
-  if (!envValue)
-    return;
-
-  // Get program's "name", which we wouldn't know without the caller
-  // telling us.
-  SmallVector<const char *, 20> newArgv;
-  BumpPtrAllocator A;
-  StringSaver Saver(A);
-  newArgv.push_back(Saver.save(progName).data());
-
-  // Parse the value of the environment variable into a "command line"
-  // and hand it off to ParseCommandLineOptions().
-  TokenizeGNUCommandLine(*envValue, Saver, newArgv);
-  int newArgc = static_cast<int>(newArgv.size());
-  ParseCommandLineOptions(newArgc, &newArgv[0], StringRef(Overview));
-}
-
 bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
                                  StringRef Overview, raw_ostream *Errs,
                                  const char *EnvVar,

diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index e8c2cef18e367..be8217b109627 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -45,8 +45,6 @@ class TempEnvVar {
     EXPECT_EQ(nullptr, old_value) << old_value;
 #if HAVE_SETENV
     setenv(name, value, true);
-#else
-#  define SKIP_ENVIRONMENT_TESTS
 #endif
   }

@@ -137,36 +135,6 @@ TEST(CommandLineTest, ModifyExisitingOption) {
   ASSERT_EQ(cl::Hidden, TestOption.getOptionHiddenFlag())
       << "Failed to modify option's hidden flag.";
 }
-#ifndef SKIP_ENVIRONMENT_TESTS
-
-const char test_env_var[] = "LLVM_TEST_COMMAND_LINE_FLAGS";
-
-cl::opt<std::string> EnvironmentTestOption("env-test-opt");
-TEST(CommandLineTest, ParseEnvironment) {
-  TempEnvVar TEV(test_env_var, "-env-test-opt=hello");
-  EXPECT_EQ("", EnvironmentTestOption);
-  cl::ParseEnvironmentOptions("CommandLineTest", test_env_var);
-  EXPECT_EQ("hello", EnvironmentTestOption);
-}
-
-// This test used to make valgrind complain
-// ("Conditional jump or move depends on uninitialised value(s)")
-//
-// Warning: Do not run any tests after this one that try to gain access to
-// registered command line options because this will likely result in a
-// SEGFAULT. This can occur because the cl::opt in the test below is declared
-// on the stack which will be destroyed after the test completes but the
-// command line system will still hold a pointer to a deallocated cl::Option.
-TEST(CommandLineTest, ParseEnvironmentToLocalVar) {
-  // Put cl::opt on stack to check for proper initialization of fields.
-  StackOption<std::string> EnvironmentTestOptionLocal("env-test-opt-local");
-  TempEnvVar TEV(test_env_var, "-env-test-opt-local=hello-local");
-  EXPECT_EQ("", EnvironmentTestOptionLocal);
-  cl::ParseEnvironmentOptions("CommandLineTest", test_env_var);
-  EXPECT_EQ("hello-local", EnvironmentTestOptionLocal);
-}
-
-#endif // SKIP_ENVIRONMENT_TESTS

 TEST(CommandLineTest, UseOptionCategory) {
   StackOption<int> TestOption2("test-option", cl::cat(TestCategory));

From 1479cdfe4ff603e7b0140dab3ca08ff095473cbd Mon Sep 17 00:00:00 2001
From: Teresa Johnson
Date: Thu, 30 Jul 2020 13:49:49 -0700
Subject: [PATCH 034/600] [ThinLTO] Compile time improvement to propagateAttributes

I found that propagateAttributes was ~23% of a thin link's run time
(almost 4x higher than the second hottest function). The main reason is
that it re-examines a global var each time it is referenced. This
becomes unnecessary once it is marked both non-read-only and
non-write-only. I added a set to avoid doing redundant work (see the
sketch below), which dropped the runtime of that thin link by almost 15%.

I made a smaller efficiency improvement (no measurable impact) to skip
all summaries for a VI if the first copy is dead. I added an assert to
ensure that all copies are dead if any is. The code in
computeDeadSymbols marks all summaries for a VI as live. There was one
corner case where it skipped marking an alias as live, which I fixed.
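As a concrete sketch of the memoization (simplified from the
DenseSet<ValueInfo> that the patch threads into propagateAttributesToRefs;
illustrative shape, not the exact code):

  // References that are neither read-only nor write-only force the
  // referenced global to drop both attributes. Remember globals already
  // processed that way so later references skip the redundant rescan.
  DenseSet<ValueInfo> MarkedNonReadWriteOnly;
  for (auto &VI : S->refs()) {
    if (!VI.getAccessSpecifier()) {
      if (!MarkedNonReadWriteOnly.insert(VI).second)
        continue; // both attributes already cleared once
    } else if (MarkedNonReadWriteOnly.count(VI))
      continue;   // nothing further to propagate for this global
    // ... walk VI's summary list and clear read-only/write-only ...
  }

The assert mentioned above then records the invariant that all copies of a
VI are marked live together, which is what makes skipping the remaining
summaries safe.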
However, the code earlier marked all copies of a preserved GUID's VI as
live, and each 'visit' marks all copies live. The only case where this
could make a difference is summaries that were marked live when they were
built initially, and that is only a few special compiler-generated symbols
and inline assembly symbols, so it is likely never provoked in practice.

Differential Revision: https://reviews.llvm.org/D84985
---
 llvm/lib/IR/ModuleSummaryIndex.cpp         | 28 ++++++++++++++++++----
 llvm/lib/Transforms/IPO/FunctionImport.cpp |  3 +--
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 91612eafada73..5346323ceabba 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -163,7 +163,9 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const {
   return false;
 }

-static void propagateAttributesToRefs(GlobalValueSummary *S) {
+static void
+propagateAttributesToRefs(GlobalValueSummary *S,
+                          DenseSet<ValueInfo> &MarkedNonReadWriteOnly) {
   // If reference is not readonly or writeonly then referenced summary is not
   // read/writeonly either. Note that:
   // - All references from GlobalVarSummary are conservatively considered as
@@ -174,6 +176,11 @@ static void propagateAttributesToRefs(GlobalValueSummary *S) {
   // for them.
   for (auto &VI : S->refs()) {
     assert(VI.getAccessSpecifier() == 0 || isa<FunctionSummary>(S));
+    if (!VI.getAccessSpecifier()) {
+      if (!MarkedNonReadWriteOnly.insert(VI).second)
+        continue;
+    } else if (MarkedNonReadWriteOnly.find(VI) != MarkedNonReadWriteOnly.end())
+      continue;
     for (auto &Ref : VI.getSummaryList())
       // If references to alias is not read/writeonly then aliasee
       // is not read/writeonly
@@ -216,11 +223,24 @@ void ModuleSummaryIndex::propagateAttributes(
     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
   if (!PropagateAttrs)
     return;
+  DenseSet<ValueInfo> MarkedNonReadWriteOnly;
   for (auto &P : *this)
     for (auto &S : P.second.SummaryList) {
-      if (!isGlobalValueLive(S.get()))
+      if (!isGlobalValueLive(S.get())) {
+        // computeDeadSymbols should have marked all copies live. Note that
+        // it is possible that there is a GUID collision between internal
+        // symbols with the same name in different files whose paths do not
+        // differ enough to distinguish them. Because computeDeadSymbols
+        // should conservatively mark all copies live, we can assert here
+        // that all are dead if any copy is dead.
+        assert(llvm::none_of(
+            P.second.SummaryList,
+            [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+              return isGlobalValueLive(Summary.get());
+            }));
         // We don't examine references from dead objects
-        continue;
+        break;
+      }

       // Global variable can't be marked read/writeonly if it is not eligible
       // to import since we need to ensure that all external references get
@@ -240,7 +260,7 @@ void ModuleSummaryIndex::propagateAttributes(
           GVS->setReadOnly(false);
           GVS->setWriteOnly(false);
         }
-      propagateAttributesToRefs(S.get());
+      propagateAttributesToRefs(S.get(), MarkedNonReadWriteOnly);
     }
   setWithAttributePropagation();
   if (llvm::AreStatisticsEnabled())
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 468bf19f2e48a..e02f8d62da7a0 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -884,6 +884,7 @@ void llvm::computeDeadSymbols(
   while (!Worklist.empty()) {
     auto VI = Worklist.pop_back_val();
     for (auto &Summary : VI.getSummaryList()) {
+      Summary->setLive(true);
       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
         // If this is an alias, visit the aliasee VI to ensure that all copies
         // are marked live and it is added to the worklist for further
@@ -891,8 +892,6 @@ void llvm::computeDeadSymbols(
         visit(AS->getAliaseeVI(), true);
         continue;
       }
-
-      Summary->setLive(true);
       for (auto Ref : Summary->refs())
         visit(Ref, false);
       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))

From 162e9f72ac57342a0ea12036576d6212720cc93f Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Fri, 31 Jul 2020 10:52:49 -0700
Subject: [PATCH 035/600] [lldb/Test] Reduce code duplication by importing
 subprocess globally

Import the subprocess module once instead of doing it inline, which is
error-prone and leads to needless code duplication.
---
 lldb/test/API/lit.cfg.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index a211abe6cc41c..e083e2fd9bebb 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -6,6 +6,7 @@
 import platform
 import shlex
 import shutil
+import subprocess

 import lit.formats

@@ -33,7 +34,6 @@ def mkdir_p(path):

 def find_sanitizer_runtime(name):
-    import subprocess
     resource_dir = subprocess.check_output(
         [config.cmake_cxx_compiler,
          '-print-resource-dir']).decode('utf-8').strip()
@@ -60,7 +60,6 @@ def find_python_interpreter():
         return copied_python

     # Find the "real" python binary.
-    import shutil, subprocess
     real_python = subprocess.check_output([
         config.python_executable,
         os.path.join(os.path.dirname(os.path.realpath(__file__)),

From 49660234db94faf54c2d0ffa150841b9b52671c9 Mon Sep 17 00:00:00 2001
From: sameeran joshi
Date: Fri, 31 Jul 2020 01:02:04 +0530
Subject: [PATCH 036/600] [Flang] Checks for constraints C7110-C7115.

Added more tests. Annotate sources and tests. Improve error message.
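Two diagnostics change here: the conversion failure now names both the
value type and the array type, and the C7110 "same declared type" message
is reported at most once per array constructor. The latter is a simple
boolean latch on the analysis context, roughly (simplified from the
expression.cpp change below):

  // Report the C7110 "same declared type" violation at most once per
  // array constructor instead of once per mismatched value.
  bool messageDisplayedOnce{false};
  ...
  if (!messageDisplayedOnce) {
    exprAnalyzer_.Say(
        "Values in array constructor must have the same declared type "
        "when no explicit type appears"_err_en_US); // C7110
    messageDisplayedOnce = true;
  }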
Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D85014 --- flang/lib/Semantics/expression.cpp | 21 ++++--- flang/test/Semantics/array-constr-values.f90 | 60 ++++++++++++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 flang/test/Semantics/array-constr-values.f90 diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index e78c2d20edcf7..3ec2af5d70fa8 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1202,6 +1202,7 @@ class ArrayConstructorContext { bool explicitType_{type_.has_value()}; std::optional constantLength_; ArrayConstructorValues values_; + bool messageDisplayedOnce{false}; }; void ArrayConstructorContext::Push(MaybeExpr &&x) { @@ -1252,17 +1253,21 @@ void ArrayConstructorContext::Push(MaybeExpr &&x) { } } } else { - exprAnalyzer_.Say( - "Values in array constructor must have the same declared type " - "when no explicit type appears"_err_en_US); + if (!messageDisplayedOnce) { + exprAnalyzer_.Say( + "Values in array constructor must have the same declared type " + "when no explicit type appears"_err_en_US); // C7110 + messageDisplayedOnce = true; + } } } else { if (auto cast{ConvertToType(*type_, std::move(*x))}) { values_.Push(std::move(*cast)); } else { exprAnalyzer_.Say( - "Value in array constructor could not be converted to the type " - "of the array"_err_en_US); + "Value in array constructor of type '%s' could not " + "be converted to the type of the array '%s'"_err_en_US, + x->GetType()->AsFortran(), type_->AsFortran()); // C7111, C7112 } } } @@ -1304,7 +1309,7 @@ void ArrayConstructorContext::Add(const parser::AcValue &x) { if (exprType->IsUnlimitedPolymorphic()) { exprAnalyzer_.Say( "Cannot have an unlimited polymorphic value in an " - "array constructor"_err_en_US); + "array constructor"_err_en_US); // C7113 } } Push(std::move(*v)); @@ -1346,7 +1351,7 @@ void ArrayConstructorContext::Add(const parser::AcValue &x) { } else { exprAnalyzer_.SayAt(name, "Implied DO index is active in surrounding implied DO loop " - "and may not have the same name"_err_en_US); + "and may not have the same name"_err_en_US); // C7115 } }, }, @@ -1386,7 +1391,7 @@ MaybeExpr ExpressionAnalyzer::Analyze( "ABSTRACT derived type '%s' may not be used in a " "structure constructor"_err_en_US, typeName), - typeSymbol); + typeSymbol); // C7114 } // This iterator traverses all of the components in the derived type and its diff --git a/flang/test/Semantics/array-constr-values.f90 b/flang/test/Semantics/array-constr-values.f90 new file mode 100644 index 0000000000000..30739f8c095bc --- /dev/null +++ b/flang/test/Semantics/array-constr-values.f90 @@ -0,0 +1,60 @@ +! RUN: %S/test_errors.sh %s %t %f18 +! Confirm enforcement of constraints and restrictions in 7.8 +! C7110, C7111, C7112, C7113, C7114, C7115 + +subroutine arrayconstructorvalues() + integer :: intarray(5) + integer(KIND=8) :: k8 = 20 + + TYPE EMPLOYEE + INTEGER AGE + CHARACTER (LEN = 30) NAME + END TYPE EMPLOYEE + TYPE EMPLOYEER + CHARACTER (LEN = 30) NAME + END TYPE EMPLOYEER + + TYPE(EMPLOYEE) :: emparray(3) + class(*), pointer :: unlim_polymorphic + TYPE, ABSTRACT :: base_type + INTEGER :: CARPRIZE + END TYPE + ! Different declared type + !ERROR: Values in array constructor must have the same declared type when no explicit type appears + intarray = (/ 1, 2, 3, 4., 5/) ! C7110 + ! 
Different kind type parameter
+  !ERROR: Values in array constructor must have the same declared type when no explicit type appears
+  intarray = (/ 1,2,3,4, k8 /) ! C7110
+
+  ! C7111
+  !ERROR: Value in array constructor of type 'LOGICAL(4)' could not be converted to the type of the array 'INTEGER(4)'
+  intarray = [integer:: .true., 2, 3, 4, 5]
+  !ERROR: Value in array constructor of type 'CHARACTER(1)' could not be converted to the type of the array 'INTEGER(4)'
+  intarray = [integer:: "RAM stores information", 2, 3, 4, 5]
+  !ERROR: Value in array constructor of type 'employee' could not be converted to the type of the array 'INTEGER(4)'
+  intarray = [integer:: EMPLOYEE (19, "Jack"), 2, 3, 4, 5]
+
+  ! C7112
+  !ERROR: Value in array constructor of type 'INTEGER(4)' could not be converted to the type of the array 'employee'
+  emparray = (/ EMPLOYEE:: EMPLOYEE(19, "Ganesh"), EMPLOYEE(22, "Omkar"), 19 /)
+  !ERROR: Value in array constructor of type 'employeer' could not be converted to the type of the array 'employee'
+  emparray = (/ EMPLOYEE:: EMPLOYEE(19, "Ganesh"), EMPLOYEE(22, "Ram"),EMPLOYEER("ShriniwasPvtLtd") /)
+
+  ! C7113
+  !ERROR: Cannot have an unlimited polymorphic value in an array constructor
+  !ERROR: Values in array constructor must have the same declared type when no explicit type appears
+  intarray = (/ unlim_polymorphic, 2, 3, 4, 5/)
+
+  ! C7114
+  !ERROR: No intrinsic or user-defined ASSIGNMENT(=) matches operand types INTEGER(4) and TYPE(base_type)
+  !ERROR: ABSTRACT derived type 'base_type' may not be used in a structure constructor
+  !ERROR: Values in array constructor must have the same declared type when no explicit type appears
+  intarray = (/ base_type(10), 2, 3, 4, 5 /)
+end subroutine arrayconstructorvalues
+subroutine checkC7115()
+  real, dimension(10), parameter :: good1 = [(99.9, i = 1, 10)]
+  real, dimension(100), parameter :: good2 = [((88.8, i = 1, 10), j = 1, 10)]
+  !ERROR: Implied DO index is active in surrounding implied DO loop and may not have the same name
+  !ERROR: 'i' is already declared in this scoping unit
+  real, dimension(100), parameter :: bad = [((88.8, i = 1, 10), i = 1, 10)]
+end subroutine checkC7115

From 0e0aebc5273c6d676584ecb50ed49e888029c2de Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 31 Jul 2020 10:43:00 -0700
Subject: [PATCH 037/600] [ValueTracking] Add ComputeNumSignBits support for
 llvm.abs intrinsic

If the absolute value needs to turn a negative number into a positive
number, it reduces the number of sign bits by at most 1.

Differential Revision: https://reviews.llvm.org/D84971
---
 llvm/lib/Analysis/ValueTracking.cpp               | 13 +++++++++++++
 llvm/test/Transforms/InstCombine/abs-intrinsic.ll | 15 +++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 52132072a7d1e..4cd2d07bf4b0c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3001,6 +3001,19 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
            "Failed to determine minimum sign bits");
     return Tmp;
   }
+  case Instruction::Call: {
+    if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::abs:
+        Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+        if (Tmp == 1) break;
+
+        // Absolute value reduces number of sign bits by at most 1.
+ return Tmp - 1; + } + } + } } } diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 780f28238bf60..8b965b3d1e440 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -51,3 +51,18 @@ define i32 @abs_trailing_zeros_negative(i32 %x) { %and2 = and i32 %abs, -4 ret i32 %and2 } + +; Make sure we infer this add doesn't overflow. The input to the abs has 3 +; sign bits, the abs reduces this to 2 sign bits. +define i32 @abs_signbits(i30 %x) { +; CHECK-LABEL: @abs_signbits( +; CHECK-NEXT: [[AND:%.*]] = sext i30 [[X:%.*]] to i32 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 false) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[ABS]], 1 +; CHECK-NEXT: ret i32 [[ADD]] +; + %ext = sext i30 %x to i32 + %abs = call i32 @llvm.abs.i32(i32 %ext, i1 false) + %add = add i32 %abs, 1 + ret i32 %add +} From 18eba165e7ba80328a910cad3407599d8ff60f4f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 31 Jul 2020 13:00:06 -0500 Subject: [PATCH 038/600] [OpenMP][docs] Update loop tiling status. --- clang/docs/OpenMPSupport.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 28fbd7baebb24..af5e538b1435a 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -266,7 +266,7 @@ want to help with the implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | misc extension | default(firstprivate) & default(private) | :part:`partial` | firstprivate done: D75591 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| loop extension | Loop tiling transformation | :part:`claimed` | | +| loop extension | Loop tiling transformation | :part:`worked on` | D76342 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device extension | 'present' map type modifier | :part:`mostly done` | D83061, D83062, D84422 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ From 46591b95362325d262ca29ce13e7b5ddda624bc8 Mon Sep 17 00:00:00 2001 From: kristina Date: Fri, 31 Jul 2020 18:44:02 +0100 Subject: [PATCH 039/600] [libunwind] Add -Wno-suggest-override to CMakeLists.txt. Set -Wno-suggest-override where such warning is provided by the compiler when building libunwind, alongside libcxx and libcxxabi, using recent Clang. This extends behavior introduced in 77e0e9e17daf0865620abcd41f692ab0642367c4 to libunwind, avoiding a large amount of warnings during builds. See D84126 for the original patch. 
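For context, -Wsuggest-override fires on a virtual function override that
omits the override keyword; a minimal illustration (not code from this
patch):

  struct Base {
    virtual ~Base();
    virtual void run();
  };
  struct Derived : Base {
    // warning: 'run' overrides a member function but is not marked
    // 'override' [-Wsuggest-override]
    void run();
  };

Guarding the flag behind add_compile_flags_if_supported, as the diff below
does, keeps older compilers that do not know the warning building cleanly.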
--- libunwind/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 4606360f07ab7..8419d851ab7f4 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -271,6 +271,8 @@ add_compile_flags_if_supported(-Wunused-variable) add_compile_flags_if_supported(-Wwrite-strings) add_compile_flags_if_supported(-Wundef) +add_compile_flags_if_supported(-Wno-suggest-override) + if (LIBUNWIND_ENABLE_WERROR) add_compile_flags_if_supported(-Werror) add_compile_flags_if_supported(-WX) From e2d4bf6ceca84c2ff515d6bc89da7d40d1c971fb Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 31 Jul 2020 14:04:36 -0400 Subject: [PATCH 040/600] [libc] Add islower and isupper implementation. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D84960 --- libc/config/linux/aarch64/entrypoints.txt | 2 ++ libc/config/linux/api.td | 2 ++ libc/config/linux/x86_64/entrypoints.txt | 2 ++ libc/spec/stdc.td | 10 ++++++++++ libc/src/ctype/CMakeLists.txt | 16 ++++++++++++++++ libc/src/ctype/islower.cpp | 22 ++++++++++++++++++++++ libc/src/ctype/islower.h | 18 ++++++++++++++++++ libc/src/ctype/isupper.cpp | 22 ++++++++++++++++++++++ libc/src/ctype/isupper.h | 18 ++++++++++++++++++ libc/test/src/ctype/CMakeLists.txt | 20 ++++++++++++++++++++ libc/test/src/ctype/isalnum_test.cpp | 12 ++++-------- libc/test/src/ctype/isalpha_test.cpp | 14 +++++--------- libc/test/src/ctype/isdigit_test.cpp | 14 +++++--------- libc/test/src/ctype/islower_test.cpp | 21 +++++++++++++++++++++ libc/test/src/ctype/isupper_test.cpp | 21 +++++++++++++++++++++ 15 files changed, 188 insertions(+), 26 deletions(-) create mode 100644 libc/src/ctype/islower.cpp create mode 100644 libc/src/ctype/islower.h create mode 100644 libc/src/ctype/isupper.cpp create mode 100644 libc/src/ctype/isupper.h create mode 100644 libc/test/src/ctype/islower_test.cpp create mode 100644 libc/test/src/ctype/isupper_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8d2cc16189465..5f058e6116001 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -3,6 +3,8 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.ctype.isalnum libc.src.ctype.isalpha libc.src.ctype.isdigit + libc.src.ctype.islower + libc.src.ctype.isupper # errno.h entrypoints libc.src.errno.__errno_location diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 01e6eab92f016..03d5d66e41e79 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -91,6 +91,8 @@ def CTypeAPI : PublicAPI<"ctype.h"> { "isalnum", "isalpha", "isdigit", + "islower", + "isupper", ]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 4484f6cbf1a79..5dc1d38455dcf 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -6,6 +6,8 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.ctype.isalnum libc.src.ctype.isalpha libc.src.ctype.isdigit + libc.src.ctype.islower + libc.src.ctype.isupper # errno.h entrypoints libc.src.errno.__errno_location diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index fed24cb2dbdd3..4fffc5cdc8579 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -61,6 +61,16 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "islower", + RetValSpec, + [ArgSpec] + >, + FunctionSpec< + "isupper", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/src/ctype/CMakeLists.txt 
b/libc/src/ctype/CMakeLists.txt index 53161b8b179fd..30995ab2f714c 100644 --- a/libc/src/ctype/CMakeLists.txt +++ b/libc/src/ctype/CMakeLists.txt @@ -33,3 +33,19 @@ add_entrypoint_object( DEPENDS .ctype_utils ) + +add_entrypoint_object( + islower + SRCS + islower.cpp + HDRS + islower.h +) + +add_entrypoint_object( + isupper + SRCS + isupper.cpp + HDRS + isupper.h +) diff --git a/libc/src/ctype/islower.cpp b/libc/src/ctype/islower.cpp new file mode 100644 index 0000000000000..df21355f31ac6 --- /dev/null +++ b/libc/src/ctype/islower.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of islower------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/islower.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(islower)(int c) { + const unsigned ch = c; + return (ch - 'a') < 26; +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/islower.h b/libc/src/ctype/islower.h new file mode 100644 index 0000000000000..7643542fb7a99 --- /dev/null +++ b/libc/src/ctype/islower.h @@ -0,0 +1,18 @@ +//===-- Implementation header for islower -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISLOWER_H +#define LLVM_LIBC_SRC_CTYPE_ISLOWER_H + +namespace __llvm_libc { + +int islower(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISLOWER_H diff --git a/libc/src/ctype/isupper.cpp b/libc/src/ctype/isupper.cpp new file mode 100644 index 0000000000000..57aed961d1e57 --- /dev/null +++ b/libc/src/ctype/isupper.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of isupper------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isupper.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(isupper)(int c) { + const unsigned ch = c; + return (ch - 'A') < 26; +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/isupper.h b/libc/src/ctype/isupper.h new file mode 100644 index 0000000000000..7a1f2270943a9 --- /dev/null +++ b/libc/src/ctype/isupper.h @@ -0,0 +1,18 @@ +//===-- Implementation header for isupper -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISUPPER_H +#define LLVM_LIBC_SRC_CTYPE_ISUPPER_H + +namespace __llvm_libc { + +int isupper(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISUPPER_H diff --git a/libc/test/src/ctype/CMakeLists.txt b/libc/test/src/ctype/CMakeLists.txt index 7834746ab1d73..c9959465c697c 100644 --- a/libc/test/src/ctype/CMakeLists.txt +++ b/libc/test/src/ctype/CMakeLists.txt @@ -29,3 +29,23 @@ add_libc_unittest( DEPENDS libc.src.ctype.isdigit ) + +add_libc_unittest( + islower + SUITE + libc_ctype_unittests + SRCS + islower_test.cpp + DEPENDS + libc.src.ctype.islower +) + +add_libc_unittest( + isupper + SUITE + libc_ctype_unittests + SRCS + isupper_test.cpp + DEPENDS + libc.src.ctype.isupper +) diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp index 1c4ad7d3ff8ac..ca77285c5614e 100644 --- a/libc/test/src/ctype/isalnum_test.cpp +++ b/libc/test/src/ctype/isalnum_test.cpp @@ -7,21 +7,17 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalnum.h" -#include "utils/UnitTest/Test.h" -// Helper function that makes a call to isalnum a bit cleaner -// for use with testing utilities, since it explicitly requires -// a boolean value for EXPECT_TRUE and EXPECT_FALSE. -bool call_isalnum(int c) { return __llvm_libc::isalnum(c); } +#include "utils/UnitTest/Test.h" TEST(IsAlNum, DefaultLocale) { // Loops through all characters, verifying that numbers and letters - // return true and everything else returns false. + // return non-zero integer and everything else returns a zero. for (int c = 0; c < 255; ++c) { if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) - EXPECT_TRUE(call_isalnum(c)); + EXPECT_NE(__llvm_libc::isalnum(c), 0); else - EXPECT_FALSE(call_isalnum(c)); + EXPECT_EQ(__llvm_libc::isalnum(c), 0); } } diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp index 81fc7248f8714..d91219b504063 100644 --- a/libc/test/src/ctype/isalpha_test.cpp +++ b/libc/test/src/ctype/isalpha_test.cpp @@ -7,20 +7,16 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isalpha.h" -#include "utils/UnitTest/Test.h" -// Helper function that makes a call to isalpha a bit cleaner -// for use with testing utilities, since it explicitly requires -// a boolean value for EXPECT_TRUE and EXPECT_FALSE. -bool call_isalpha(int c) { return __llvm_libc::isalpha(c); } +#include "utils/UnitTest/Test.h" TEST(IsAlpha, DefaultLocale) { - // Loops through all characters, verifying that letters return true - // and everything else returns false. + // Loops through all characters, verifying that letters return a + // non-zero integer and everything else returns zero. 
for (int ch = 0; ch < 255; ++ch) { if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) - EXPECT_TRUE(call_isalpha(ch)); + EXPECT_NE(__llvm_libc::isalpha(ch), 0); else - EXPECT_FALSE(call_isalpha(ch)); + EXPECT_EQ(__llvm_libc::isalpha(ch), 0); } } diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp index 6fea9564db674..2430a92425c4a 100644 --- a/libc/test/src/ctype/isdigit_test.cpp +++ b/libc/test/src/ctype/isdigit_test.cpp @@ -7,20 +7,16 @@ //===----------------------------------------------------------------------===// #include "src/ctype/isdigit.h" -#include "utils/UnitTest/Test.h" -// Helper function that makes a call to isdigit a bit cleaner -// for use with testing utilities, since it explicitly requires -// a boolean value for EXPECT_TRUE and EXPECT_FALSE. -bool call_isdigit(int c) { return __llvm_libc::isdigit(c); } +#include "utils/UnitTest/Test.h" TEST(IsDigit, DefaultLocale) { - // Loops through all characters, verifying that numbers return true - // and everything else returns false. + // Loops through all characters, verifying that numbers return a + // non-zero integer and everything else returns zero. for (int ch = 0; ch < 255; ++ch) { if ('0' <= ch && ch <= '9') - EXPECT_TRUE(call_isdigit(ch)); + EXPECT_NE(__llvm_libc::isdigit(ch), 0); else - EXPECT_FALSE(call_isdigit(ch)); + EXPECT_EQ(__llvm_libc::isdigit(ch), 0); } } diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp new file mode 100644 index 0000000000000..9b38cabc67aa7 --- /dev/null +++ b/libc/test/src/ctype/islower_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for islower----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/islower.h" +#include "utils/UnitTest/Test.h" + +TEST(IsLower, DefaultLocale) { + // Loops through all characters, verifying that lowercase letters + // return a non-zero integer and everything else returns zero. + for (int ch = 0; ch < 255; ++ch) { + if ('a' <= ch && ch <= 'z') + EXPECT_NE(__llvm_libc::islower(ch), 0); + else + EXPECT_EQ(__llvm_libc::islower(ch), 0); + } +} diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp new file mode 100644 index 0000000000000..0a13f4e11b0e1 --- /dev/null +++ b/libc/test/src/ctype/isupper_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for isupper----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isupper.h" +#include "utils/UnitTest/Test.h" + +TEST(IsUpper, DefaultLocale) { + // Loops through all characters, verifying that uppercase letters + // return a non-zero integer and everything else returns zero. 
+ for (int ch = 0; ch < 255; ++ch) { + if ('A' <= ch && ch <= 'Z') + EXPECT_NE(__llvm_libc::isupper(ch), 0); + else + EXPECT_EQ(__llvm_libc::isupper(ch), 0); + } +} From 6a3b07a4bf14be32569550f2e9814d8797d27d31 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 23 Jul 2020 11:17:16 +0200 Subject: [PATCH 041/600] RuntimeDyldELF: report_fatal_error instead of asserting for unimplemented relocations (PR46816) This fixes the ExecutionEngine/MCJIT/stubs-sm-pic.ll test in no-asserts builds which is set to XFAIL on some platforms like 32-bit x86. More importantly, we probably don't want to silently error in these cases. Differential revision: https://reviews.llvm.org/D84390 --- .../ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 7c39ddc8b1da0..7ed8a718ed3c1 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -269,7 +269,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, uint64_t SymOffset) { switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_X86_64_NONE: break; @@ -359,7 +359,7 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, default: // There are other relocation types, but it appears these are the // only ones currently used by the LLVM ELF object writer - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; } } @@ -382,7 +382,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_AARCH64_ABS16: { uint64_t Result = Value + Addend; @@ -721,7 +721,7 @@ void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); @@ -741,7 +741,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC64_ADDR16: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); @@ -835,7 +835,7 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_390_PC16DBL: case ELF::R_390_PLT16DBL: { @@ -890,7 +890,7 @@ void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_BPF_NONE: break; From e591713bff1fdec8fe2b09d084b5cf665794cb65 Mon Sep 17 00:00:00 
2001 From: Sanjay Patel Date: Fri, 31 Jul 2020 11:50:39 -0400 Subject: [PATCH 042/600] [ConstantFolding] fold abs intrinsic The handling for minimum value is similar to cttz/ctlz with 0 just above this case. Differential Revision: https://reviews.llvm.org/D84942 --- llvm/lib/Analysis/ConstantFolding.cpp | 13 +++++++++++++ llvm/test/Analysis/ConstantFolding/abs.ll | 15 +++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index b55ed6647d355..28a30090a0093 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1436,6 +1436,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::masked_load: + case Intrinsic::abs: case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: @@ -2505,6 +2506,18 @@ static Constant *ConstantFoldScalarCall2(StringRef Name, return ConstantInt::get(Ty, C0->countTrailingZeros()); else return ConstantInt::get(Ty, C0->countLeadingZeros()); + + case Intrinsic::abs: + // Undef or minimum val operand with poison min --> undef + assert(C1 && "Must be constant int"); + if (C1->isOneValue() && (!C0 || C0->isMinSignedValue())) + return UndefValue::get(Ty); + + // Undef operand with no poison min --> 0 (sign bit must be clear) + if (C1->isNullValue() && !C0) + return Constant::getNullValue(Ty); + + return ConstantInt::get(Ty, C0->abs()); } return nullptr; diff --git a/llvm/test/Analysis/ConstantFolding/abs.ll b/llvm/test/Analysis/ConstantFolding/abs.ll index 24171b1d4c6e4..7b3a146e86143 100644 --- a/llvm/test/Analysis/ConstantFolding/abs.ll +++ b/llvm/test/Analysis/ConstantFolding/abs.ll @@ -6,8 +6,7 @@ declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1) define i8 @undef_val_min_poison() { ; CHECK-LABEL: @undef_val_min_poison( -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 undef, i1 true) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 undef ; %r = call i8 @llvm.abs.i8(i8 undef, i1 true) ret i8 %r @@ -15,8 +14,7 @@ define i8 @undef_val_min_poison() { define i8 @undef_val_min_not_poison() { ; CHECK-LABEL: @undef_val_min_not_poison( -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 undef, i1 false) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %r = call i8 @llvm.abs.i8(i8 undef, i1 false) ret i8 %r @@ -24,8 +22,7 @@ define i8 @undef_val_min_not_poison() { define i8 @min_val_min_poison() { ; CHECK-LABEL: @min_val_min_poison( -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 -128, i1 true) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 undef ; %r = call i8 @llvm.abs.i8(i8 -128, i1 true) ret i8 %r @@ -33,8 +30,7 @@ define i8 @min_val_min_poison() { define i8 @min_val_min_not_poison() { ; CHECK-LABEL: @min_val_min_not_poison( -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.abs.i8(i8 -128, i1 false) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -128 ; %r = call i8 @llvm.abs.i8(i8 -128, i1 false) ret i8 %r @@ -42,8 +38,7 @@ define i8 @min_val_min_not_poison() { define <8 x i8> @vec_const() { ; CHECK-LABEL: @vec_const( -; CHECK-NEXT: [[R:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> , i1 true) -; CHECK-NEXT: ret <8 x i8> [[R]] +; CHECK-NEXT: ret <8 x i8> ; %r = call <8 x i8> @llvm.abs.v8i8(<8 x i8> , i1 1) ret <8 x i8> %r From 765b81f6b93f747bbca57e9042bdd3ce9fea5c5b Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Fri, 31 Jul 2020 11:10:44 -0700 Subject: [PATCH 043/600] Revert "[flang] Fix multi-config 
generator builds." This reverts commit 332170356e35ea9cdc2c1d612b61a50ec5ea322e. The change breaks out-of-tree builds. Discussion in https://reviews.llvm.org/D84022 --- flang/CMakeLists.txt | 4 +++- flang/test/lit.cfg.py | 8 +++++++- flang/test/lit.site.cfg.py.in | 1 + flang/tools/f18/CMakeLists.txt | 10 ++-------- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 337311fc0a63d..f1aaa5c6473fe 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -180,7 +180,7 @@ else() ${LLVM_INCLUDE_TESTS}) set(FLANG_GTEST_AVAIL 1) - set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + set(FLANG_BINARY_DIR ${CMAKE_BINARY_DIR}/tools/flang) set(BACKEND_PACKAGE_STRING "${PACKAGE_STRING}") if (LINK_WITH_FIR) set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root @@ -194,6 +194,8 @@ endif() if(LINK_WITH_FIR) # tco tool and FIR lib output directories + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin) + set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib) # Always build tco tool set(LLVM_BUILD_TOOLS ON) message(STATUS "Linking driver with FIR and LLVM") diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 521ae968d1cb9..8ad5a9b6357f9 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -48,6 +48,12 @@ llvm_config.with_environment('PATH', config.flang_tools_dir, append_path=True) llvm_config.with_environment('PATH', config.llvm_tools_dir, append_path=True) +# For builds with FIR, set path for tco and enable related tests +if config.flang_llvm_tools_dir != "" : + config.available_features.add('fir') + if config.llvm_tools_dir != config.flang_llvm_tools_dir : + llvm_config.with_environment('PATH', config.flang_llvm_tools_dir, append_path=True) + # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. tools = [ @@ -55,7 +61,7 @@ extra_args=["-intrinsic-module-directory "+config.flang_intrinsic_modules_dir], unresolved='fatal') ] -llvm_config.add_tool_substitutions(tools, config.llvm_tools_dir) +llvm_config.add_tool_substitutions(tools, [config.flang_llvm_tools_dir]) # Enable libpgmath testing result = lit_config.params.get("LIBPGMATH") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index 34bcdab11b55b..e8e2945a2cbf0 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -7,6 +7,7 @@ config.flang_obj_root = "@FLANG_BINARY_DIR@" config.flang_src_dir = "@FLANG_SOURCE_DIR@" config.flang_tools_dir = "@FLANG_TOOLS_DIR@" config.flang_intrinsic_modules_dir = "@FLANG_INTRINSIC_MODULES_DIR@" +config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin" config.python_executable = "@PYTHON_EXECUTABLE@" # Support substitution of the tools_dir with user parameters. This is diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 8e8b3d6b6ab11..46c38fa43a2e5 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -59,14 +59,8 @@ add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) install(TARGETS f18 DESTINATION bin) set(FLANG_INTRINSIC_MODULES_DIR ${FLANG_BINARY_DIR}/include/flang) -# This flang shell script will only work in a POSIX shell. 
-if (NOT WIN32) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${CMAKE_CURRENT_BINARY_DIR}/tools/flang/bin/flang @ONLY) -add_custom_command(TARGET f18 - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/tools/flang/bin/flang ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang - COMMAND chmod +x ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) -endif() +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${CMAKE_BINARY_DIR}/tools/flang/bin/flang @ONLY) +file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) # The flang script to be installed needs a different path to the headers. set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_INSTALL_PREFIX}/include/flang) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${FLANG_BINARY_DIR}/bin/flang-install.sh @ONLY) From 8830f1170dae898d7a0d6a95897e1c23e316ec1e Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Fri, 24 Jul 2020 13:49:29 +0530 Subject: [PATCH 044/600] [flang]Verify C7107, C7108, C7109 from - Clause 7 constraint checks for f18. 1. Annotate the sources with constraint numbers. 2. Add tests for *C7107 (R765) digit shall have one of the values 0 or 1. *C7108 (R766) digit shall have one of the values 0 through 7. *C7109 (R764) A boz-literal-constant shall appear only as a data-stmt-constant in a DATA statement, or where explicitly allowed in 16.9 as an actual argument of an intrinsic procedure. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D84504 --- flang/lib/Evaluate/intrinsics.cpp | 5 +- flang/lib/Semantics/expression.cpp | 3 +- .../test/Semantics/boz-literal-constants.f90 | 79 +++++++++++++++++++ 3 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 flang/test/Semantics/boz-literal-constants.f90 diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 35a69e4e9b937..ca9a91c98dafe 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1129,9 +1129,10 @@ std::optional IntrinsicInterface::Match( d.rank == Rank::elementalOrBOZ) { continue; } else { + const IntrinsicDummyArgument &nextParam{dummy[j + 1]}; messages.Say( - "Typeless (BOZ) not allowed for '%s=' argument"_err_en_US, - d.keyword); + "Typeless (BOZ) not allowed for both '%s=' & '%s=' arguments"_err_en_US, // C7109 + d.keyword, nextParam.keyword); } } else { // NULL(), procedure, or procedure pointer diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 3ec2af5d70fa8..0a6e448e98a22 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -633,7 +633,8 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::BOZLiteralConstant &x) { ++p; auto value{BOZLiteralConstant::Read(p, base, false /*unsigned*/)}; if (*p != '"') { - Say("Invalid digit ('%c') in BOZ literal '%s'"_err_en_US, *p, x.v); + Say("Invalid digit ('%c') in BOZ literal '%s'"_err_en_US, *p, + x.v); // C7107, C7108 return std::nullopt; } if (value.overflow) { diff --git a/flang/test/Semantics/boz-literal-constants.f90 b/flang/test/Semantics/boz-literal-constants.f90 new file mode 100644 index 0000000000000..23155cc29cb7d --- /dev/null +++ b/flang/test/Semantics/boz-literal-constants.f90 @@ -0,0 +1,79 @@ +! RUN: %S/test_errors.sh %s %t %f18 +! Confirm enforcement of constraints and restrictions in 7.7 +! C7107, C7108, C7109 + +subroutine bozchecks + ! 
Type declaration statements + integer :: f, realpart = B"0101", img = B"1111", resint + logical :: resbit + complex :: rescmplx + real :: dbl, e + ! C7107 + !ERROR: Invalid digit ('a') in BOZ literal 'b"110a"' + integer, parameter :: a = B"110A" + !ERROR: Invalid digit ('2') in BOZ literal 'b"1232"' + integer, parameter :: b = B"1232" + !ERROR: BOZ literal 'b"010101010101010101010101011111111111111111111111111111111111111111111111111111111111111111111111111111111111000000000000000000000000000000000000"' too large + integer, parameter :: b1 = B"010101010101010101010101011111111111111111111& + &111111111111111111111111111111111111111111111& + &111111111111111111000000000000000000000000000& + &000000000" + ! C7108 + !ERROR: Invalid digit ('8') in BOZ literal 'o"8"' + integer :: c = O"8" + !ERROR: Invalid digit ('a') in BOZ literal 'o"a"' + integer :: d = O"A" + + ! C7109 + ! A) can appear only in data statement + ! B) Argument to intrinsics listed from 16.9 below + ! BGE, BGT, BLE, BLT, CMPLX, DBLE, DSHIFTL, + ! DSHIFTR, IAND, IEOR, INT, IOR, MERGE_BITS, REAL + + ! part A + data f / Z"AA" / ! OK + !ERROR: DATA statement value could not be converted to the type 'COMPLEX(4)' of the object 'rescmplx' + data rescmplx / B"010101" / + ! part B + resbit = BGE(B"0101", B"1111") + resbit = BGT(Z"0101", B"1111") + resbit = BLE(B"0101", B"1111") + resbit = BLT(B"0101", B"1111") + + res = CMPLX (realpart, img, 4) + res = CMPLX (B"0101", B"1111", 4) + + dbl = DBLE(B"1111") + dbl = DBLE(realpart) + + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + dbl = DSHIFTL(B"0101",B"0101",2) + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + dbl = DSHIFTR(B"1010",B"1010",2) + dbl = DSHIFTL(B"0101",5,2) ! OK + dbl = DSHIFTR(B"1010",5,2) ! OK + + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + resint = IAND(B"0001", B"0011") + resint = IAND(B"0001", 3) + + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + resint = IEOR(B"0001", B"0011") + resint = IEOR(B"0001", 3) + + resint = INT(B"1010") + + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + res = IOR(B"0101", B"0011") + res = IOR(B"0101", 3) + + res = MERGE_BITS(13,3,11) + res = MERGE_BITS(B"1101",3,11) + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + res = MERGE_BITS(B"1101",B"0011",11) + !ERROR: Typeless (BOZ) not allowed for both 'i=' & 'j=' arguments + res = MERGE_BITS(B"1101",B"0011",B"1011") + res = MERGE_BITS(B"1101",3,B"1011") + + res = REAL(B"1101") +end subroutine From cd53ded557c3487b8dae2f9de894fdb5b75cb8c8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 31 Jul 2020 11:20:15 -0700 Subject: [PATCH 045/600] [Support] Fix computeHostNumPhysicalCores() to respect affinity computeHostNumPhysicalCores() is designed to respect CPU affinity. D84764 used sysconf(_SC_NPROCESSORS_ONLN) which does not respect affinity. SupportTests Threading.PhysicalConcurrency may fail if taskset -c is specified. --- llvm/lib/Support/Host.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 01ede62c755a6..d3b255ae0f2ec 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1271,11 +1271,14 @@ int computeHostNumPhysicalCores() { } return CPU_COUNT(&Enabled); } -#elif (defined(__linux__) && \ - (defined(__ppc__) || defined(__powerpc__) || defined(__s390x__))) -#include - -// Gets the number of *physical cores* on the machine. 
+#elif defined(__linux__) && defined(__powerpc__) +int computeHostNumPhysicalCores() { + cpu_set_t Affinity; + if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) + return -1; + return CPU_COUNT(&Affinity); +} +#elif defined(__linux__) && defined(__s390x__) int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } #elif defined(__APPLE__) && defined(__x86_64__) #include From 19bc9ea480b60b607a3e303f20c7a3a2ea553369 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 31 Jul 2020 12:56:36 -0400 Subject: [PATCH 046/600] [libc++] Avoid including from Block.h is a pretty common name, which can lead to nasty collisions with user provided headers. Since we're only getting a few simple declarations from the header, it's better to declare them manually than to include the header. rdar://66384326 Differential Revision: https://reviews.llvm.org/D85035 --- libcxx/include/functional | 13 ++++++------- .../utilities/function.objects/func.blocks.sh.cpp | 2 ++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/libcxx/include/functional b/libcxx/include/functional index 3e9425320fc32..9a0ca96c4611b 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -508,10 +508,6 @@ POLICY: For non-variadic implementations, the number of arguments is limited #include <__functional_base> -#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC) -#include -#endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif @@ -2257,6 +2253,9 @@ template class __policy_func<_Rp(_ArgTypes...)> #if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC) +extern "C" void *_Block_copy(const void *); +extern "C" void _Block_release(const void *); + template class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> @@ -2267,14 +2266,14 @@ class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> public: _LIBCPP_INLINE_VISIBILITY explicit __func(__block_type const& __f) - : __f_(__f ? Block_copy(__f) : (__block_type)0) + : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) { } // [TODO] add && to save on a retain _LIBCPP_INLINE_VISIBILITY explicit __func(__block_type __f, const _Alloc& /* unused */) - : __f_(__f ? Block_copy(__f) : (__block_type)0) + : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) { } virtual __base<_Rp(_ArgTypes...)>* __clone() const { @@ -2291,7 +2290,7 @@ public: virtual void destroy() _NOEXCEPT { if (__f_) - Block_release(__f_); + _Block_release(__f_); __f_ = 0; } diff --git a/libcxx/test/libcxx/utilities/function.objects/func.blocks.sh.cpp b/libcxx/test/libcxx/utilities/function.objects/func.blocks.sh.cpp index 33c11651f12f5..9a8e9389426a4 100644 --- a/libcxx/test/libcxx/utilities/function.objects/func.blocks.sh.cpp +++ b/libcxx/test/libcxx/utilities/function.objects/func.blocks.sh.cpp @@ -21,6 +21,8 @@ #include #include +#include + #include "test_macros.h" #include "count_new.h" From b752a8ca499b84d46c5b710f7a3b475fdf0d752e Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Fri, 24 Jul 2020 12:25:32 +0530 Subject: [PATCH 047/600] [flang][NFC] Verify C781 from - Clause 7 constraint checks for f18. 
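For reference, C781 states that a procedure binding shall not specify both
PASS and NOPASS. A minimal Fortran sketch of the rejected pattern, adapted
from the resolve80.f90 test updated below (the module, type, and interface
names here are placeholders):

  module m
    type, abstract :: t
    contains
      !ERROR: Attributes 'PASS' and 'NOPASS' conflict with each other
      procedure(iface), pass, deferred, nopass :: bad ! C781
    end type
    abstract interface
      subroutine iface(this)
        import :: t
        class(t), intent(in) :: this
      end subroutine
    end interface
  end module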
Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D84503 --- flang/lib/Semantics/resolve-names.cpp | 2 +- flang/test/Semantics/bindings01.f90 | 2 +- flang/test/Semantics/resolve80.f90 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index eb7dd697b2748..e85dfa9c91ef5 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1547,7 +1547,7 @@ bool AttrsVisitor::IsConflictingAttr(Attr attrName) { return HaveAttrConflict(attrName, Attr::INTENT_IN, Attr::INTENT_INOUT) || HaveAttrConflict(attrName, Attr::INTENT_IN, Attr::INTENT_OUT) || HaveAttrConflict(attrName, Attr::INTENT_INOUT, Attr::INTENT_OUT) || - HaveAttrConflict(attrName, Attr::PASS, Attr::NOPASS) || + HaveAttrConflict(attrName, Attr::PASS, Attr::NOPASS) || // C781 HaveAttrConflict(attrName, Attr::PURE, Attr::IMPURE) || HaveAttrConflict(attrName, Attr::PUBLIC, Attr::PRIVATE) || HaveAttrConflict(attrName, Attr::RECURSIVE, Attr::NON_RECURSIVE); diff --git a/flang/test/Semantics/bindings01.f90 b/flang/test/Semantics/bindings01.f90 index 26dd25a24650c..f2d5876167c25 100644 --- a/flang/test/Semantics/bindings01.f90 +++ b/flang/test/Semantics/bindings01.f90 @@ -1,6 +1,6 @@ ! RUN: %S/test_errors.sh %s %t %f18 ! Confirm enforcement of constraints and restrictions in 7.5.7.3 -! and C733, C734 and C779, C780, C781, C782, C783, C784, and C785. +! and C733, C734 and C779, C780, C782, C783, C784, and C785. module m !ERROR: An ABSTRACT derived type must be extensible diff --git a/flang/test/Semantics/resolve80.f90 b/flang/test/Semantics/resolve80.f90 index 7e9df344a1611..689187d4e15ed 100644 --- a/flang/test/Semantics/resolve80.f90 +++ b/flang/test/Semantics/resolve80.f90 @@ -26,7 +26,7 @@ module m !WARNING: Attribute 'PASS' cannot be used more than once procedure(subPass), pass, deferred, pass :: passBinding !ERROR: Attributes 'PASS' and 'NOPASS' conflict with each other - procedure(subPassNopass), pass, deferred, nopass :: passNopassBinding + procedure(subPassNopass), pass, deferred, nopass :: passNopassBinding ! C781 end type boundProcType contains From bf812c145ca2edc4fb76133ec8104267d66f8ee6 Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Thu, 30 Jul 2020 23:47:51 +0530 Subject: [PATCH 048/600] [flang][OpenMP] Added initial support for lowering OpenMP parallel construct This patch lower `!OMP PARALLEL` construct from PFT to OpenMPDialect operations. This is first patch in this direction(lowering parallel construct). OpenMP parallel construct can have multiple clauses and parameters. This patch only implements lowering of an empty(contains no code in body) parallel construct without any clauses or parameters. 
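For illustration, the minimal input handled by this patch is an empty
parallel region:

  !$omp parallel
  !$omp end parallel

which is lowered to an `omp.parallel` operation whose region holds only the
terminator. A hand-written sketch of the expected OpenMP dialect output
follows (illustrative textual form, not verified compiler output):

  omp.parallel {
    omp.terminator
  }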
Patch is carved out of following approved PR: https://github.com/flang-compiler/f18-llvm-project/pull/322 Reviewed By: kiranchandramohan, DavidTruby Differential Revision: https://reviews.llvm.org/D84965 --- flang/lib/Lower/OpenMP.cpp | 32 +++++++++++++++++++- flang/unittests/Lower/OpenMPLoweringTest.cpp | 27 +++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index bc7cd27df320e..017187069dcbb 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -75,6 +75,36 @@ genOMP(Fortran::lower::AbstractConverter &absConv, standaloneConstruct.u); } +static void +genOMP(Fortran::lower::AbstractConverter &absConv, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { + const auto &blockDirective = + std::get(blockConstruct.t); + const auto ¶llelDirective = + std::get(blockDirective.t); + if (parallelDirective.v == llvm::omp::OMPD_parallel) { + auto &firOpBuilder = absConv.getFirOpBuilder(); + auto currentLocation = absConv.getCurrentLocation(); + auto insertPt = firOpBuilder.saveInsertionPoint(); + llvm::ArrayRef argTy; + mlir::ValueRange range; + llvm::SmallVector operandSegmentSizes(6 /*Size=*/, + 0 /*Value=*/); + // create and insert the operation. + auto parallelOp = firOpBuilder.create( + currentLocation, argTy, range); + parallelOp.setAttr(mlir::omp::ParallelOp::getOperandSegmentSizeAttr(), + firOpBuilder.getI32VectorAttr(operandSegmentSizes)); + parallelOp.getRegion().push_back(new Block{}); + auto &block = parallelOp.getRegion().back(); + firOpBuilder.setInsertionPointToStart(&block); + // ensure the block is well-formed. + firOpBuilder.create(currentLocation); + firOpBuilder.restoreInsertionPoint(insertPt); + } +} + void Fortran::lower::genOpenMPConstruct( Fortran::lower::AbstractConverter &absConv, Fortran::lower::pft::Evaluation &eval, @@ -92,7 +122,7 @@ void Fortran::lower::genOpenMPConstruct( TODO(); }, [&](const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { - TODO(); + genOMP(absConv, eval, blockConstruct); }, [&](const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) { TODO(); diff --git a/flang/unittests/Lower/OpenMPLoweringTest.cpp b/flang/unittests/Lower/OpenMPLoweringTest.cpp index fd580d71fab3c..ad6fe739d16b4 100644 --- a/flang/unittests/Lower/OpenMPLoweringTest.cpp +++ b/flang/unittests/Lower/OpenMPLoweringTest.cpp @@ -71,4 +71,31 @@ TEST_F(OpenMPLoweringTest, TaskYield) { EXPECT_EQ(succeeded(taskYieldOp.verify()), true); } +TEST_F(OpenMPLoweringTest, EmptyParallel) { + // Construct a dummy parse tree node for `!OMP parallel`. + struct Fortran::parser::OmpSimpleStandaloneDirective parallelDirective( + llvm::omp::Directive::OMPD_parallel); + + // Check and lower the `!OMP parallel` node to `ParallelOp` operation of + // OpenMPDialect. + EXPECT_EQ(parallelDirective.v, llvm::omp::Directive::OMPD_parallel); + auto insertPt = mlirOpBuilder->saveInsertionPoint(); + llvm::ArrayRef argTy; + mlir::ValueRange range; + llvm::SmallVector operandSegmentSizes(6 /*Size=*/, 0 /*Value=*/); + // create and insert the operation. 
+ auto parallelOp = mlirOpBuilder->create( + mlirOpBuilder->getUnknownLoc(), argTy, range); + parallelOp.setAttr(mlir::omp::ParallelOp::getOperandSegmentSizeAttr(), + mlirOpBuilder->getI32VectorAttr(operandSegmentSizes)); + parallelOp.getRegion().push_back(new mlir::Block{}); + auto &block = parallelOp.getRegion().back(); + mlirOpBuilder->setInsertionPointToStart(&block); + // ensure the block is well-formed. + mlirOpBuilder->create( + mlirOpBuilder->getUnknownLoc()); + mlirOpBuilder->restoreInsertionPoint(insertPt); + EXPECT_EQ(succeeded(parallelOp.verify()), true); +} + // main() from gtest_main From b7cfa6ca92830b3c331cb44706bb279996663439 Mon Sep 17 00:00:00 2001 From: Sidharth Baveja Date: Fri, 31 Jul 2020 18:31:58 +0000 Subject: [PATCH 049/600] [Loop Peeling] Separate the Loop Peeling Utilities from the Loop Unrolling Utilities Summary: This patch separates the Loop Peeling Utilities from Loop Unrolling. The reason for this change is that Loop Peeling is no longer only being used by loop unrolling; Patch D82927 introduces loop peeling with fusion, such that loops can be modified to have to same trip count, making them legal to be peeled. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D83056 --- llvm/include/llvm/Transforms/Utils/LoopPeel.h | 40 ++++++++++ .../llvm/Transforms/Utils/UnrollLoop.h | 17 ----- .../Hexagon/HexagonTargetTransformInfo.cpp | 1 + llvm/lib/Transforms/Scalar/LoopFuse.cpp | 2 +- .../Scalar/LoopUnrollAndJamPass.cpp | 1 + llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 51 +------------ llvm/lib/Transforms/Utils/CMakeLists.txt | 2 +- .../{LoopUnrollPeel.cpp => LoopPeel.cpp} | 76 +++++++++++++++---- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 1 + 9 files changed, 109 insertions(+), 82 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/LoopPeel.h rename llvm/lib/Transforms/Utils/{LoopUnrollPeel.cpp => LoopPeel.cpp} (92%) diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h new file mode 100644 index 0000000000000..8f857e1e5c215 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -0,0 +1,40 @@ +//===- llvm/Transforms/Utils/LoopPeel.h ----- Peeling utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some loop peeling utilities. It does not define any +// actual pass or policy. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_LOOPPEEL_H +#define LLVM_TRANSFORMS_UTILS_LOOPPEEL_H + +#include "llvm/Analysis/TargetTransformInfo.h" + +namespace llvm { + +bool canPeel(Loop *L); + +bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); + +TargetTransformInfo::PeelingPreferences +gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, + bool UnrollingSpecficValues = false); + +void computePeelCount(Loop *L, unsigned LoopSize, + TargetTransformInfo::PeelingPreferences &PP, + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold = UINT_MAX); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOOPPEEL_H diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index bb3d02b959564..4254bd71a41c0 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -92,16 +92,6 @@ bool UnrollRuntimeLoopRemainder( const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop = nullptr); -void computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, - TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE); - -bool canPeel(Loop *L); - -bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); - LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, @@ -121,7 +111,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - bool &UseUpperBound); void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, @@ -138,12 +127,6 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Optional UserAllowPartial, Optional UserRuntime, Optional UserUpperBound, Optional UserFullUnrollMaxCount); -TargetTransformInfo::PeelingPreferences -gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling); - unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 68efaf767502c..0cdb383eb9241 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index 5573af834c597..90356ed0f2a75 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -66,7 +66,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include 
"llvm/Transforms/Utils/CodeMoverUtils.h" -#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/LoopPeel.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 285cba6ee2054..bd62419323065 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -41,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 87f40bb7ba852..2b610392dcfd5 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -56,6 +56,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SizeOpts.h" @@ -115,10 +116,6 @@ static cl::opt UnrollFullMaxCount( cl::desc( "Set the max unroll count for full unrolling, for testing purposes")); -static cl::opt UnrollPeelCount( - "unroll-peel-count", cl::Hidden, - cl::desc("Set the unroll peeling count, for testing purposes")); - static cl::opt UnrollAllowPartial("unroll-allow-partial", cl::Hidden, cl::desc("Allows loops to be partially unrolled until " @@ -149,15 +146,6 @@ static cl::opt FlatLoopTripCountThreshold( "threshold, the loop is considered as flat and will be less " "aggressively unrolled.")); -static cl::opt - UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, - cl::desc("Allows loops to be peeled when the dynamic " - "trip count is known to be low.")); - -static cl::opt UnrollAllowLoopNestsPeeling( - "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, - cl::desc("Allows loop nests to be peeled.")); - static cl::opt UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -275,39 +263,6 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( return UP; } -TargetTransformInfo::PeelingPreferences -llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - Optional UserAllowPeeling, - Optional UserAllowProfileBasedPeeling) { - TargetTransformInfo::PeelingPreferences PP; - - // Default values - PP.PeelCount = 0; - PP.AllowPeeling = true; - PP.AllowLoopNestsPeeling = false; - PP.PeelProfiledIterations = true; - - // Get Target Specifc Values - TTI.getPeelingPreferences(L, SE, PP); - - // User Specified Values using cl::opt - if (UnrollPeelCount.getNumOccurrences() > 0) - PP.PeelCount = UnrollPeelCount; - if (UnrollAllowPeeling.getNumOccurrences() > 0) - PP.AllowPeeling = UnrollAllowPeeling; - if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) - PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; - - // User Specifed values provided by argument - if (UserAllowPeeling.hasValue()) - PP.AllowPeeling = *UserAllowPeeling; - if (UserAllowProfileBasedPeeling.hasValue()) - PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; - - return PP; -} - namespace { /// A struct to densely store the state 
of an instruction after unrolling at @@ -881,7 +836,7 @@ bool llvm::computeUnrollCount( } // 4th priority is loop peeling. - computePeelCount(L, LoopSize, UP, PP, TripCount, SE); + computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); if (PP.PeelCount) { UP.Runtime = false; UP.Count = 1; @@ -1087,7 +1042,7 @@ static LoopUnrollResult tryToUnrollLoop( ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedFullUnrollMaxCount); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( - L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling); + L, SE, TTI, ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, true); // Exit early if unrolling is disabled. For OptForSize, we pick the loop size // as threshold later on. diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 19f655c3a78fb..2ef17dc595fa8 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -35,11 +35,11 @@ add_llvm_component_library(LLVMTransformUtils LCSSA.cpp LibCallsShrinkWrap.cpp Local.cpp + LoopPeel.cpp LoopRotationUtils.cpp LoopSimplify.cpp LoopUnroll.cpp LoopUnrollAndJam.cpp - LoopUnrollPeel.cpp LoopUnrollRuntime.cpp LoopUtils.cpp LoopVersioning.cpp diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp similarity index 92% rename from llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp rename to llvm/lib/Transforms/Utils/LoopPeel.cpp index c653aacbee6cc..ff9b9eca8fdf7 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -1,4 +1,4 @@ -//===- UnrollLoopPeel.cpp - Loop peeling utilities ------------------------===// +//===- LoopPeel.cpp -------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements some loop unrolling utilities for peeling loops -// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for -// unrolling loops with compile-time constant trip counts. -// +// Loop Peeling Utilities. //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" @@ -49,10 +47,24 @@ using namespace llvm; using namespace llvm::PatternMatch; -#define DEBUG_TYPE "loop-unroll" +#define DEBUG_TYPE "loop-peel" STATISTIC(NumPeeled, "Number of loops peeled"); +static cl::opt UnrollPeelCount( + "unroll-peel-count", cl::Hidden, + cl::desc("Set the unroll peeling count, for testing purposes")); + +static cl::opt + UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden, + cl::desc("Allows loops to be peeled when the dynamic " + "trip count is known to be low.")); + +static cl::opt + UnrollAllowLoopNestsPeeling("unroll-allow-loop-nests-peeling", + cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt UnrollPeelMaxCount( "unroll-peel-max-count", cl::init(7), cl::Hidden, cl::desc("Max average trip count which will cause loop peeling.")); @@ -278,9 +290,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, // Return the number of iterations we want to peel off. 
void llvm::computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE) { + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the PP.PeelCount value set by the target in // TTI.getPeelingPreferences or by the flag -unroll-peel-count. @@ -322,7 +334,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // maximum number of iterations among these values, thus turning all those // Phis into invariants. // First, check that we can peel at least one iteration. - if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) { // Store the pre-calculated values here. SmallDenseMap IterationsToInvariance; // Now go through all Phis to calculate their the number of iterations they @@ -342,7 +354,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // Pay respect to limitations implied by loop size and the max peel count. unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + MaxPeelCount = std::min(MaxPeelCount, Threshold / LoopSize - 1); DesiredPeelCount = std::max(DesiredPeelCount, countToEliminateCompares(*L, MaxPeelCount, SE)); @@ -385,7 +397,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (*PeelCount) { if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && - (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { + (LoopSize * (*PeelCount + 1) <= Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); PP.PeelCount = *PeelCount; @@ -396,7 +408,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); - LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); + LLVM_DEBUG(dbgs() << "Max peel cost: " << Threshold << "\n"); } } } @@ -491,7 +503,7 @@ static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, - SmallVectorImpl > &ExitEdges, + SmallVectorImpl> &ExitEdges, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, LoopInfo *LI) { @@ -599,6 +611,40 @@ static void cloneLoopBlocks( LVMap[KV.first] = KV.second; } +TargetTransformInfo::PeelingPreferences llvm::gatherPeelingPreferences( + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + Optional UserAllowPeeling, + Optional UserAllowProfileBasedPeeling, bool UnrollingSpecficValues) { + TargetTransformInfo::PeelingPreferences PP; + + // Set the default values. + PP.PeelCount = 0; + PP.AllowPeeling = true; + PP.AllowLoopNestsPeeling = false; + PP.PeelProfiledIterations = true; + + // Get the target specifc values. + TTI.getPeelingPreferences(L, SE, PP); + + // User specified values using cl::opt. + if (UnrollingSpecficValues) { + if (UnrollPeelCount.getNumOccurrences() > 0) + PP.PeelCount = UnrollPeelCount; + if (UnrollAllowPeeling.getNumOccurrences() > 0) + PP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; + } + + // User specifed values provided by argument. 
+ if (UserAllowPeeling.hasValue()) + PP.AllowPeeling = *UserAllowPeeling; + if (UserAllowProfileBasedPeeling.hasValue()) + PP.PeelProfiledIterations = *UserAllowProfileBasedPeeling; + + return PP; +} + /// Peel off the first \p PeelCount iterations of loop \p L. /// /// Note that this does not peel them off as a single straight-line block. @@ -609,8 +655,8 @@ static void cloneLoopBlocks( /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA) { + ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 8e8aeea15dbf9..7bea696a853a9 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,6 +59,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" From 09746fbfb9cd80a06280e9ca34eeadea93bcb3b2 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 31 Jul 2020 18:32:54 +0000 Subject: [PATCH 050/600] [gn build] Port b7cfa6ca928 --- llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn index 5fae2e450b481..a0de4e4980b12 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -42,11 +42,11 @@ static_library("Utils") { "LCSSA.cpp", "LibCallsShrinkWrap.cpp", "Local.cpp", + "LoopPeel.cpp", "LoopRotationUtils.cpp", "LoopSimplify.cpp", "LoopUnroll.cpp", "LoopUnrollAndJam.cpp", - "LoopUnrollPeel.cpp", "LoopUnrollRuntime.cpp", "LoopUtils.cpp", "LoopVersioning.cpp", From e8a2af28afd5b4370fb66d8810ff979d1cb4d45b Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 31 Jul 2020 14:36:23 -0400 Subject: [PATCH 051/600] [libc] Adds strrchr implementation. 
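A quick usage sketch, mirroring the semantics exercised by the new unit
tests below (the string values and function name are arbitrary examples):

  #include "src/string/strrchr.h"

  void demo() {
    const char *s = "abc1def1ghi";
    // The last occurrence wins when the character is duplicated.
    const char *p = __llvm_libc::strrchr(s, '1');   // points at "1ghi"
    // The null terminator itself can be searched for.
    const char *e = __llvm_libc::strrchr(s, '\0');  // points at the terminator
    // A character not present in the string yields nullptr.
    const char *q = __llvm_libc::strrchr(s, 'z');   // nullptr
  }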
Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D84875 --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/src/string/CMakeLists.txt | 8 +++ libc/src/string/strrchr.cpp | 28 ++++++++ libc/src/string/strrchr.h | 18 +++++ libc/test/src/string/CMakeLists.txt | 10 +++ libc/test/src/string/strrchr_test.cpp | 81 +++++++++++++++++++++++ 7 files changed, 147 insertions(+) create mode 100644 libc/src/string/strrchr.cpp create mode 100644 libc/src/string/strrchr.h create mode 100644 libc/test/src/string/strrchr_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 5f058e6116001..8314df89b0636 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -20,6 +20,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcpy libc.src.string.strlen libc.src.string.strnlen + libc.src.string.strrchr libc.src.string.strstr ) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 5dc1d38455dcf..56a99d00d7847 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -38,6 +38,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcpy libc.src.string.strlen libc.src.string.strnlen + libc.src.string.strrchr libc.src.string.strstr # sys/mman.h entrypoints diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 99450d5564593..d0eab632e9d79 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -86,6 +86,14 @@ add_entrypoint_object( memrchr.h ) +add_entrypoint_object( + strrchr + SRCS + strrchr.cpp + HDRS + strrchr.h +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/strrchr.cpp b/libc/src/string/strrchr.cpp new file mode 100644 index 0000000000000..28716c28a2664 --- /dev/null +++ b/libc/src/string/strrchr.cpp @@ -0,0 +1,28 @@ +//===-- Implementation of strrchr------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strrchr.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +char *LLVM_LIBC_ENTRYPOINT(strrchr)(const char *src, int c) { + unsigned char *str = + const_cast(reinterpret_cast(src)); + const unsigned char ch = c; + + unsigned char *last_occurrence = nullptr; + do { + if (*str == ch) + last_occurrence = str; + } while (*str++); + return reinterpret_cast(last_occurrence); +} + +} // namespace __llvm_libc diff --git a/libc/src/string/strrchr.h b/libc/src/string/strrchr.h new file mode 100644 index 0000000000000..7b85929b290b7 --- /dev/null +++ b/libc/src/string/strrchr.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strrchr -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRRCHR_H +#define LLVM_LIBC_SRC_STRING_STRRCHR_H + +namespace __llvm_libc { + +char *strrchr(const char *src, int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRRCHR_H diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index a116effef2719..0fff250fabd15 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -92,6 +92,16 @@ add_libc_unittest( libc.src.string.memrchr ) +add_libc_unittest( + strrchr_test + SUITE + libc_string_unittests + SRCS + strrchr_test.cpp + DEPENDS + libc.src.string.strrchr +) + # Tests all implementations that can run on the host. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strrchr_test.cpp b/libc/test/src/string/strrchr_test.cpp new file mode 100644 index 0000000000000..18fddda600870 --- /dev/null +++ b/libc/test/src/string/strrchr_test.cpp @@ -0,0 +1,81 @@ +//===-- Unittests for strrchr ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strrchr.h" +#include "utils/UnitTest/Test.h" + +TEST(StrRChrTest, FindsFirstCharacter) { + const char *src = "abcde"; + const char *src_copy = src; + + // Should return original string since 'a' is the first character. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'a'), "abcde"); + // Source string should not change. + ASSERT_STREQ(src, src_copy); +} + +TEST(StrRChrTest, FindsMiddleCharacter) { + const char *src = "abcde"; + const char *src_copy = src; + + // Should return characters after (and including) 'c'. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'c'), "cde"); + // Source string should not change. + ASSERT_STREQ(src, src_copy); +} + +TEST(StrRChrTest, FindsLastCharacterThatIsNotNullTerminator) { + const char *src = "abcde"; + const char *src_copy = src; + + // Should return 'e' and null-terminator. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'e'), "e"); + // Source string should not change. + ASSERT_STREQ(src, src_copy); +} + +TEST(StrRChrTest, FindsNullTerminator) { + const char *src = "abcde"; + const char *src_copy = src; + + // Should return null terminator. + ASSERT_STREQ(__llvm_libc::strrchr(src, '\0'), ""); + // Source string should not change. + ASSERT_STREQ(src, src_copy); +} + +TEST(StrRChrTest, FindsLastNullTerminator) { + const char src[5] = {'a', '\0', 'b', '\0', 'c'}; + // 'b' is behind a null terminator, so should not be found. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'b'), nullptr); + // Same goes for 'c'. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'c'), nullptr); +} + +TEST(StrRChrTest, CharacterNotWithinStringShouldReturnNullptr) { + // Since 'z' is not within the string, should return nullptr. + ASSERT_STREQ(__llvm_libc::strrchr("123?", 'z'), nullptr); +} + +TEST(StrRChrTest, ShouldFindLastOfDuplicates) { + // '1' is duplicated in the string, but it should find the last copy. + ASSERT_STREQ(__llvm_libc::strrchr("abc1def1ghi", '1'), "1ghi"); + + const char *dups = "XXXXX"; + // Should return the last occurrence of 'X'. 
+ ASSERT_STREQ(__llvm_libc::strrchr(dups, 'X'), "X"); +} + +TEST(StrRChrTest, EmptyStringShouldOnlyMatchNullTerminator) { + // Null terminator should match. + ASSERT_STREQ(__llvm_libc::strrchr("", '\0'), ""); + // All other characters should not match. + ASSERT_STREQ(__llvm_libc::strrchr("", 'A'), nullptr); + ASSERT_STREQ(__llvm_libc::strrchr("", '2'), nullptr); + ASSERT_STREQ(__llvm_libc::strrchr("", '*'), nullptr); +} From 43bf902c2e3416179cf41eba9307fc74bcba0ecd Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 31 Jul 2020 11:38:10 -0700 Subject: [PATCH 052/600] [compiler-rt][Darwin] Fix GetOSMajorKernelOffset() on watchOS `TARGET_OS_IOS` and `TARGET_OS_WATCH` are not mutually exclusive. `SANITIZER_IOS` is defined for all embedded platforms. So the branch for watchOS is never taken. We could fix this by switching the order of the branches (but the reason for doing so is non-obvious). Instead, lets use the Darwin-specific `TARGET_OS_*` macros which are mutually exclusive. --- compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp index a10ba774b9552..21a9c01bf2a9a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp @@ -610,8 +610,8 @@ HandleSignalMode GetHandleSignalMode(int signum) { // XNU 17 -- macOS 10.13 -- iOS 11 -- tvOS 11 -- watchOS 4 constexpr u16 GetOSMajorKernelOffset() { if (TARGET_OS_OSX) return 4; - if (SANITIZER_IOS || SANITIZER_TVOS) return 6; - if (SANITIZER_WATCHOS) return 13; + if (TARGET_OS_IOS || TARGET_OS_TV) return 6; + if (TARGET_OS_WATCH) return 13; } using VersStr = char[64]; @@ -661,9 +661,9 @@ static void MapToMacos(u16 *major, u16 *minor) { if (TARGET_OS_OSX) return; - if (SANITIZER_IOS || SANITIZER_TVOS) + if (TARGET_OS_IOS || TARGET_OS_TV) *major += 2; - else if (SANITIZER_WATCHOS) + else if (TARGET_OS_WATCH) *major += 9; else UNREACHABLE("unsupported platform"); From ca6b6d40ffba27fe231f55f7edc533f0a1815d31 Mon Sep 17 00:00:00 2001 From: Sriraman Tallam Date: Fri, 31 Jul 2020 11:14:49 -0700 Subject: [PATCH 053/600] Rename basic block sections options to be consistent. D68049 created options for basic block sections: -fbasic-block-sections=, -funique-basic-block-section-names. Rename options in llc and lld (--lto-) to be consistent. 
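For example, with this rename a typical invocation uses the new spellings
(illustrative command lines; the input file names are placeholders):

  llc -basic-block-sections=labels -unique-basic-block-section-names foo.ll
  ld.lld --lto-basic-block-sections=all --lto-unique-basic-block-section-names foo.o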
Specifically, + Rename basicblock-sections to basic-block-sections + Rename unique-bb-section-names to unique-basic-block-section-names Differential Revision: https://reviews.llvm.org/D84462 --- lld/ELF/Driver.cpp | 6 +++--- lld/ELF/LTO.cpp | 2 +- lld/ELF/Options.td | 4 ++-- ...-sections-and-icf.s => basic-block-sections-and-icf.s} | 2 +- ...-fallthru.s => basic-block-sections-delete-fallthru.s} | 2 +- ...tions-pc32reloc.s => basic-block-sections-pc32reloc.s} | 2 +- llvm/lib/CodeGen/CommandFlags.cpp | 4 ++-- ...nches.ll => basic-block-sections-clusters-branches.ll} | 4 ++-- ...clusters-eh.ll => basic-block-sections-clusters-eh.ll} | 4 ++-- ...rs-error.ll => basic-block-sections-clusters-error.ll} | 8 ++++---- ...tions-clusters.ll => basic-block-sections-clusters.ll} | 4 ++-- ...lock-sections-cold.ll => basic-block-sections-cold.ll} | 2 +- ...directjumps.ll => basic-block-sections-directjumps.ll} | 4 ++-- ...sicblock-sections-eh.ll => basic-block-sections-eh.ll} | 2 +- ...-sections-labels.ll => basic-block-sections-labels.ll} | 2 +- ...lock-sections-list.ll => basic-block-sections-list.ll} | 2 +- ...-sections-listbb.ll => basic-block-sections-listbb.ll} | 2 +- ...s-mir-parse.mir => basic-block-sections-mir-parse.mir} | 2 +- ...ons-mir-print.ll => basic-block-sections-mir-print.ll} | 2 +- .../{basicblock-sections.ll => basic-block-sections.ll} | 4 ++-- llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll | 6 +++--- ...inserter-basic-block-sections-callee-save-registers.ll | 2 +- ...basicblock-sections_1.ll => basic-block-sections_1.ll} | 6 +++--- 23 files changed, 39 insertions(+), 39 deletions(-) rename lld/test/ELF/{bb-sections-and-icf.s => basic-block-sections-and-icf.s} (96%) rename lld/test/ELF/{bb-sections-delete-fallthru.s => basic-block-sections-delete-fallthru.s} (98%) rename lld/test/ELF/{bb-sections-pc32reloc.s => basic-block-sections-pc32reloc.s} (96%) rename llvm/test/CodeGen/X86/{basicblock-sections-clusters-branches.ll => basic-block-sections-clusters-branches.ll} (93%) rename llvm/test/CodeGen/X86/{basicblock-sections-clusters-eh.ll => basic-block-sections-clusters-eh.ll} (95%) rename llvm/test/CodeGen/X86/{basicblock-sections-clusters-error.ll => basic-block-sections-clusters-error.ll} (75%) rename llvm/test/CodeGen/X86/{basicblock-sections-clusters.ll => basic-block-sections-clusters.ll} (94%) rename llvm/test/CodeGen/X86/{basicblock-sections-cold.ll => basic-block-sections-cold.ll} (93%) rename llvm/test/CodeGen/X86/{basicblock-sections-directjumps.ll => basic-block-sections-directjumps.ll} (84%) rename llvm/test/CodeGen/X86/{basicblock-sections-eh.ll => basic-block-sections-eh.ll} (96%) rename llvm/test/CodeGen/X86/{basicblock-sections-labels.ll => basic-block-sections-labels.ll} (92%) rename llvm/test/CodeGen/X86/{basicblock-sections-list.ll => basic-block-sections-list.ll} (95%) rename llvm/test/CodeGen/X86/{basicblock-sections-listbb.ll => basic-block-sections-listbb.ll} (93%) rename llvm/test/CodeGen/X86/{basicblock-sections-mir-parse.mir => basic-block-sections-mir-parse.mir} (98%) rename llvm/test/CodeGen/X86/{basicblock-sections-mir-print.ll => basic-block-sections-mir-print.ll} (91%) rename llvm/test/CodeGen/X86/{basicblock-sections.ll => basic-block-sections.ll} (84%) rename llvm/test/DebugInfo/X86/{basicblock-sections_1.ll => basic-block-sections_1.ll} (88%) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 64a41ba77ba2f..cdb7355968377 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -965,10 +965,10 @@ static void readConfigs(opt::InputArgList 
&args) { config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); config->ltoBasicBlockSections = - args.getLastArgValue(OPT_lto_basicblock_sections); + args.getLastArgValue(OPT_lto_basic_block_sections); config->ltoUniqueBasicBlockSectionNames = - args.hasFlag(OPT_lto_unique_bb_section_names, - OPT_no_lto_unique_bb_section_names, false); + args.hasFlag(OPT_lto_unique_basic_block_section_names, + OPT_no_lto_unique_basic_block_section_names, false); config->mapFile = args.getLastArgValue(OPT_Map); config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0); config->mergeArmExidx = diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index b8041afed6c96..ae77fadcc78d3 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -76,7 +76,7 @@ static lto::Config createConfig() { c.Options.DataSections = true; // Check if basic block sections must be used. - // Allowed values for --lto-basicblock-sections are "all", "labels", + // Allowed values for --lto-basic-block-sections are "all", "labels", // "<file name specifying basic block ids>", or none. This is the equivalent // of -fbasic-block-sections= flag in clang. if (!config->ltoBasicBlockSections.empty()) { diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c3c1309aca1a7..18bc612f6af45 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -545,9 +545,9 @@ def opt_remarks_with_hotness: FF<"opt-remarks-with-hotness">, def opt_remarks_format: Separate<["--"], "opt-remarks-format">, HelpText<"The format used for serializing remarks (default: YAML)">; def save_temps: F<"save-temps">; -def lto_basicblock_sections: JJ<"lto-basicblock-sections=">, +def lto_basic_block_sections: JJ<"lto-basic-block-sections=">, HelpText<"Enable basic block sections for LTO">; -defm lto_unique_bb_section_names: BB<"lto-unique-bb-section-names", +defm lto_unique_basic_block_section_names: BB<"lto-unique-basic-block-section-names", "Give unique names to every basic block section for LTO", "Do not give unique names to every basic block section for LTO (default)">; def shuffle_sections: JJ<"shuffle-sections=">, MetaVarName<"<seed>">, diff --git a/lld/test/ELF/bb-sections-and-icf.s b/lld/test/ELF/basic-block-sections-and-icf.s similarity index 96% rename from lld/test/ELF/bb-sections-and-icf.s rename to lld/test/ELF/basic-block-sections-and-icf.s index bcc9193c6ed8f..998d63b47b01c 100644 --- a/lld/test/ELF/bb-sections-and-icf.s +++ b/lld/test/ELF/basic-block-sections-and-icf.s @@ -1,5 +1,5 @@ # REQUIRES: x86 -## basicblock-sections tests. +## basic-block-sections tests. ## This simple test checks foo is folded into bar with bb sections ## and the jumps are deleted. diff --git a/lld/test/ELF/bb-sections-delete-fallthru.s b/lld/test/ELF/basic-block-sections-delete-fallthru.s similarity index 98% rename from lld/test/ELF/bb-sections-delete-fallthru.s rename to lld/test/ELF/basic-block-sections-delete-fallthru.s index c8a0e93534242..b029e664a1e12 100644 --- a/lld/test/ELF/bb-sections-delete-fallthru.s +++ b/lld/test/ELF/basic-block-sections-delete-fallthru.s @@ -1,5 +1,5 @@ # REQUIRES: x86 -## basicblock-sections tests. +## basic-block-sections tests. ## This simple test checks if redundant direct jumps are converted to ## implicit fallthrus. The jcc's must be converted to their inverted ## opcode, for instance jne to je and jmp must be deleted.
diff --git a/lld/test/ELF/bb-sections-pc32reloc.s b/lld/test/ELF/basic-block-sections-pc32reloc.s similarity index 96% rename from lld/test/ELF/bb-sections-pc32reloc.s rename to lld/test/ELF/basic-block-sections-pc32reloc.s index 9631a3cfe6c3a..f276cd9fac731 100644 --- a/lld/test/ELF/bb-sections-pc32reloc.s +++ b/lld/test/ELF/basic-block-sections-pc32reloc.s @@ -1,5 +1,5 @@ # REQUIRES: x86 -## basicblock-sections tests. +## basic-block-sections tests. ## This simple test checks if redundant direct jumps are converted to ## implicit fallthrus when PC32 reloc is present. The jcc's must be converted ## to their inverted opcode, for instance jne to je and jmp must be deleted. diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp index 12dadf97e02c6..0ada09f469b76 100644 --- a/llvm/lib/CodeGen/CommandFlags.cpp +++ b/llvm/lib/CodeGen/CommandFlags.cpp @@ -332,7 +332,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { CGBINDOPT(FunctionSections); static cl::opt<std::string> BBSections( - "basicblock-sections", + "basic-block-sections", cl::desc("Emit basic blocks into separate sections"), cl::value_desc("all | <function list (file)> | labels | none"), cl::init("none")); @@ -352,7 +352,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() { CGBINDOPT(UniqueSectionNames); static cl::opt<bool> UniqueBasicBlockSectionNames( - "unique-bb-section-names", + "unique-basic-block-section-names", cl::desc("Give unique names to every basic block section"), cl::init(false)); CGBINDOPT(UniqueBasicBlockSectionNames); diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-branches.ll similarity index 93% rename from llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll rename to llvm/test/CodeGen/X86/basic-block-sections-clusters-branches.ll index a6f297392d94d..ec90c279e6ffa 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters-branches.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-branches.ll @@ -7,7 +7,7 @@ ; RUN: echo '!foo' > %t1 ; RUN: echo '!!0 2' >> %t1 ; RUN: echo '!!1' >> %t1 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 ; ; Test2: Basic blocks #1 and #3 will be placed in the same section. ; The rest (#0 and #2) go into the function's section. @@ -15,7 +15,7 @@ ; #2 must have an explicit jump to #3. ; RUN: echo '!foo' > %t2 ; RUN: echo '!!1 3' >> %t2 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 define void @foo(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-eh.ll similarity index 95% rename from llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll rename to llvm/test/CodeGen/X86/basic-block-sections-clusters-eh.ll index 60f62d4c152ff..4e80c72f11036 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters-eh.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-eh.ll @@ -5,14 +5,14 @@ ; The rest will be placed in a section along with the entry basic block.
; RUN: echo '!main' > %t1 ; RUN: echo '!!1 2' >> %t1 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 ; ; Test2: Basic blocks #1, #2, and #3 go into a separate section. ; No separate exception section will be created as #1 and #3 are already in one section. ; The rest will be placed in a section along with the entry basic block. ; RUN: echo '!main' > %t2 ; RUN: echo '!!1 2 3' >> %t2 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 @_ZTIi = external constant i8* diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll similarity index 75% rename from llvm/test/CodeGen/X86/basicblock-sections-clusters-error.ll rename to llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 29946d0f5b4bf..28c79e28fc038 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -2,19 +2,19 @@ ; RUN: echo '!f' > %t1 ; RUN: echo '!!1 4' >> %t1 ; RUN: echo '!!1' >> %t1 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t1 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR1 ; CHECK-ERROR1: LLVM ERROR: Invalid profile {{.*}} at line 3: Duplicate basic block id found '1'. ; RUN: echo '!f' > %t2 ; RUN: echo '!!4 0' >> %t2 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR2 ; CHECK-ERROR2: LLVM ERROR: Invalid profile {{.*}} at line 2: Entry BB (0) does not begin a cluster. ; RUN: echo '!f' > %t3 ; RUN: echo '!!-1' >> %t3 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR3 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR3 ; CHECK-ERROR3: LLVM ERROR: Invalid profile {{.*}} at line 2: Unsigned integer expected: '-1'. ; RUN: echo '!!1' > %t4 ; RUN: echo '!f' >> %t4 -; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR4 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR4 ; CHECK-ERROR4: LLVM ERROR: Invalid profile {{.*}} at line 1: Cluster list does not follow a function name specifier. 
define i32 @dummy(i32 %x, i32 %y, i32 %z) { diff --git a/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll similarity index 94% rename from llvm/test/CodeGen/X86/basicblock-sections-clusters.ll rename to llvm/test/CodeGen/X86/basic-block-sections-clusters.ll index 2c40542f6808f..12759496fddd3 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-clusters.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters.ll @@ -6,13 +6,13 @@ ; RUN: echo '!foo' > %t1 ; RUN: echo '!!0 2' >> %t1 ; RUN: echo '!!1' >> %t1 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 | FileCheck %s -check-prefix=LINUX-SECTIONS1 ; ; Test2: Basic blocks #1 and #3 will be placed in the same section. ; All other BBs (including the entry block) go into the function's section. ; RUN: echo '!foo' > %t2 ; RUN: echo '!!1 3' >> %t2 -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 | FileCheck %s -check-prefix=LINUX-SECTIONS2 define void @foo(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-cold.ll b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll similarity index 93% rename from llvm/test/CodeGen/X86/basicblock-sections-cold.ll rename to llvm/test/CodeGen/X86/basic-block-sections-cold.ll index c7282a1e57368..432c0952a4a08 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-cold.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-cold.ll @@ -2,7 +2,7 @@ ; Basic block with id 1 and 2 must be in the cold section. 
; RUN: echo '!_Z3bazb' > %t ; RUN: echo '!!0' >> %t -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll b/llvm/test/CodeGen/X86/basic-block-sections-directjumps.ll similarity index 84% rename from llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll rename to llvm/test/CodeGen/X86/basic-block-sections-directjumps.ll index 8604b129b54cf..99a64ef130319 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-directjumps.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-directjumps.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=all -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basic-block-sections=all -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS define void @_Z3bazb(i1 zeroext) { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-eh.ll b/llvm/test/CodeGen/X86/basic-block-sections-eh.ll similarity index 96% rename from llvm/test/CodeGen/X86/basicblock-sections-eh.ll rename to llvm/test/CodeGen/X86/basic-block-sections-eh.ll index 7e5f4a2fe3927..3cc5979d7c0c0 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-eh.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-eh.ll @@ -1,5 +1,5 @@ ; Check if landing pads are kept in a separate eh section -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basic-block-sections=all -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS @_ZTIb = external constant i8* define i32 @_Z3foob(i1 zeroext %0) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/llvm/test/CodeGen/X86/basicblock-sections-labels.ll b/llvm/test/CodeGen/X86/basic-block-sections-labels.ll similarity index 92% rename from llvm/test/CodeGen/X86/basicblock-sections-labels.ll rename to llvm/test/CodeGen/X86/basic-block-sections-labels.ll index 2f077e6866924..80aaf79c115a4 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-labels.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-labels.ll @@ -1,5 +1,5 @@ ; Check the basic block sections labels option -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=labels | FileCheck %s -check-prefix=LINUX-LABELS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=labels | FileCheck %s -check-prefix=LINUX-LABELS define void @_Z3bazb(i1 zeroext) { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-list.ll b/llvm/test/CodeGen/X86/basic-block-sections-list.ll similarity index 95% rename from 
llvm/test/CodeGen/X86/basicblock-sections-list.ll rename to llvm/test/CodeGen/X86/basic-block-sections-list.ll index 9a5056af39ed0..7c48628f73a8c 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-list.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-list.ll @@ -1,6 +1,6 @@ ; Check the basic block sections list option. ; RUN: echo '!_Z3foob' > %t -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS define i32 @_Z3foob(i1 zeroext %0) nounwind { %2 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll similarity index 93% rename from llvm/test/CodeGen/X86/basicblock-sections-listbb.ll rename to llvm/test/CodeGen/X86/basic-block-sections-listbb.ll index ac17a461d7af9..ab729ecf9eb24 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-listbb.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-listbb.ll @@ -2,7 +2,7 @@ ; Only basic block with id 2 must get a section. ; RUN: echo '!_Z3bazb' > %t ; RUN: echo '!!2' >> %t -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir similarity index 98% rename from llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir rename to llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir index 6011342a6f003..a4219fa1509d5 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir @@ -10,7 +10,7 @@ # } # # clang -O0 -S -emit-llvm foo.cc -# llc < foo.ll -stop-after=bbsections-prepare -basicblock-sections=all +# llc < foo.ll -stop-after=bbsections-prepare -basic-block-sections=all --- | ; Function Attrs: noinline nounwind optnone uwtable diff --git a/llvm/test/CodeGen/X86/basicblock-sections-mir-print.ll b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll similarity index 91% rename from llvm/test/CodeGen/X86/basicblock-sections-mir-print.ll rename to llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll index bd66f44ecc6db..7b3c7246971da 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections-mir-print.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-print.ll @@ -2,7 +2,7 @@ ; RUN: echo '!_Z3foob' > %t ; RUN: echo '!!1' >> %t ; RUN: echo '!!2' >> %t -; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=%t -stop-after=bbsections-prepare | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t -stop-after=bbsections-prepare | FileCheck %s -check-prefix=CHECK @_ZTIb = external constant i8* define dso_local i32 @_Z3foob(i1 zeroext %0) { diff --git a/llvm/test/CodeGen/X86/basicblock-sections.ll b/llvm/test/CodeGen/X86/basic-block-sections.ll similarity index 84% rename from llvm/test/CodeGen/X86/basicblock-sections.ll rename to llvm/test/CodeGen/X86/basic-block-sections.ll 
index 5c17b755fa70b..d996f5e9f5397 100644 --- a/llvm/test/CodeGen/X86/basicblock-sections.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basicblock-sections=all -unique-bb-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=all -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections -basic-block-sections=all -unique-basic-block-section-names | FileCheck %s -check-prefix=LINUX-SECTIONS define void @_Z3bazb(i1 zeroext) nounwind { %2 = alloca i8, align 1 diff --git a/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll b/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll index 62e669eff9e4e..a5a6d697451fc 100644 --- a/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll +++ b/llvm/test/CodeGen/X86/cfi-basic-block-sections-1.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s -; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=none -o - | FileCheck --check-prefix=SECTIONS_NOFP_CFI %s -; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64 -filetype=obj --frame-pointer=all -o - | llvm-dwarfdump --eh-frame - | FileCheck --check-prefix=EH_FRAME %s +; RUN: llc -O0 %s --basic-block-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s +; RUN: llc -O0 %s --basic-block-sections=all -mtriple=x86_64 -filetype=asm --frame-pointer=none -o - | FileCheck --check-prefix=SECTIONS_NOFP_CFI %s +; RUN: llc -O0 %s --basic-block-sections=all -mtriple=x86_64 -filetype=obj --frame-pointer=all -o - | llvm-dwarfdump --eh-frame - | FileCheck --check-prefix=EH_FRAME %s ;; void f1(); ;; void f3(bool b) { diff --git a/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll b/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll index 19725138f6ed9..d87ead0e864cf 100644 --- a/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll +++ b/llvm/test/CodeGen/X86/cfi-inserter-basic-block-sections-callee-save-registers.ll @@ -1,6 +1,6 @@ ;; This test checks if CFI instructions for all callee saved registers are emitted ;; correctly with basic block sections. 
-; RUN: llc %s -mtriple=x86_64 -filetype=asm --basicblock-sections=all --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s +; RUN: llc %s -mtriple=x86_64 -filetype=asm --basic-block-sections=all --frame-pointer=all -o - | FileCheck --check-prefix=SECTIONS_CFI %s ; SECTIONS_CFI: _Z3foob: ; SECTIONS_CFI: .cfi_offset %rbp, -16 diff --git a/llvm/test/DebugInfo/X86/basicblock-sections_1.ll b/llvm/test/DebugInfo/X86/basic-block-sections_1.ll similarity index 88% rename from llvm/test/DebugInfo/X86/basicblock-sections_1.ll rename to llvm/test/DebugInfo/X86/basic-block-sections_1.ll index f3bedd977693f..655e84731cba1 100644 --- a/llvm/test/DebugInfo/X86/basicblock-sections_1.ll +++ b/llvm/test/DebugInfo/X86/basic-block-sections_1.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 %s -mtriple=x86_64-* -filetype=obj -o %t && llvm-dwarfdump -debug-info -v %t | FileCheck --check-prefix=NO-SECTIONS %s -; RUN: llc -O0 %s --basicblock-sections=all --unique-bb-section-names -mtriple=x86_64-* -filetype=obj -o %t && llvm-dwarfdump -debug-info -v %t | FileCheck --check-prefix=BB-SECTIONS %s -; RUN: llc -O0 %s --basicblock-sections=all --unique-bb-section-names -mtriple=x86_64-* -filetype=obj -split-dwarf-file=%t.dwo -o %t && llvm-dwarfdump -debug-info -v %t | FileCheck --check-prefix=BB-SECTIONS %s -; RUN: llc -O0 %s --basicblock-sections=all -mtriple=x86_64-* -filetype=asm -o - | FileCheck --check-prefix=BB-SECTIONS-ASM %s +; RUN: llc -O0 %s --basic-block-sections=all --unique-basic-block-section-names -mtriple=x86_64-* -filetype=obj -o %t && llvm-dwarfdump -debug-info -v %t | FileCheck --check-prefix=BB-SECTIONS %s +; RUN: llc -O0 %s --basic-block-sections=all --unique-basic-block-section-names -mtriple=x86_64-* -filetype=obj -split-dwarf-file=%t.dwo -o %t && llvm-dwarfdump -debug-info -v %t | FileCheck --check-prefix=BB-SECTIONS %s +; RUN: llc -O0 %s --basic-block-sections=all -mtriple=x86_64-* -filetype=asm -o - | FileCheck --check-prefix=BB-SECTIONS-ASM %s ; From: ; int foo(int a) { From 7212ad067e6efcd8431a9e38f26de45ae21eeafb Mon Sep 17 00:00:00 2001 From: Chris Gyurgyik <37983775+cgyurgyik@users.noreply.github.com> Date: Fri, 31 Jul 2020 14:57:46 -0400 Subject: [PATCH 054/600] [libc] [obvious] Add rest of strrchr test. --- libc/test/src/string/strrchr_test.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libc/test/src/string/strrchr_test.cpp b/libc/test/src/string/strrchr_test.cpp index 18fddda600870..cf29de220d498 100644 --- a/libc/test/src/string/strrchr_test.cpp +++ b/libc/test/src/string/strrchr_test.cpp @@ -49,12 +49,15 @@ TEST(StrRChrTest, FindsNullTerminator) { ASSERT_STREQ(src, src_copy); } -TEST(StrRChrTest, FindsLastNullTerminator) { - const char src[5] = {'a', '\0', 'b', '\0', 'c'}; +TEST(StrRChrTest, FindsLastBehindFirstNullTerminator) { + const char src[6] = {'a', 'a', '\0', 'b', '\0', 'c'}; // 'b' is behind a null terminator, so should not be found. ASSERT_STREQ(__llvm_libc::strrchr(src, 'b'), nullptr); // Same goes for 'c'. ASSERT_STREQ(__llvm_libc::strrchr(src, 'c'), nullptr); + + // Should find the second of the two a's. + ASSERT_STREQ(__llvm_libc::strrchr(src, 'a'), "a"); } TEST(StrRChrTest, CharacterNotWithinStringShouldReturnNullptr) { From a77afc62d9e48c334e5a7a03fe6ffe3b614742d5 Mon Sep 17 00:00:00 2001 From: Sriraman Tallam Date: Fri, 31 Jul 2020 12:00:59 -0700 Subject: [PATCH 055/600] New test for basic block sections options. 
This tests lld basic block sections options: + --lto-basic-block-sections= + --lto-unique-basic-block-section-names Differential Revision: https://reviews.llvm.org/D84462 --- lld/test/ELF/lto/basic-block-sections.ll | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 lld/test/ELF/lto/basic-block-sections.ll diff --git a/lld/test/ELF/lto/basic-block-sections.ll b/lld/test/ELF/lto/basic-block-sections.ll new file mode 100644 index 0000000000000..04cd296262b1e --- /dev/null +++ b/lld/test/ELF/lto/basic-block-sections.ll @@ -0,0 +1,41 @@ +; REQUIRES: x86 +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld %t.o -o %t --lto-basic-block-sections=all --lto-O0 --save-temps +; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES %s +; RUN: ld.lld %t.o -o %t --lto-basic-block-sections=all --lto-unique-basic-block-section-names --lto-O0 --save-temps +; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES-FULL %s +; RUN: llvm-nm %t.out | FileCheck --check-prefix=SYMS %s + +; SECNAMES: Name: .text.foo {{.*}} +; SECNAMES: Name: .text.foo {{.*}} +; SECNAMES: Name: .text.foo {{.*}} + +; SECNAMES-FULL: Name: .text.foo {{.*}} +; SECNAMES-FULL: Name: .text.foo.foo.1 {{.*}} +; SECNAMES-FULL: Name: .text.foo.foo.2 {{.*}} + +; SYMS: foo +; SYMS: foo.1 +; SYMS: foo.2 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i32 %b) local_unnamed_addr { +entry: + %tobool.not = icmp eq i32 %b, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @foo(i32 0) + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + +define void @_start() { + call void @foo(i32 1) + ret void +} From 938adf42e6b07414d4194ce8b76bbdcdc9df6459 Mon Sep 17 00:00:00 2001 From: Sriraman Tallam Date: Fri, 31 Jul 2020 12:26:53 -0700 Subject: [PATCH 056/600] Fix a test typo which caused a breakage. --- lld/test/ELF/lto/basic-block-sections.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/ELF/lto/basic-block-sections.ll b/lld/test/ELF/lto/basic-block-sections.ll index 04cd296262b1e..1f932ac50a879 100644 --- a/lld/test/ELF/lto/basic-block-sections.ll +++ b/lld/test/ELF/lto/basic-block-sections.ll @@ -4,7 +4,7 @@ ; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES %s ; RUN: ld.lld %t.o -o %t --lto-basic-block-sections=all --lto-unique-basic-block-section-names --lto-O0 --save-temps ; RUN: llvm-readobj -s %t.lto.o | FileCheck --check-prefix=SECNAMES-FULL %s -; RUN: llvm-nm %t.out | FileCheck --check-prefix=SYMS %s +; RUN: llvm-nm %t | FileCheck --check-prefix=SYMS %s ; SECNAMES: Name: .text.foo {{.*}} ; SECNAMES: Name: .text.foo {{.*}} From 38d3e7533279fd4bfefcd88eac7d3b64f804c53a Mon Sep 17 00:00:00 2001 From: Jaydeep Chauhan Date: Fri, 31 Jul 2020 20:22:22 +0100 Subject: [PATCH 057/600] [clang] Use the location of the void parameters when complaining that only a single void parameter should be present. Fixes PR46417. 
Differential Revision: https://reviews.llvm.org/D84678 Reviewed By: aaron.ballman --- clang/lib/Sema/SemaType.cpp | 2 +- clang/test/SemaCXX/void-argument.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 clang/test/SemaCXX/void-argument.cpp diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index abf1d6450036f..ff5223c0795e5 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -5114,7 +5114,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, // is an incomplete type (C99 6.2.5p19) and function decls cannot // have parameters of incomplete type. if (FTI.NumParams != 1 || FTI.isVariadic) { - S.Diag(DeclType.Loc, diag::err_void_only_param); + S.Diag(FTI.Params[i].IdentLoc, diag::err_void_only_param); ParamTy = Context.IntTy; Param->setType(ParamTy); } else if (FTI.Params[i].Ident) { diff --git a/clang/test/SemaCXX/void-argument.cpp b/clang/test/SemaCXX/void-argument.cpp new file mode 100644 index 0000000000000..8354347f5559e --- /dev/null +++ b/clang/test/SemaCXX/void-argument.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +void fun( + void a, // expected-error{{'void' must be the first and only parameter if specified}} + double b, + int c, + void d, // expected-error{{'void' must be the first and only parameter if specified}} + int e, + void f) // expected-error{{'void' must be the first and only parameter if specified}} +{} + +void foo( + int a, + void, // expected-error{{'void' must be the first and only parameter if specified}} + int b); + +void bar( + void, // expected-error{{'void' must be the first and only parameter if specified}} + ...); + +struct S { + S( + void, // expected-error{{'void' must be the first and only parameter if specified}} + void); // expected-error{{'void' must be the first and only parameter if specified}} +}; From 8739445e3221ea05bc71a592f4114e10510b0b34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Thu, 30 Jul 2020 21:25:38 +0200 Subject: [PATCH 058/600] [lldb] force full gui redraw on Ctrl+L As is common with curses apps, this allows redrawing everything in case something corrupts the screen. Key modifiers are apparently difficult with curses (the curses FAQ says it "doesn't do that"); thankfully, Ctrl+key combinations are plain control characters, so Ctrl+L simply arrives as (ascii & 037) => 12.
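As a quick sanity check of that arithmetic, here is a minimal standalone sketch; the `ctrl` helper is illustrative and not part of the patch:

```c++
#include <cassert>

// A terminal encodes Ctrl+<key> by masking the key's ASCII code down to its
// low five bits; octal 037 is 0x1f, so 'L' (0x4c) becomes 0x0c == 12.
constexpr int ctrl(int key) { return key & 037; }

int main() {
  assert(ctrl('L') == 12); // the value the patch compares `ch` against
  assert(ctrl('l') == 12); // the mask makes Ctrl+L case-insensitive
  return 0;
}
```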
Differential Revision: https://reviews.llvm.org/D84972 --- lldb/source/Core/IOHandlerCursesGUI.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index fda3aa1886795..262a19dc04b4c 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1287,6 +1287,10 @@ class Application { update = true; break; case eKeyNotHandled: + if (ch == 12) { // Ctrl+L, force full redraw + redrawwin(m_window_sp->get()); + update = true; + } break; case eQuitApplication: done = true; From 77d5a63c191ca791f081ff153276170bbfb10cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Thu, 30 Jul 2020 18:08:13 +0200 Subject: [PATCH 059/600] [lldb] report an error if a CLI option lacks an argument Differential Revision: https://reviews.llvm.org/D84955 --- lldb/test/Shell/Driver/TestError.test | 2 ++ lldb/tools/driver/Driver.cpp | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 lldb/test/Shell/Driver/TestError.test diff --git a/lldb/test/Shell/Driver/TestError.test b/lldb/test/Shell/Driver/TestError.test new file mode 100644 index 0000000000000..3d34a72b14aba --- /dev/null +++ b/lldb/test/Shell/Driver/TestError.test @@ -0,0 +1,2 @@ +RUN: not %lldb --arch 2>&1 | FileCheck %s +CHECK: error: argument to '--arch' is missing diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index cea9e5a44aa80..0cd1ffc57aa7a 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -853,10 +853,11 @@ int main(int argc, char const *argv[]) { // Parse arguments. LLDBOptTable T; - unsigned MAI; - unsigned MAC; + unsigned MissingArgIndex; + unsigned MissingArgCount; ArrayRef arg_arr = makeArrayRef(argv + 1, argc - 1); - opt::InputArgList input_args = T.ParseArgs(arg_arr, MAI, MAC); + opt::InputArgList input_args = + T.ParseArgs(arg_arr, MissingArgIndex, MissingArgCount); llvm::StringRef argv0 = llvm::sys::path::filename(argv[0]); if (input_args.hasArg(OPT_help)) { @@ -864,11 +865,19 @@ int main(int argc, char const *argv[]) { return 0; } + // Check for missing argument error. + if (MissingArgCount) { + WithColor::error() << "argument to '" + << input_args.getArgString(MissingArgIndex) + << "' is missing\n"; + } // Error out on unknown options. 
if (input_args.hasArg(OPT_UNKNOWN)) { for (auto *arg : input_args.filtered(OPT_UNKNOWN)) { WithColor::error() << "unknown option: " << arg->getSpelling() << '\n'; } + } + if (MissingArgCount || input_args.hasArg(OPT_UNKNOWN)) { llvm::errs() << "Use '" << argv0 << " --help' for a complete list of options.\n"; return 1; From eb8c72cb0d8949114350d2f24a1741898b63dae5 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Fri, 31 Jul 2020 11:39:49 -0700 Subject: [PATCH 060/600] [MLIR][NFC] Add FuncOp::getArgumentTypes() Differential Revision: https://reviews.llvm.org/D85038 --- mlir/include/mlir/IR/FunctionSupport.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h index b358215ca9624..7e281f393af94 100644 --- a/mlir/include/mlir/IR/FunctionSupport.h +++ b/mlir/include/mlir/IR/FunctionSupport.h @@ -206,6 +206,7 @@ class FunctionLike : public OpTrait::TraitBase<ConcreteType, FunctionLike> { //===--------------------------------------------------------------------===// // Argument Handling //===--------------------------------------------------------------------===// + using BlockArgListType = Region::BlockArgListType; unsigned getNumArguments() { return static_cast<ConcreteType *>(this)->getNumFuncArguments(); } @@ -224,6 +225,10 @@ class FunctionLike : public OpTrait::TraitBase<ConcreteType, FunctionLike> { args_iterator args_end() { return getBody().args_end(); } Block::BlockArgListType getArguments() { return getBody().getArguments(); } + ValueTypeRange<BlockArgListType> getArgumentTypes() { + return getBody().getArgumentTypes(); + } + //===--------------------------------------------------------------------===// // Argument Attributes //===--------------------------------------------------------------------===// From 2a6c8b2e9581ebca4b05d1e64458f2dccf3db61f Mon Sep 17 00:00:00 2001 From: River Riddle Date: Fri, 31 Jul 2020 13:18:13 -0700 Subject: [PATCH 061/600] [mlir][PassIncGen] Refactor how pass registration is generated The current output is a bit clunky and requires including files+macros everywhere, or manually wrapping the file inclusion in a registration function. This revision refactors the pass backend to automatically generate `registerFooPass`/`registerFooPasses` functions that wrap the pass registration. `gen-pass-decls` now takes a `-name` input that specifies a tag name for the group of passes that are being generated. For each pass, the generator now produces a `registerFooPass` where `Foo` is the name of the definition specified in tablegen. It also generates a `registerGroupPasses`, where `Group` is the tag provided via the `-name` input parameter, that registers all of the passes present.
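A minimal usage sketch of the generated interface, assuming a group generated with `-name Example` that contains a tablegen definition named `MyPass` (both names are illustrative, not from this patch):

```c++
// Passes.h.inc is assumed to be produced by:
//   mlir_tablegen(Passes.h.inc -gen-pass-decls -name Example)
#define GEN_PASS_REGISTRATION
#include "Passes.h.inc"

void registerMyToolPasses() {
  // One call registers every pass in the `Example` group;
  // registerMyPassPass() would instead register just the single pass.
  registerExamplePasses();
}
```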
Differential Revision: https://reviews.llvm.org/D84983 --- .../flang/Optimizer/CodeGen/CMakeLists.txt | 2 +- .../include/flang/Optimizer/CodeGen/CodeGen.h | 3 - .../flang/Optimizer/Transforms/CMakeLists.txt | 2 +- .../flang/Optimizer/Transforms/Passes.h | 3 - mlir/docs/PassManagement.md | 30 +++++-- .../AVX512ToLLVM/ConvertAVX512ToLLVM.h | 6 +- .../AffineToStandard/AffineToStandard.h | 7 ++ mlir/include/mlir/Conversion/CMakeLists.txt | 2 +- mlir/include/mlir/Conversion/Passes.h | 41 ++++++++++ .../mlir/Dialect/Affine/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/Affine/Passes.h | 15 ++-- mlir/include/mlir/Dialect/GPU/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/GPU/Passes.h | 16 ++-- .../Dialect/LLVMIR/Transforms/CMakeLists.txt | 2 +- .../mlir/Dialect/LLVMIR/Transforms/Passes.h | 26 ++++++ .../mlir/Dialect/Linalg/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/Linalg/Passes.h | 18 ++--- .../include/mlir/Dialect/Quant/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/Quant/Passes.h | 13 ++- mlir/include/mlir/Dialect/SCF/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/SCF/Passes.h | 13 ++- .../include/mlir/Dialect/SPIRV/CMakeLists.txt | 2 +- mlir/include/mlir/Dialect/SPIRV/Passes.h | 8 ++ .../Dialect/Shape/Transforms/CMakeLists.txt | 2 +- .../mlir/Dialect/Shape/Transforms/Passes.h | 16 ++-- .../StandardOps/Transforms/CMakeLists.txt | 2 +- .../Dialect/StandardOps/Transforms/Passes.h | 12 ++- mlir/include/mlir/InitAllPasses.h | 80 ++++--------------- mlir/include/mlir/Transforms/CMakeLists.txt | 2 +- mlir/include/mlir/Transforms/Passes.h | 23 ++++-- mlir/tools/mlir-tblgen/PassGen.cpp | 59 +++++++++----- 31 files changed, 257 insertions(+), 158 deletions(-) create mode 100644 mlir/include/mlir/Conversion/Passes.h create mode 100644 mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h diff --git a/flang/include/flang/Optimizer/CodeGen/CMakeLists.txt b/flang/include/flang/Optimizer/CodeGen/CMakeLists.txt index ab6526ee18330..9acf6f89e12f5 100644 --- a/flang/include/flang/Optimizer/CodeGen/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CodeGen/CMakeLists.txt @@ -1,6 +1,6 @@ set(LLVM_TARGET_DEFINITIONS CGPasses.td) -mlir_tablegen(CGPasses.h.inc -gen-pass-decls) +mlir_tablegen(CGPasses.h.inc -gen-pass-decls -name OptCodeGen) add_public_tablegen_target(FIROptCodeGenPassIncGen) add_mlir_doc(Passes -gen-pass-doc OptimizerCodeGenPasses ./) diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h index 9b968172f3486..a90d0a50dac64 100644 --- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h +++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h @@ -28,12 +28,9 @@ std::unique_ptr createFIRToLLVMPass(NameUniquer &uniquer); std::unique_ptr createLLVMDialectToLLVMPass(llvm::raw_ostream &output); -inline void registerOptCodeGenPasses() { - using mlir::Pass; // declarative passes #define GEN_PASS_REGISTRATION #include "flang/Optimizer/CodeGen/CGPasses.h.inc" -} } // namespace fir diff --git a/flang/include/flang/Optimizer/Transforms/CMakeLists.txt b/flang/include/flang/Optimizer/Transforms/CMakeLists.txt index fde17eb88622e..b928991e0a37e 100644 --- a/flang/include/flang/Optimizer/Transforms/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Transforms/CMakeLists.txt @@ -1,6 +1,6 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name OptTransform) add_public_tablegen_target(FIROptTransformsPassIncGen) add_mlir_doc(Passes -gen-pass-doc OptimizerTransformPasses ./) diff 
--git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 9377c2dc61cc3..5e71995736e6a 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -46,12 +46,9 @@ std::unique_ptr<mlir::Pass> createMemToRegPass(); bool canLegallyInline(mlir::Operation *op, mlir::Region *reg, mlir::BlockAndValueMapping &map); -inline void registerOptTransformPasses() { -using mlir::Pass; // declarative passes #define GEN_PASS_REGISTRATION #include "flang/Optimizer/Transforms/Passes.h.inc" -} } // namespace fir diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index 04a4ca0a7b3c5..92ca92218219c 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -622,18 +622,34 @@ def MyPass : Pass<"my-pass", "ModuleOp"> { } ``` -We can include the generated registration calls via: +Using the `gen-pass-decls` generator, we can generate much of the +boilerplate above automatically. This generator takes as input a `-name` +parameter that provides a tag for the group of passes that are being generated. +This generator produces two chunks of output: + +The first is the code for registering the declarative passes with the global +registry. For each pass, the generator produces a `registerFooPass` where `Foo` +is the name of the definition specified in tablegen. It also generates a +`registerGroupPasses`, where `Group` is the tag provided via the `-name` input +parameter, that registers all of the passes present. ```c++ -void registerMyPasses() { - // The generated registration is not static, so we need to include this in - // a location that we can call into. #define GEN_PASS_REGISTRATION #include "Passes.h.inc" + +void registerMyPasses() { + // Register all of our passes. + registerMyPasses(); + + // Register `MyPass` specifically. + registerMyPassPass(); } ``` -We can then update the original C++ pass definition: +The second is a base class for each of the passes, with each containing most of +the boilerplate related to pass definition. These classes are named in the form +of `MyPassBase`, where `MyPass` is the name of the definition in tablegen. We +can update the original C++ pass definition like so: ```c++ /// Include the generated base pass class definitions. @@ -651,6 +667,10 @@ std::unique_ptr<Pass> foo::createMyPass() { } ``` +Using the `gen-pass-doc` generator, we can generate markdown documentation for +each of our passes. See [Passes.md](Passes.md) for example output of real MLIR +passes. + ### Tablegen Specification The `Pass` class is used to begin a new pass definition.
This class takes as an diff --git a/mlir/include/mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h b/mlir/include/mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h index fdd203a6f6ef1..aff5c4ca2c70d 100644 --- a/mlir/include/mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h +++ b/mlir/include/mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_EDGE_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ -#define MLIR_EDGE_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ +#ifndef MLIR_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ +#define MLIR_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ #include @@ -26,4 +26,4 @@ std::unique_ptr> createConvertAVX512ToLLVMPass(); } // namespace mlir -#endif // MLIR_EDGE_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ +#endif // MLIR_CONVERSION_AVX512TOLLVM_CONVERTAVX512TOLLVM_H_ diff --git a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h index 4deffafe0ec60..4647cacdd9cd8 100644 --- a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h +++ b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h @@ -20,6 +20,7 @@ class Location; struct LogicalResult; class MLIRContext; class OpBuilder; +class Pass; class RewritePattern; class Value; class ValueRange; @@ -57,6 +58,12 @@ Value lowerAffineLowerBound(AffineForOp op, OpBuilder &builder); /// Emit code that computes the upper bound of the given affine loop using /// standard arithmetic operations. Value lowerAffineUpperBound(AffineForOp op, OpBuilder &builder); + +/// Lowers affine control flow operations (ForStmt, IfStmt and AffineApplyOp) +/// to equivalent lower-level constructs (flow of basic blocks and arithmetic +/// primitives). +std::unique_ptr createLowerAffinePass(); + } // namespace mlir #endif // MLIR_CONVERSION_AFFINETOSTANDARD_AFFINETOSTANDARD_H diff --git a/mlir/include/mlir/Conversion/CMakeLists.txt b/mlir/include/mlir/Conversion/CMakeLists.txt index d4ce2634f4505..ae0afc97dc639 100644 --- a/mlir/include/mlir/Conversion/CMakeLists.txt +++ b/mlir/include/mlir/Conversion/CMakeLists.txt @@ -1,6 +1,6 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Conversion) add_public_tablegen_target(MLIRConversionPassIncGen) add_mlir_doc(Passes -gen-pass-doc ConversionPasses ./) diff --git a/mlir/include/mlir/Conversion/Passes.h b/mlir/include/mlir/Conversion/Passes.h new file mode 100644 index 0000000000000..87f2c97e766df --- /dev/null +++ b/mlir/include/mlir/Conversion/Passes.h @@ -0,0 +1,41 @@ +//===- Passes.h - Conversion Pass Construction and Registration -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_PASSES_H +#define MLIR_CONVERSION_PASSES_H + +#include "mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" +#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" +#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h" +#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h" +#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" +#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h" +#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h" +#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" +#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" +#include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" +#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" +#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" +#include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" +#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" +#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h" +#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" + +namespace mlir { + +/// Generate the code for registering conversion passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Conversion/Passes.h.inc" + +} // namespace mlir + +#endif // MLIR_CONVERSION_PASSES_H diff --git a/mlir/include/mlir/Dialect/Affine/CMakeLists.txt b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt index 404c926f60eda..96d951dedf4c6 100644 --- a/mlir/include/mlir/Dialect/Affine/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Affine/CMakeLists.txt @@ -1,7 +1,7 @@ add_subdirectory(IR) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Affine) add_public_tablegen_target(MLIRAffinePassIncGen) add_mlir_doc(Passes -gen-pass-doc AffinePasses ./) diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index 18b3b790338d8..f2cef42a43561 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -14,17 +14,12 @@ #ifndef MLIR_DIALECT_AFFINE_TRANSFORMS_PASSES_H #define MLIR_DIALECT_AFFINE_TRANSFORMS_PASSES_H -#include "mlir/Support/LLVM.h" -#include +#include "mlir/Pass/Pass.h" #include namespace mlir { class AffineForOp; -class FuncOp; -class ModuleOp; -class Pass; -template class OperationPass; /// Creates a simplification pass for affine structures (maps and sets). In /// addition, this pass also normalizes memrefs to have the trivial (identity) @@ -79,6 +74,14 @@ createSuperVectorizePass(ArrayRef virtualVectorSize); /// Overload relying on pass options for initialization. std::unique_ptr> createSuperVectorizePass(); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. 
+#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Affine/Passes.h.inc" + } // end namespace mlir #endif // MLIR_DIALECT_AFFINE_RANSFORMS_PASSES_H diff --git a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt index 6c80b4c8e3b98..68313c9788422 100644 --- a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt @@ -12,7 +12,7 @@ mlir_tablegen(ParallelLoopMapperEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRParallelLoopMapperEnumsGen) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name GPU) add_public_tablegen_target(MLIRGPUPassIncGen) add_mlir_doc(Passes -gen-pass-doc GPUPasses ./) diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h index bc349061f39f0..64b744b6b172c 100644 --- a/mlir/include/mlir/Dialect/GPU/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Passes.h @@ -13,21 +13,23 @@ #ifndef MLIR_DIALECT_GPU_PASSES_H_ #define MLIR_DIALECT_GPU_PASSES_H_ -#include +#include "mlir/Pass/Pass.h" namespace mlir { - -class MLIRContext; -class ModuleOp; -template class OperationPass; -class OwningRewritePatternList; - std::unique_ptr> createGpuKernelOutliningPass(); /// Collect a set of patterns to rewrite ops within the GPU dialect. void populateGpuRewritePatterns(MLIRContext *context, OwningRewritePatternList &patterns); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/GPU/Passes.h.inc" + } // namespace mlir #endif // MLIR_DIALECT_GPU_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt index a2fd81c23e11b..a744b0706ffd4 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name LLVM) add_public_tablegen_target(MLIRLLVMPassIncGen) add_mlir_doc(Passes -gen-pass-doc LLVMPasses ./) diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h new file mode 100644 index 0000000000000..868a0e5635105 --- /dev/null +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h @@ -0,0 +1,26 @@ +//===- Passes.h - LLVM Pass Construction and Registration -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES_H +#define MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES_H + +#include "mlir/Dialect/LLVMIR/Transforms/LegalizeForExport.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { + +namespace LLVM { + +/// Generate the code for registering conversion passes. 
+#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc" + +} // namespace LLVM +} // namespace mlir + +#endif // MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES_H diff --git a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt index 66ac74515dddd..d0edae3979e04 100644 --- a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt @@ -1,7 +1,7 @@ add_subdirectory(IR) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Linalg) add_public_tablegen_target(MLIRLinalgPassIncGen) add_mlir_doc(Passes -gen-pass-doc LinalgPasses ./) diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h index a5c09b3f75b7c..d74714cdaa56b 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.h +++ b/mlir/include/mlir/Dialect/Linalg/Passes.h @@ -13,17 +13,9 @@ #ifndef MLIR_DIALECT_LINALG_PASSES_H_ #define MLIR_DIALECT_LINALG_PASSES_H_ -#include "mlir/Support/LLVM.h" -#include "llvm/ADT/ArrayRef.h" +#include "mlir/Pass/Pass.h" namespace mlir { -class FuncOp; -class MLIRContext; -class ModuleOp; -template class OperationPass; -class OwningRewritePatternList; -class Pass; - std::unique_ptr> createLinalgFoldUnitExtentDimsPass(); std::unique_ptr> createLinalgFusionPass(); @@ -66,6 +58,14 @@ void populateLinalgTensorOpsFusionPatterns(MLIRContext *context, void populateLinalgFoldUnitExtentDimsPatterns( MLIRContext *context, OwningRewritePatternList &patterns); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Linalg/Passes.h.inc" + } // namespace mlir #endif // MLIR_DIALECT_LINALG_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt index 1a48e4928b334..177d129a805af 100644 --- a/mlir/include/mlir/Dialect/Quant/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Quant/CMakeLists.txt @@ -2,7 +2,7 @@ add_mlir_dialect(QuantOps quant) add_mlir_doc(QuantOps -gen-dialect-doc QuantDialect Dialects/) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Quant) add_public_tablegen_target(MLIRQuantPassIncGen) add_mlir_doc(Passes -gen-pass-doc QuantPasses ./) diff --git a/mlir/include/mlir/Dialect/Quant/Passes.h b/mlir/include/mlir/Dialect/Quant/Passes.h index b938c9a86b722..090653eabe3ff 100644 --- a/mlir/include/mlir/Dialect/Quant/Passes.h +++ b/mlir/include/mlir/Dialect/Quant/Passes.h @@ -16,12 +16,9 @@ #ifndef MLIR_DIALECT_QUANT_PASSES_H #define MLIR_DIALECT_QUANT_PASSES_H -#include +#include "mlir/Pass/Pass.h" namespace mlir { -class FuncOp; -template class OperationPass; - namespace quant { /// Creates a pass that converts quantization simulation operations (i.e. @@ -35,6 +32,14 @@ std::unique_ptr> createConvertSimulatedQuantPass(); /// destructive and cannot be undone. std::unique_ptr> createConvertConstPass(); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. 
+#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Quant/Passes.h.inc" + } // namespace quant } // namespace mlir diff --git a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt index 9467b97b384bd..546ada0224cf1 100644 --- a/mlir/include/mlir/Dialect/SCF/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SCF/CMakeLists.txt @@ -2,7 +2,7 @@ add_mlir_dialect(SCFOps scf Ops) add_mlir_doc(SCFOps -gen-dialect-doc SCFDialect Dialects/) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name SCF) add_public_tablegen_target(MLIRSCFPassIncGen) add_dependencies(mlir-headers MLIRSCFPassIncGen) diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h index df6037874f2b4..7edb2444e87c0 100644 --- a/mlir/include/mlir/Dialect/SCF/Passes.h +++ b/mlir/include/mlir/Dialect/SCF/Passes.h @@ -13,13 +13,10 @@ #ifndef MLIR_DIALECT_SCF_PASSES_H_ #define MLIR_DIALECT_SCF_PASSES_H_ -#include "llvm/ADT/ArrayRef.h" -#include +#include "mlir/Pass/Pass.h" namespace mlir { -class Pass; - /// Creates a pass that specializes for loop for unrolling and /// vectorization. std::unique_ptr createForLoopSpecializationPass(); @@ -35,6 +32,14 @@ std::unique_ptr createParallelLoopSpecializationPass(); std::unique_ptr createParallelLoopTilingPass(llvm::ArrayRef tileSize = {}); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/SCF/Passes.h.inc" + } // namespace mlir #endif // MLIR_DIALECT_SCF_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt index 1e0901f07e91d..ff078ef9d9464 100644 --- a/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/SPIRV/CMakeLists.txt @@ -38,7 +38,7 @@ add_public_tablegen_target(MLIRSPIRVTargetAndABIIncGen) add_dependencies(mlir-headers MLIRSPIRVTargetAndABIIncGen) set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name SPIRV) add_public_tablegen_target(MLIRSPIRVPassIncGen) add_dependencies(mlir-headers MLIRSPIRVPassIncGen) diff --git a/mlir/include/mlir/Dialect/SPIRV/Passes.h b/mlir/include/mlir/Dialect/SPIRV/Passes.h index df516430be52c..dbd2c93a53a57 100644 --- a/mlir/include/mlir/Dialect/SPIRV/Passes.h +++ b/mlir/include/mlir/Dialect/SPIRV/Passes.h @@ -50,6 +50,14 @@ std::unique_ptr> createLowerABIAttributesPass(); /// spv.CompositeInsert into spv.CompositeConstruct. std::unique_ptr> createRewriteInsertsPass(); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. 
+#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/SPIRV/Passes.h.inc" + } // namespace spirv } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/Shape/Transforms/CMakeLists.txt index 629b8c0db2947..8bbe1cb3fbc63 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Shape/Transforms/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Shape) add_public_tablegen_target(MLIRShapeTransformsIncGen) add_mlir_doc(Passes -gen-pass-doc ShapePasses ./) diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h index e8d2167916d06..543ffc617a5cb 100644 --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h @@ -14,15 +14,9 @@ #ifndef MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES_H_ #define MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES_H_ -#include +#include "mlir/Pass/Pass.h" namespace mlir { - -class FunctionPass; -class MLIRContext; -class OwningRewritePatternList; -class Pass; - /// Creates an instance of the ShapeToShapeLowering pass that legalizes Shape /// dialect to be convertible to Standard. For example, `shape.num_elements` get /// transformed to `shape.reduce`, which can be lowered to SCF and Standard. @@ -42,6 +36,14 @@ void populateRemoveShapeConstraintsPatterns(OwningRewritePatternList &patterns, MLIRContext *ctx); std::unique_ptr createRemoveShapeConstraintsPass(); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/Shape/Transforms/Passes.h.inc" + } // end namespace mlir #endif // MLIR_DIALECT_SHAPE_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt index 413c6523a7564..f1cc5d81e0fe4 100644 --- a/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Standard) add_public_tablegen_target(MLIRStandardTransformsIncGen) add_mlir_doc(Passes -gen-pass-doc StandardPasses ./) diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h index aadc41d2790dd..fba5f4b320430 100644 --- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h @@ -15,12 +15,10 @@ #ifndef MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ #define MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ -#include +#include "mlir/Pass/Pass.h" namespace mlir { -class Pass; -class MLIRContext; class OwningRewritePatternList; /// Creates an instance of the ExpandAtomic pass. 
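The registration stanza that the next hunk appends to this header is the same one added across all of the dialect headers in this change: define GEN_PASS_REGISTRATION, then include the tablegen-generated Passes.h.inc. As a hedged sketch (the pass argument and summary strings below are illustrative assumptions, not the real generated text), the generated file roughly expands to:

```cpp
// Sketch of what -gen-pass-decls -name Standard emits under
// GEN_PASS_REGISTRATION; it mirrors the format strings added to
// PassGen.cpp later in this patch.
inline void registerExpandAtomicPass() {
  ::mlir::registerPass("expand-atomic",            // assumed pass argument
                       "Expand atomic operations", // assumed summary
                       []() -> std::unique_ptr<::mlir::Pass> {
                         return mlir::createExpandAtomicPass();
                       });
}

// Grouped entry point named from the -name option; callers use this instead
// of expanding per-pass registration macros themselves.
inline void registerStandardPasses() {
  registerExpandAtomicPass();
}
```

Grouping by -name is what lets InitAllPasses.h further down collapse into a flat list of register*Passes() calls.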
@@ -29,6 +27,14 @@ std::unique_ptr createExpandAtomicPass(); void populateExpandTanhPattern(OwningRewritePatternList &patterns, MLIRContext *ctx); +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. +#define GEN_PASS_REGISTRATION +#include "mlir/Dialect/StandardOps/Transforms/Passes.h.inc" + } // end namespace mlir #endif // MLIR_DIALECT_STANDARD_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h index a2810f3b270b2..7d0a7726ea6cc 100644 --- a/mlir/include/mlir/InitAllPasses.h +++ b/mlir/include/mlir/InitAllPasses.h @@ -14,38 +14,17 @@ #ifndef MLIR_INITALLPASSES_H_ #define MLIR_INITALLPASSES_H_ -#include "mlir/Conversion/AVX512ToLLVM/ConvertAVX512ToLLVM.h" -#include "mlir/Conversion/GPUCommon/GPUCommonPass.h" -#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" -#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" -#include "mlir/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.h" -#include "mlir/Conversion/GPUToVulkan/ConvertGPUToVulkanPass.h" -#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h" -#include "mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h" -#include "mlir/Conversion/LinalgToStandard/LinalgToStandard.h" -#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" -#include "mlir/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVMPass.h" -#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" -#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" -#include "mlir/Conversion/StandardToSPIRV/ConvertStandardToSPIRVPass.h" -#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h" -#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h" -#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Conversion/Passes.h" #include "mlir/Dialect/Affine/Passes.h" #include "mlir/Dialect/GPU/Passes.h" -#include "mlir/Dialect/LLVMIR/Transforms/LegalizeForExport.h" +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Quant/Passes.h" #include "mlir/Dialect/SCF/Passes.h" #include "mlir/Dialect/SPIRV/Passes.h" #include "mlir/Dialect/Shape/Transforms/Passes.h" #include "mlir/Dialect/StandardOps/Transforms/Passes.h" -#include "mlir/Transforms/LocationSnapshot.h" #include "mlir/Transforms/Passes.h" -#include "mlir/Transforms/ViewOpGraph.h" -#include "mlir/Transforms/ViewRegionGraph.h" #include @@ -59,49 +38,22 @@ namespace mlir { // individual passes. // The global registry is interesting to interact with the command-line tools. 
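// After this refactor the body below is simply one grouped
// register*Passes() call per dialect, produced by each generated
// Passes.h.inc, instead of re-expanding GEN_PASS_REGISTRATION inline for
// every include.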
inline void registerAllPasses() { - // Init general passes -#define GEN_PASS_REGISTRATION -#include "mlir/Transforms/Passes.h.inc" + // General passes + registerTransformsPasses(); // Conversion passes -#define GEN_PASS_REGISTRATION -#include "mlir/Conversion/Passes.h.inc" - - // Affine -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/Affine/Passes.h.inc" - - // GPU -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/GPU/Passes.h.inc" - - // Linalg -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/Linalg/Passes.h.inc" - - // LLVM -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc" - - // Loop -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/SCF/Passes.h.inc" - - // Quant -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/Quant/Passes.h.inc" - - // SPIR-V -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/SPIRV/Passes.h.inc" - - // Standard -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/StandardOps/Transforms/Passes.h.inc" - - // Shape -#define GEN_PASS_REGISTRATION -#include "mlir/Dialect/Shape/Transforms/Passes.h.inc" + registerConversionPasses(); + + // Dialect passes + registerAffinePasses(); + registerGPUPasses(); + registerLinalgPasses(); + LLVM::registerLLVMPasses(); + quant::registerQuantPasses(); + registerSCFPasses(); + registerShapePasses(); + spirv::registerSPIRVPasses(); + registerStandardPasses(); } } // namespace mlir diff --git a/mlir/include/mlir/Transforms/CMakeLists.txt b/mlir/include/mlir/Transforms/CMakeLists.txt index 706193188eddd..f1006e06757b2 100644 --- a/mlir/include/mlir/Transforms/CMakeLists.txt +++ b/mlir/include/mlir/Transforms/CMakeLists.txt @@ -1,6 +1,6 @@ set(LLVM_TARGET_DEFINITIONS Passes.td) -mlir_tablegen(Passes.h.inc -gen-pass-decls) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name Transforms) add_public_tablegen_target(MLIRTransformsPassIncGen) add_mlir_doc(Passes -gen-pass-doc GeneralPasses ./) diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index 1ffff1a25a6d5..ef8524ebd28a2 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -14,19 +14,19 @@ #ifndef MLIR_TRANSFORMS_PASSES_H #define MLIR_TRANSFORMS_PASSES_H -#include "mlir/Support/LLVM.h" -#include +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/LocationSnapshot.h" +#include "mlir/Transforms/ViewOpGraph.h" +#include "mlir/Transforms/ViewRegionGraph.h" #include namespace mlir { class AffineForOp; -class FuncOp; -class ModuleOp; -class Pass; -template -class OperationPass; +//===----------------------------------------------------------------------===// +// Passes +//===----------------------------------------------------------------------===// /// Creates an instance of the BufferPlacement pass. std::unique_ptr createBufferPlacementPass(); @@ -95,6 +95,15 @@ std::unique_ptr createSymbolDCEPass(); /// Creates an interprocedural pass to normalize memrefs to have a trivial /// (identity) layout map. std::unique_ptr> createNormalizeMemRefsPass(); + +//===----------------------------------------------------------------------===// +// Registration +//===----------------------------------------------------------------------===// + +/// Generate the code for registering passes. 
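+/// (Together with the GEN_PASS_REGISTRATION define that follows, this pulls
+/// in the generated registerTransformsPasses() entry point that
+/// registerAllPasses() now calls.)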
+#define GEN_PASS_REGISTRATION
+#include "mlir/Transforms/Passes.h.inc"
+
 } // end namespace mlir

 #endif // MLIR_TRANSFORMS_PASSES_H
diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp
index f8998f09a4368..c2dcdb8e4ac9b 100644
--- a/mlir/tools/mlir-tblgen/PassGen.cpp
+++ b/mlir/tools/mlir-tblgen/PassGen.cpp
@@ -14,6 +14,7 @@
 #include "mlir/TableGen/GenInfo.h"
 #include "mlir/TableGen/Pass.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -21,6 +22,11 @@
 using namespace mlir;
 using namespace mlir::tblgen;

+static llvm::cl::OptionCategory passGenCat("Options for -gen-pass-decls");
+static llvm::cl::opt<std::string>
+    groupName("name", llvm::cl::desc("The name of this group of passes"),
+              llvm::cl::cat(passGenCat));
+
 //===----------------------------------------------------------------------===//
 // GEN: Pass base class generation
 //===----------------------------------------------------------------------===//
@@ -109,36 +115,49 @@ static void emitPassDecls(ArrayRef<Pass> passes, raw_ostream &os) {
 // GEN: Pass registration generation
 //===----------------------------------------------------------------------===//

+/// The code snippet used to generate the registration for a single pass.
+///
+/// {0}: The def name of the pass record.
+/// {1}: The argument of the pass.
+/// {2}: The summary of the pass.
+/// {3}: The code for constructing the pass.
+const char *const passRegistrationCode = R"(
+//===----------------------------------------------------------------------===//
+// {0} Registration
+//===----------------------------------------------------------------------===//
+
+inline void register{0}Pass() {{
+  ::mlir::registerPass("{1}", "{2}", []() -> std::unique_ptr<::mlir::Pass> {{
+    return {3};
+  });
+}
+)";
+
+/// The code snippet used to generate the registration function for a group
+/// of passes.
+///
+/// {0}: The name of the pass group.
+const char *const passGroupRegistrationCode = R"(
+//===----------------------------------------------------------------------===//
+// {0} Registration
+//===----------------------------------------------------------------------===//
+
+inline void register{0}Passes() {{
+)";
+
 /// Emit the code for registering each of the given passes with the global
 /// PassRegistry.
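/// The emitted file provides one inline register<DefName>Pass() function per
/// pass (via passRegistrationCode above), followed by a grouped
/// register<GroupName>Passes() function that simply calls each of them.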
static void emitRegistration(ArrayRef<Pass> passes, raw_ostream &os) {
   os << "#ifdef GEN_PASS_REGISTRATION\n";

   for (const Pass &pass : passes) {
-    os << llvm::formatv("#define GEN_PASS_REGISTRATION_{0}\n",
-                        pass.getDef()->getName());
-  }
-  os << "#endif // GEN_PASS_REGISTRATION\n";
-
-  for (const Pass &pass : passes) {
-    os << llvm::formatv("#ifdef GEN_PASS_REGISTRATION_{0}\n",
-                        pass.getDef()->getName());
-    os << llvm::formatv("::mlir::registerPass(\"{0}\", \"{1}\", []() -> "
-                        "std::unique_ptr<::mlir::Pass> {{ return {2}; });\n",
+    os << llvm::formatv(passRegistrationCode, pass.getDef()->getName(),
                         pass.getArgument(), pass.getSummary(),
                         pass.getConstructor());
-    os << llvm::formatv("#endif // GEN_PASS_REGISTRATION_{0}\n",
-                        pass.getDef()->getName());
-    os << llvm::formatv("#undef GEN_PASS_REGISTRATION_{0}\n",
-                        pass.getDef()->getName());
   }

-  os << "#ifdef GEN_PASS_REGISTRATION\n";
-  for (const Pass &pass : passes) {
-    os << llvm::formatv("#undef GEN_PASS_REGISTRATION_{0}\n",
-                        pass.getDef()->getName());
-  }
-  os << "#endif // GEN_PASS_REGISTRATION\n";
+  os << llvm::formatv(passGroupRegistrationCode, groupName);
+  for (const Pass &pass : passes)
+    os << "  register" << pass.getDef()->getName() << "Pass();\n";
+  os << "}\n";

   os << "#undef GEN_PASS_REGISTRATION\n";
+  os << "#endif // GEN_PASS_REGISTRATION\n";
 }

 //===----------------------------------------------------------------------===//

From 86a78546b97950dfacd44ab77f17f4ce055d16e5 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar
Date: Fri, 31 Jul 2020 14:46:48 -0700
Subject: [PATCH 062/600] [mlir] Add shape.with_shape op

This is an operation that can return a new ValueShape with a different
shape. Useful for composing shape function calls and reusing existing
shape transfer functions. Just adding the op in this change.

Differential Revision: https://reviews.llvm.org/D84217
---
 .../mlir/Dialect/Shape/IR/ShapeBase.td        |  6 ++-
 .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 43 +++++++++++++++++++
 mlir/test/Dialect/Shape/ops.mlir              | 15 ++++++-
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
index 8f64e3c081e6b..3e0177bca50e9 100644
--- a/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
+++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeBase.td
@@ -100,7 +100,11 @@ def Shape_ValueShapeType : DialectType {
 let hasCanonicalizer = 1;
 }

+def Shape_WithOp : Shape_Op<"with_shape", [NoSideEffect]> {
+  let summary = "Returns ValueShape with given shape";
+  let description = [{
+    Returns ValueShape with the shape updated to match the shape operand. That
+    is, a new ValueShape tuple is created with value equal to `operand`'s
+    value and shape equal to `shape`. If the ValueShape and given `shape` are
+    non-conformant, then the returned ValueShape will represent an error of
+    this mismatch. Similarly, if either input is in an error state, then an
+    error is propagated.
+
+    Usage:
+      %0 = shape.with_shape %1, %2 : tensor<...>, !shape.shape
+
+    This is used, for example, where one combines shape function calculations
+    and/or calls one shape function from another.
E.g.,
+
+    ```mlir
+    func @shape_foobah(%a: !shape.value_shape,
+                       %b: !shape.value_shape,
+                       %c: !shape.value_shape) -> !shape.shape {
+      %0 = call @shape_foo(%a, %b) :
+        (!shape.value_shape, !shape.value_shape) -> !shape.shape
+      %1 = shape.with_shape %b, %0 : !shape.value_shape, !shape.shape
+      %2 = call @shape_bah(%c, %1) :
+        (!shape.value_shape, !shape.value_shape) -> !shape.shape
+      return %2 : !shape.shape
+    }
+    ```
+
+    This op need not be a refinement of the shape. In non-error cases the
+    input ValueShape's value and shape are conformant and so too for the
+    output, but the result may be less specified than `operand`'s shape as
+    `shape` is merely used to construct the new ValueShape. If join behavior
+    is desired, then a join op should be used.
+  }];
+
+  let arguments = (ins AnyTypeOf<[AnyShaped, Shape_ValueShapeType]>:$operand,
+                       Shape_ShapeType:$shape);
+  let results = (outs Shape_ValueShapeType:$result);
+
+  let assemblyFormat = "operands attr-dict `:` type($operand) `,` type($shape)";
+}
+
 def Shape_YieldOp : Shape_Op<"yield",
                              [HasParent<"ReduceOp">,
                               NoSideEffect,

diff --git a/mlir/test/Dialect/Shape/ops.mlir b/mlir/test/Dialect/Shape/ops.mlir
index 48b3805d0a3b7..172835a2c6d55 100644
--- a/mlir/test/Dialect/Shape/ops.mlir
+++ b/mlir/test/Dialect/Shape/ops.mlir
@@ -221,4 +221,17 @@ func @num_elements_shape(%arg : !shape.shape) -> !shape.size {
   return %result : !shape.size
 }
-
+// Testing invoking one shape function from another. shape_equal_shapes is
+// merely a trivial helper function to invoke elsewhere.
+func @shape_equal_shapes(%a : !shape.value_shape, %b : !shape.value_shape) -> !shape.shape {
+  %0 = shape.shape_of %a : !shape.value_shape -> !shape.shape
+  %1 = shape.shape_of %b : !shape.value_shape -> !shape.shape
+  %2 = "shape.join"(%0, %1) : (!shape.shape, !shape.shape) -> !shape.shape
+  return %2 : !shape.shape
+}
+func @shape_with_shape(%a : !shape.value_shape, %b : !shape.value_shape) -> !shape.shape {
+  %0 = shape.shape_of %a : !shape.value_shape -> !shape.shape
+  %1 = shape.with_shape %b, %0 : !shape.value_shape, !shape.shape
+  %2 = call @shape_equal_shapes(%a, %1) : (!shape.value_shape, !shape.value_shape) -> !shape.shape
+  return %2 : !shape.shape
+}

From 5110fd0343c2d06c8ae538741fbef13ece5e68de Mon Sep 17 00:00:00 2001
From: Adrian Prantl
Date: Fri, 31 Jul 2020 13:30:59 -0700
Subject: [PATCH 063/600] Convert to early exit (NFC)

---
 lldb/source/Target/TargetList.cpp | 55 ++++++++++++++++---------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/lldb/source/Target/TargetList.cpp b/lldb/source/Target/TargetList.cpp
index 75a022d2c4636..7e243e5ed3380 100644
--- a/lldb/source/Target/TargetList.cpp
+++ b/lldb/source/Target/TargetList.cpp
@@ -394,37 +394,38 @@ Status TargetList::CreateTargetInternal(Debugger &debugger,
     target_sp.reset(new Target(debugger, arch, platform_sp, is_dummy_target));
   }

-  if (target_sp) {
-    // Set argv0 with what the user typed, unless the user specified a
-    // directory.
If the user specified a directory, then it is probably a - // bundle that was resolved and we need to use the resolved bundle path - if (!user_exe_path.empty()) { - // Use exactly what the user typed as the first argument when we exec or - // posix_spawn - if (user_exe_path_is_bundle && resolved_bundle_exe_path[0]) { - target_sp->SetArg0(resolved_bundle_exe_path); - } else { - // Use resolved path - target_sp->SetArg0(file.GetPath().c_str()); - } - } - if (file.GetDirectory()) { - FileSpec file_dir; - file_dir.GetDirectory() = file.GetDirectory(); - target_sp->AppendExecutableSearchPaths(file_dir); - } + if (!target_sp) + return error; - // Don't put the dummy target in the target list, it's held separately. - if (!is_dummy_target) { - std::lock_guard guard(m_target_list_mutex); - m_selected_target_idx = m_target_list.size(); - m_target_list.push_back(target_sp); - // Now prime this from the dummy target: - target_sp->PrimeFromDummyTarget(debugger.GetDummyTarget()); + // Set argv0 with what the user typed, unless the user specified a + // directory. If the user specified a directory, then it is probably a + // bundle that was resolved and we need to use the resolved bundle path + if (!user_exe_path.empty()) { + // Use exactly what the user typed as the first argument when we exec or + // posix_spawn + if (user_exe_path_is_bundle && resolved_bundle_exe_path[0]) { + target_sp->SetArg0(resolved_bundle_exe_path); } else { - m_dummy_target_sp = target_sp; + // Use resolved path + target_sp->SetArg0(file.GetPath().c_str()); } } + if (file.GetDirectory()) { + FileSpec file_dir; + file_dir.GetDirectory() = file.GetDirectory(); + target_sp->AppendExecutableSearchPaths(file_dir); + } + + // Don't put the dummy target in the target list, it's held separately. + if (!is_dummy_target) { + std::lock_guard guard(m_target_list_mutex); + m_selected_target_idx = m_target_list.size(); + m_target_list.push_back(target_sp); + // Now prime this from the dummy target: + target_sp->PrimeFromDummyTarget(debugger.GetDummyTarget()); + } else { + m_dummy_target_sp = target_sp; + } return error; } From 731292e5f30074c282d5ea1ebb86bb7adbc9e90e Mon Sep 17 00:00:00 2001 From: Andrei Lebedev Date: Fri, 31 Jul 2020 15:39:12 -0700 Subject: [PATCH 064/600] Updated the -I option description. --- clang/docs/ClangCommandLineReference.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 1613c8e453184..699a0be720368 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -1014,9 +1014,9 @@ Include path management Flags controlling how ``#include``\s are resolved to files. -.. option:: -I, --include-directory , --include-directory= +.. option:: -I, --include-directory , --include-directory= -Add directory to include search path +Add directory to the list of include files search paths. If there are multiple -I options, these directories are searched in the order they are given before the standard system directories are searched. If the same directory is in the SYSTEM include search paths, for example if also specified with -isystem, the -I option will be ignored .. 
option:: -I-, --include-barrier From 77a02527dc392121b31221cef73c4f933083a58e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 31 Jul 2020 16:35:41 -0400 Subject: [PATCH 065/600] [InstSimplify] add tests for abs intrinsic; NFC --- llvm/test/Transforms/InstSimplify/call.ll | 49 +++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index 6579bda52795b..e8c39dbda8f98 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -2,6 +2,55 @@ ; RUN: opt < %s -instsimplify -S | FileCheck %s ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s +declare i32 @llvm.abs.i32(i32, i1) +declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) + +; TODO: If the sign bit is known zero, the abs is not needed. + +define i32 @zext_abs(i31 %x) { +; CHECK-LABEL: @zext_abs( +; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[ZEXT]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] +; + %zext = zext i31 %x to i32 + %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) + ret i32 %abs +} + +define <3 x i82> @lshr_abs(<3 x i82> %x) { +; CHECK-LABEL: @lshr_abs( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) +; CHECK-NEXT: ret <3 x i82> [[ABS]] +; + %lshr = lshr <3 x i82> %x, + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) + ret <3 x i82> %abs +} + +define i32 @and_abs(i32 %x) { +; CHECK-LABEL: @and_abs( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] +; + %and = and i32 %x, 2147483644 + %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) + ret i32 %abs +} + +define <3 x i82> @select_abs(<3 x i1> %cond) { +; CHECK-LABEL: @select_abs( +; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[SEL]], i1 false) +; CHECK-NEXT: ret <3 x i82> [[ABS]] +; + %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> + %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) + ret <3 x i82> %abs +} + declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b) From f2400f024d323bc9000a4c126f2008a8b58fb4a0 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 31 Jul 2020 18:54:09 -0400 Subject: [PATCH 066/600] [OpenMP] Fixed the issue that target memory deallocation might be called when they're being used This patch fixed the issue that target memory might be deallocated when they're still being used or before they're used. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D84996 --- openmp/libomptarget/src/omptarget.cpp | 60 ++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 6704a6fd6e6b0..f4d79d8064b94 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -421,11 +421,32 @@ int targetDataBegin(DeviceTy &Device, int32_t arg_num, void **args_base, return OFFLOAD_SUCCESS; } +namespace { +/// This structure contains information to deallocate a target pointer, aka. 
+/// used to call the function \p DeviceTy::deallocTgtPtr. +struct DeallocTgtPtrInfo { + /// Host pointer used to look up into the map table + void *HstPtrBegin; + /// Size of the data + int64_t DataSize; + /// Whether it is forced to be removed from the map table + bool ForceDelete; + /// Whether it has \p close modifier + bool HasCloseModifier; + + DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool ForceDelete, + bool HasCloseModifier) + : HstPtrBegin(HstPtr), DataSize(Size), ForceDelete(ForceDelete), + HasCloseModifier(HasCloseModifier) {} +}; +} // namespace + /// Internal function to undo the mapping and retrieve the data from the device. int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, void **ArgMappers, __tgt_async_info *AsyncInfo) { int Ret; + std::vector DeallocTgtPtrs; // process each input. for (int32_t I = ArgNum - 1; I >= 0; --I) { // Ignore private variables and arrays - there is no mapping for them. @@ -574,15 +595,34 @@ int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases, } Device.ShadowMtx.unlock(); - // Deallocate map - if (DelEntry) { - Ret = Device.deallocTgtPtr(HstPtrBegin, DataSize, ForceDelete, - HasCloseModifier); - if (Ret != OFFLOAD_SUCCESS) { - DP("Deallocating data from device failed.\n"); - return OFFLOAD_FAIL; - } - } + // Add pointer to the buffer for later deallocation + if (DelEntry) + DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, ForceDelete, + HasCloseModifier); + } + } + + // We need to synchronize before deallocating data. + // If AsyncInfo is nullptr, the previous data transfer (if has) will be + // synchronous, so we don't need to synchronize again. If AsyncInfo->Queue is + // nullptr, there is no data transfer happened because once there is, + // AsyncInfo->Queue will not be nullptr, so again, we don't need to + // synchronize. + if (AsyncInfo && AsyncInfo->Queue) { + Ret = Device.synchronize(AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + DP("Failed to synchronize device.\n"); + return OFFLOAD_FAIL; + } + } + + // Deallocate target pointer + for (DeallocTgtPtrInfo &Info : DeallocTgtPtrs) { + Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize, + Info.ForceDelete, Info.HasCloseModifier); + if (Ret != OFFLOAD_SUCCESS) { + DP("Deallocating data from device failed.\n"); + return OFFLOAD_FAIL; } } @@ -1006,5 +1046,5 @@ int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases, return OFFLOAD_FAIL; } - return Device.synchronize(&AsyncInfo); + return OFFLOAD_SUCCESS; } From 86dea1f39bd127776b999e10dff212003068d30a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Jul 2020 15:46:12 -0700 Subject: [PATCH 067/600] [ValueTracking] Improve llvm.abs handling in computeKnownBits. Add the optimizations we have in the SelectionDAG version. Known non-negative copies all known bits. Any known one other than the sign bit makes result non-negative. 
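Put differently: when the operand is known non-negative, abs is the identity,
so every known bit transfers to the result; and any known-one bit below the
sign bit rules out INT_MIN, making the result's sign bit known zero even
without the int_min_is_poison flag. A plain-integer sketch of that reasoning
(the helper name is ours; the patch itself operates on KnownBits masks):

```cpp
#include <cassert>
#include <cstdint>

// Illustration only: mirrors the rules added to computeKnownBits for
// llvm.abs using concrete integers instead of KnownBits masks.
int32_t absWithKnownLowBit(int32_t x) {
  int32_t v = x | 1;      // a bit other than the sign bit is known one,
  assert(v != INT32_MIN); // so v cannot be INT32_MIN...
  int32_t a = v < 0 ? -v : v;
  assert(a >= 0);         // ...hence the result's sign bit is known zero.
  return a;
}
```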
Differential Revision: https://reviews.llvm.org/D85000 --- llvm/lib/Analysis/ValueTracking.cpp | 21 +++- .../Transforms/InstCombine/abs-intrinsic.ll | 103 +++++++++++++++++- 2 files changed, 117 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 4cd2d07bf4b0c..af55c96f3a531 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1644,13 +1644,24 @@ static void computeKnownBitsFromOperator(const Operator *I, default: break; case Intrinsic::abs: computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - // Otherwise, if this call is undefined for INT_MIN, the result is - // positive. - if (match(II->getArgOperand(1), m_One())) - Known.Zero.setSignBit(); + + // If the source's MSB is zero then we know the rest of the bits. + if (Known2.isNonNegative()) { + Known.Zero |= Known2.Zero; + Known.One |= Known2.One; + break; + } + // Absolute value preserves trailing zero count. Known.Zero.setLowBits(Known2.Zero.countTrailingOnes()); - // FIXME: Handle known negative/non-negative input? + + // If this call is undefined for INT_MIN, the result is positive. We + // also know it can't be INT_MIN if there is a set bit that isn't the + // sign bit. + Known2.One.clearSignBit(); + if (match(II->getArgOperand(1), m_One()) || Known2.One.getBoolValue()) + Known.Zero.setSignBit(); + // FIXME: Handle known negative input? // FIXME: Calculate the negated Known bits and combine them? break; case Intrinsic::bitreverse: diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 8b965b3d1e440..ed845cc842677 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s declare i32 @llvm.abs.i32(i32, i1) +declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) define i1 @abs_nsw_must_be_positive(i32 %x) { ; CHECK-LABEL: @abs_nsw_must_be_positive( @@ -12,6 +13,15 @@ define i1 @abs_nsw_must_be_positive(i32 %x) { ret i1 %c2 } +define <4 x i1> @abs_nsw_must_be_positive_vec(<4 x i32> %x) { +; CHECK-LABEL: @abs_nsw_must_be_positive_vec( +; CHECK-NEXT: ret <4 x i1> +; + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) + %c2 = icmp sge <4 x i32> %abs, zeroinitializer + ret <4 x i1> %c2 +} + ; Negative test, no nsw provides no information about the sign bit of the result. 
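; With the int_min_is_poison flag false and no other bits known, the operand
; may be INT_MIN, whose absolute value wraps back to INT_MIN (still negative),
; so the compare against zero cannot be folded.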
define i1 @abs_nonsw(i32 %x) { ; CHECK-LABEL: @abs_nonsw( @@ -24,6 +34,17 @@ define i1 @abs_nonsw(i32 %x) { ret i1 %c2 } +define <4 x i1> @abs_nonsw_vec(<4 x i32> %x) { +; CHECK-LABEL: @abs_nonsw_vec( +; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[X:%.*]], i1 false) +; CHECK-NEXT: [[C2:%.*]] = icmp sgt <4 x i32> [[ABS]], +; CHECK-NEXT: ret <4 x i1> [[C2]] +; + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 false) + %c2 = icmp sge <4 x i32> %abs, zeroinitializer + ret <4 x i1> %c2 +} + ; abs preserves trailing zeros so the second and is unneeded define i32 @abs_trailing_zeros(i32 %x) { ; CHECK-LABEL: @abs_trailing_zeros( @@ -37,6 +58,18 @@ define i32 @abs_trailing_zeros(i32 %x) { ret i32 %and2 } +define <4 x i32> @abs_trailing_zeros_vec(<4 x i32> %x) { +; CHECK-LABEL: @abs_trailing_zeros_vec( +; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[AND]], i1 false) +; CHECK-NEXT: ret <4 x i32> [[ABS]] +; + %and = and <4 x i32> %x, + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %and, i1 false) + %and2 = and <4 x i32> %abs, + ret <4 x i32> %and2 +} + ; negative test, can't remove the second and based on trailing zeroes. ; FIXME: Could remove the first and using demanded bits. define i32 @abs_trailing_zeros_negative(i32 %x) { @@ -52,12 +85,25 @@ define i32 @abs_trailing_zeros_negative(i32 %x) { ret i32 %and2 } +define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> %x) { +; CHECK-LABEL: @abs_trailing_zeros_negative_vec( +; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[AND]], i1 false) +; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[ABS]], +; CHECK-NEXT: ret <4 x i32> [[AND2]] +; + %and = and <4 x i32> %x, + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %and, i1 false) + %and2 = and <4 x i32> %abs, + ret <4 x i32> %and2 +} + ; Make sure we infer this add doesn't overflow. The input to the abs has 3 ; sign bits, the abs reduces this to 2 sign bits. 
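; (sext from i30 to i32 replicates the sign bit into bits 31..29; taking the
; absolute value consumes at most one of those sign bits, so the result fits
; in 31 bits and adding 1 cannot signed-overflow.)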
define i32 @abs_signbits(i30 %x) { ; CHECK-LABEL: @abs_signbits( -; CHECK-NEXT: [[AND:%.*]] = sext i30 [[X:%.*]] to i32 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 false) +; CHECK-NEXT: [[EXT:%.*]] = sext i30 [[X:%.*]] to i32 +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[EXT]], i1 false) ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[ABS]], 1 ; CHECK-NEXT: ret i32 [[ADD]] ; @@ -66,3 +112,56 @@ define i32 @abs_signbits(i30 %x) { %add = add i32 %abs, 1 ret i32 %add } + +define <4 x i32> @abs_signbits_vec(<4 x i30> %x) { +; CHECK-LABEL: @abs_signbits_vec( +; CHECK-NEXT: [[EXT:%.*]] = sext <4 x i30> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[EXT]], i1 false) +; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i32> [[ABS]], +; CHECK-NEXT: ret <4 x i32> [[ADD]] +; + %ext = sext <4 x i30> %x to <4 x i32> + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %ext, i1 false) + %add = add <4 x i32> %abs, + ret <4 x i32> %add +} + +define i1 @abs_known_positive_input_compare(i31 %x) { +; CHECK-LABEL: @abs_known_positive_input_compare( +; CHECK-NEXT: ret i1 true +; + %zext = zext i31 %x to i32 + %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) + %c2 = icmp sge i32 %abs, 0 + ret i1 %c2 +} + +define <4 x i1> @abs_known_positive_input_compare_vec(<4 x i31> %x) { +; CHECK-LABEL: @abs_known_positive_input_compare_vec( +; CHECK-NEXT: ret <4 x i1> +; + %zext = zext <4 x i31> %x to <4 x i32> + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %zext, i1 false) + %c2 = icmp sge <4 x i32> %abs, zeroinitializer + ret <4 x i1> %c2 +} + +define i1 @abs_known_not_int_min(i32 %x) { +; CHECK-LABEL: @abs_known_not_int_min( +; CHECK-NEXT: ret i1 true +; + %or = or i32 %x, 1 + %abs = call i32 @llvm.abs.i32(i32 %or, i1 false) + %c2 = icmp sge i32 %abs, 0 + ret i1 %c2 +} + +define <4 x i1> @abs_known_not_int_min_vec(<4 x i32> %x) { +; CHECK-LABEL: @abs_known_not_int_min_vec( +; CHECK-NEXT: ret <4 x i1> +; + %or = or <4 x i32> %x, + %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %or, i1 false) + %c2 = icmp sge <4 x i32> %abs, zeroinitializer + ret <4 x i1> %c2 +} From 47a4a27f47203055a4700b65533262409f83c491 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Thu, 21 May 2020 17:30:20 +0800 Subject: [PATCH 068/600] Upgrade MC to v0.9. 
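The assembler-visible core of the update is the vtype operand: v0.9 syntax
is e[8|...|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu], adding
tail/maskedoff-agnostic bits and fractional LMUL. A condensed sketch of the
encoding as implemented by RISCVOperand::createVType in this patch (the
helper name encodeVType is ours, not part of the patch):

```cpp
// Condensed from RISCVOperand::createVType below: how the v0.9 vtype
// immediate is assembled from its parsed fields.
unsigned encodeVType(unsigned sewLog2, unsigned lmulLog2, bool fractional,
                     bool tailAgnostic, bool maskedoffAgnostic) {
  unsigned enc;
  if (fractional) {
    unsigned flmul = 8 - lmulLog2; // mf8, mf4, mf2 -> 5, 6, 7
    enc = ((flmul & 0x4) << 3) | ((sewLog2 & 0x7) << 2) | (flmul & 0x3);
  } else {
    enc = (sewLog2 << 2) | lmulLog2; // m1, m2, m4, m8 -> 0, 1, 2, 3
  }
  if (tailAgnostic)
    enc |= 0x40; // ta
  if (maskedoffAgnostic)
    enc |= 0x80; // ma
  return enc;
}
```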
Differential revision: https://reviews.llvm.org/D80802 --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 185 ++++---- .../RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 22 +- llvm/lib/Target/RISCV/RISCVInstrFormats.td | 40 +- llvm/lib/Target/RISCV/RISCVInstrFormatsV.td | 93 ++-- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 55 ++- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 242 ++++++----- llvm/test/MC/RISCV/rvv/compare.s | 6 + llvm/test/MC/RISCV/rvv/convert.s | 192 ++++++--- llvm/test/MC/RISCV/rvv/ext.s | 81 ++++ llvm/test/MC/RISCV/rvv/fcompare.s | 6 + llvm/test/MC/RISCV/rvv/fothers.s | 40 +- llvm/test/MC/RISCV/rvv/freduction.s | 6 + llvm/test/MC/RISCV/rvv/invalid.s | 272 ++---------- llvm/test/MC/RISCV/rvv/load.s | 400 ++++++++++-------- llvm/test/MC/RISCV/rvv/mask.s | 4 +- llvm/test/MC/RISCV/rvv/reduction.s | 6 + llvm/test/MC/RISCV/rvv/snippet.s | 30 +- llvm/test/MC/RISCV/rvv/store.s | 228 +++++++--- llvm/test/MC/RISCV/rvv/vsetvl.s | 68 ++- 19 files changed, 1187 insertions(+), 789 deletions(-) create mode 100644 llvm/test/MC/RISCV/rvv/ext.s diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 407f980bd35e8..c094163c2f9d7 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -288,11 +288,21 @@ struct RISCVOperand : public MCParsedAsmOperand { SEW_1024, }; - enum class VLMUL { LMUL_1 = 0, LMUL_2, LMUL_4, LMUL_8 }; + enum class VLMUL { + LMUL_1 = 0, + LMUL_2, + LMUL_4, + LMUL_8, + LMUL_F8 = 5, + LMUL_F4, + LMUL_F2 + }; struct VTypeOp { VSEW Sew; VLMUL Lmul; + bool TailAgnostic; + bool MaskedoffAgnostic; unsigned Encoding; }; @@ -763,7 +773,7 @@ struct RISCVOperand : public MCParsedAsmOperand { case VSEW::SEW_1024: return "e1024"; } - return ""; + llvm_unreachable("Unknown SEW."); } static StringRef getLMULStr(VLMUL Lmul) { @@ -776,8 +786,14 @@ struct RISCVOperand : public MCParsedAsmOperand { return "m4"; case VLMUL::LMUL_8: return "m8"; + case VLMUL::LMUL_F2: + return "mf2"; + case VLMUL::LMUL_F4: + return "mf4"; + case VLMUL::LMUL_F8: + return "mf8"; } - return ""; + llvm_unreachable("Unknown LMUL."); } StringRef getVType(SmallString<32> &Buf) const { @@ -852,15 +868,31 @@ struct RISCVOperand : public MCParsedAsmOperand { return Op; } - static std::unique_ptr createVType(APInt Sew, APInt Lmul, - SMLoc S, bool IsRV64) { + static std::unique_ptr + createVType(APInt Sew, APInt Lmul, bool Fractional, bool TailAgnostic, + bool MaskedoffAgnostic, SMLoc S, bool IsRV64) { auto Op = std::make_unique(KindTy::VType); Sew.ashrInPlace(3); unsigned SewLog2 = Sew.logBase2(); unsigned LmulLog2 = Lmul.logBase2(); Op->VType.Sew = static_cast(SewLog2); - Op->VType.Lmul = static_cast(LmulLog2); - Op->VType.Encoding = (SewLog2 << 2) | LmulLog2; + if (Fractional) { + unsigned Flmul = 8 - LmulLog2; + Op->VType.Lmul = static_cast(Flmul); + Op->VType.Encoding = + ((Flmul & 0x4) << 3) | ((SewLog2 & 0x7) << 2) | (Flmul & 0x3); + } else { + Op->VType.Lmul = static_cast(LmulLog2); + Op->VType.Encoding = (SewLog2 << 2) | LmulLog2; + } + if (TailAgnostic) { + Op->VType.Encoding |= 0x40; + } + if (MaskedoffAgnostic) { + Op->VType.Encoding |= 0x80; + } + Op->VType.TailAgnostic = TailAgnostic; + Op->VType.MaskedoffAgnostic = MaskedoffAgnostic; Op->StartLoc = S; Op->IsRV64 = IsRV64; return Op; @@ -1181,8 +1213,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidVTypeI: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); - 
return Error(ErrorLoc, - "operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8]"); + return Error( + ErrorLoc, + "operand must be " + "e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]"); } case Match_InvalidVMaskRegister: { SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); @@ -1549,7 +1583,7 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) { if (getLexer().getKind() != AsmToken::Identifier) return MatchOperand_NoMatch; - // Parse "e8,m1" + // Parse "e8,m1,t[a|u],m[a|u]" StringRef Name = getLexer().getTok().getIdentifier(); if (!Name.consume_front("e")) return MatchOperand_NoMatch; @@ -1559,13 +1593,6 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) { return MatchOperand_NoMatch; getLexer().Lex(); - if (getLexer().getKind() == AsmToken::EndOfStatement) { - Operands.push_back( - RISCVOperand::createVType(Sew, APInt(16, 1), S, isRV64())); - - return MatchOperand_Success; - } - if (!getLexer().is(AsmToken::Comma)) return MatchOperand_NoMatch; getLexer().Lex(); @@ -1573,15 +1600,51 @@ OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) { Name = getLexer().getTok().getIdentifier(); if (!Name.consume_front("m")) return MatchOperand_NoMatch; + // "m" or "mf" + bool Fractional = false; + if (Name.consume_front("f")) { + Fractional = true; + } APInt Lmul(16, Name, 10); if (Lmul != 1 && Lmul != 2 && Lmul != 4 && Lmul != 8) return MatchOperand_NoMatch; getLexer().Lex(); + if (!getLexer().is(AsmToken::Comma)) + return MatchOperand_NoMatch; + getLexer().Lex(); + + Name = getLexer().getTok().getIdentifier(); + // ta or tu + bool TailAgnostic; + if (Name.consume_front("ta")) + TailAgnostic = true; + else if (Name.consume_front("tu")) + TailAgnostic = false; + else + return MatchOperand_NoMatch; + getLexer().Lex(); + + if (!getLexer().is(AsmToken::Comma)) + return MatchOperand_NoMatch; + getLexer().Lex(); + + Name = getLexer().getTok().getIdentifier(); + // ma or mu + bool MaskedoffAgnostic; + if (Name.consume_front("ma")) + MaskedoffAgnostic = true; + else if (Name.consume_front("mu")) + MaskedoffAgnostic = false; + else + return MatchOperand_NoMatch; + getLexer().Lex(); + if (getLexer().getKind() != AsmToken::EndOfStatement) return MatchOperand_NoMatch; - Operands.push_back(RISCVOperand::createVType(Sew, Lmul, S, isRV64())); + Operands.push_back(RISCVOperand::createVType( + Sew, Lmul, Fractional, TailAgnostic, MaskedoffAgnostic, S, isRV64())); return MatchOperand_Success; } @@ -2281,71 +2344,41 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, return false; unsigned DestReg = Inst.getOperand(0).getReg(); + unsigned CheckReg; // Operands[1] will be the first operand, DestReg. SMLoc Loc = Operands[1]->getStartLoc(); - if ((TargetFlags == RISCV::WidenV) || (TargetFlags == RISCV::WidenW) || - (TargetFlags == RISCV::SlideUp) || (TargetFlags == RISCV::Vrgather) || - (TargetFlags == RISCV::Vcompress)) { - if (TargetFlags != RISCV::WidenW) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenV) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. 
- if (DestReg + 1 == Src2Reg) - return Error(Loc, - "The destination vector register group cannot overlap" - " the source vector register group."); - } - } - if (Inst.getOperand(2).isReg()) { - unsigned Src1Reg = Inst.getOperand(2).getReg(); - if (DestReg == Src1Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenV || TargetFlags == RISCV::WidenW) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. - if (DestReg + 1 == Src1Reg) - return Error(Loc, - "The destination vector register group cannot overlap" - " the source vector register group."); - } - } - if (Inst.getNumOperands() == 4) { - unsigned MaskReg = Inst.getOperand(3).getReg(); - - if (DestReg == MaskReg) - return Error(Loc, "The destination vector register group cannot overlap" - " the mask register."); - } - } else if (TargetFlags == RISCV::Narrow) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) + if (TargetFlags & RISCV::VS2Constraint) { + CheckReg = Inst.getOperand(1).getReg(); + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); - // Assume Src2Reg LMUL is 2 at least for widening/narrowing operations. - if (DestReg == Src2Reg + 1) + } + if ((TargetFlags & RISCV::VS1Constraint) && (Inst.getOperand(2).isReg())) { + CheckReg = Inst.getOperand(2).getReg(); + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" " the source vector register group."); - } else if (TargetFlags == RISCV::WidenCvt || TargetFlags == RISCV::Iota) { - unsigned Src2Reg = Inst.getOperand(1).getReg(); - if (DestReg == Src2Reg) + } + if ((TargetFlags & RISCV::VMConstraint) && (DestReg == RISCV::V0)) { + // vadc, vsbc are special cases. These instructions have no mask register. + // The destination register could not be V0. + unsigned Opcode = Inst.getOpcode(); + if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM || + Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM || + Opcode == RISCV::VSBC_VXM) + return Error(Loc, "The destination vector register group cannot be V0."); + + // Regardless masked or unmasked version, the number of operands is the + // same. For example, "viota.m v0, v2" is "viota.m v0, v2, NoRegister" + // actually. We need to check the last operand to ensure whether it is + // masked or not. + if ((TargetFlags & RISCV::OneInput) && (Inst.getNumOperands() == 3)) + CheckReg = Inst.getOperand(2).getReg(); + else if (Inst.getNumOperands() == 4) + CheckReg = Inst.getOperand(3).getReg(); + if (DestReg == CheckReg) return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - if (TargetFlags == RISCV::WidenCvt) { - // Assume DestReg LMUL is 2 at least for widening/narrowing operations. 
- if (DestReg + 1 == Src2Reg) - return Error(Loc, "The destination vector register group cannot overlap" - " the source vector register group."); - } - if (Inst.getNumOperands() == 3) { - unsigned MaskReg = Inst.getOperand(2).getReg(); - - if (DestReg == MaskReg) - return Error(Loc, "The destination vector register group cannot overlap" - " the mask register."); - } + " the mask register."); } return false; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index eae3e13dbe409..2c7038a0a8ace 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -155,10 +155,28 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, unsigned Imm = MI->getOperand(OpNo).getImm(); unsigned Sew = (Imm >> 2) & 0x7; unsigned Lmul = Imm & 0x3; + bool Fractional = (Imm >> 5) & 0x1; - Lmul = 0x1 << Lmul; Sew = 0x1 << (Sew + 3); - O << "e" << Sew << ",m" << Lmul; + O << "e" << Sew; + if (Fractional) { + Lmul = 4 - Lmul; + Lmul = 0x1 << Lmul; + O << ",mf" << Lmul; + } else { + Lmul = 0x1 << Lmul; + O << ",m" << Lmul; + } + bool TailAgnostic = Imm & 0x40; + bool MaskedoffAgnostic = Imm & 0x80; + if (TailAgnostic) + O << ",ta"; + else + O << ",tu"; + if (MaskedoffAgnostic) + O << ",ma"; + else + O << ",mu"; } void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index a47945a6a5154..21f0514146353 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -52,15 +52,37 @@ def InstFormatOther : InstFormat<17>; class RISCVVConstraint val> { bits<4> Value = val; } -def NoConstraint : RISCVVConstraint<0>; -def WidenV : RISCVVConstraint<1>; -def WidenW : RISCVVConstraint<2>; -def WidenCvt : RISCVVConstraint<3>; -def Narrow : RISCVVConstraint<4>; -def Iota : RISCVVConstraint<5>; -def SlideUp : RISCVVConstraint<6>; -def Vrgather : RISCVVConstraint<7>; -def Vcompress : RISCVVConstraint<8>; +def NoConstraint : RISCVVConstraint<0b0000>; +def VS2Constraint : RISCVVConstraint<0b0001>; +def VS1Constraint : RISCVVConstraint<0b0010>; +def VMConstraint : RISCVVConstraint<0b0100>; +def OneInput : RISCVVConstraint<0b1000>; + +def WidenV : RISCVVConstraint; +def WidenW : RISCVVConstraint; +def WidenCvt : RISCVVConstraint; +def Narrow : RISCVVConstraint; +def NarrowCvt : RISCVVConstraint; +def Vmadc : RISCVVConstraint; +def Iota : RISCVVConstraint; +def SlideUp : RISCVVConstraint; +def Vrgather : RISCVVConstraint; +def Vcompress : RISCVVConstraint; // The following opcode names match those given in Table 19.1 in the // RISC-V User-level ISA specification ("RISC-V base opcode map"). 
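The constraint defs just above replace the old one-hot enum with a bitmask:
each composite constraint is the OR of the primitive overlap checks the
assembler must perform. A simplified mirror of the scheme (the flag values
are copied from the RISCVInstrInfo.h hunk later in this patch; the helper
function is illustrative, not part of the patch):

```cpp
// Simplified mirror of the bitmask scheme in RISCVInstrInfo.h; the real
// validateInstruction tests these flags against the MCInst operands.
enum RVVConstraintType : unsigned {
  NoConstraint  = 0,
  VS2Constraint = 0b0001, // vd must not overlap vs2
  VS1Constraint = 0b0010, // vd must not overlap vs1
  VMConstraint  = 0b0100, // masked form: vd must not overlap v0
  OneInput      = 0b1000, // single-input form (mask is operand 2)
  WidenV = VS2Constraint | VS1Constraint | VMConstraint,
  Narrow = VS2Constraint | VMConstraint,
};

inline bool needsCheck(unsigned flags, RVVConstraintType c) {
  return (flags & c) != 0; // e.g. needsCheck(WidenV, VS2Constraint) == true
}
```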
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td index e5f154966ba61..8ca010d033c39 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td @@ -21,20 +21,17 @@ def OPIVX : RISCVVFormat<0b100>; def OPFVF : RISCVVFormat<0b101>; def OPMVX : RISCVVFormat<0b110>; -class RISCVMOP val> { - bits<3> Value = val; +class RISCVMOP val> { + bits<2> Value = val; } -def MOPLDUnitStrideU : RISCVMOP<0b000>; -def MOPLDStridedU : RISCVMOP<0b010>; -def MOPLDIndexedU : RISCVMOP<0b011>; -def MOPLDUnitStrideS : RISCVMOP<0b100>; -def MOPLDStridedS : RISCVMOP<0b110>; -def MOPLDIndexedS : RISCVMOP<0b111>; - -def MOPSTUnitStride : RISCVMOP<0b000>; -def MOPSTStrided : RISCVMOP<0b010>; -def MOPSTIndexedOrder: RISCVMOP<0b011>; -def MOPSTIndexedUnOrd: RISCVMOP<0b111>; +def MOPLDUnitStride : RISCVMOP<0b00>; +def MOPLDStrided : RISCVMOP<0b10>; +def MOPLDIndexed : RISCVMOP<0b11>; + +def MOPSTUnitStride : RISCVMOP<0b00>; +def MOPSTIndexedUnord : RISCVMOP<0b01>; +def MOPSTStrided : RISCVMOP<0b10>; +def MOPSTIndexedOrder : RISCVMOP<0b11>; class RISCVLSUMOP val> { bits<5> Value = val; @@ -45,13 +42,17 @@ def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>; def SUMOPUnitStride : RISCVLSUMOP<0b00000>; def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; -class RISCVWidth val> { - bits<3> Value = val; +class RISCVWidth val> { + bits<4> Value = val; } -def LSWidthVByte : RISCVWidth<0b000>; -def LSWidthVHalf : RISCVWidth<0b101>; -def LSWidthVWord : RISCVWidth<0b110>; -def LSWidthVSEW : RISCVWidth<0b111>; +def LSWidth8 : RISCVWidth<0b0000>; +def LSWidth16 : RISCVWidth<0b0101>; +def LSWidth32 : RISCVWidth<0b0110>; +def LSWidth64 : RISCVWidth<0b0111>; +def LSWidth128 : RISCVWidth<0b1000>; +def LSWidth256 : RISCVWidth<0b1101>; +def LSWidth512 : RISCVWidth<0b1110>; +def LSWidth1024 : RISCVWidth<0b1111>; class RVInstSetVLi : RVInst { @@ -103,6 +104,7 @@ class RVInstVV funct6, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstVX funct6, RISCVVFormat opv, dag outs, dag ins, @@ -122,6 +124,7 @@ class RVInstVX funct6, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstV2 funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins, @@ -140,6 +143,7 @@ class RVInstV2 funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstIVI funct6, dag outs, dag ins, string opcodestr, @@ -159,6 +163,7 @@ class RVInstIVI funct6, dag outs, dag ins, string opcodestr, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } class RVInstV funct6, bits<5> vs1, RISCVVFormat opv, dag outs, @@ -177,10 +182,11 @@ class RVInstV funct6, bits<5> vs1, RISCVVFormat opv, dag outs, let Opcode = OPC_OP_V.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLU nf, RISCVMOP mop, RISCVLSUMOP lumop, - RISCVWidth width, dag outs, dag ins, string opcodestr, +class RVInstVLU nf, bit mew, RISCVLSUMOP lumop, + bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> rs1; @@ -188,18 +194,20 @@ class RVInstVLU nf, RISCVMOP mop, RISCVLSUMOP lumop, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPLDUnitStride.Value; let Inst{25} = vm; let Inst{24-20} = lumop.Value; let 
Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLS nf, RISCVMOP mop, RISCVWidth width, +class RVInstVLS nf, bit mew, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> rs2; @@ -208,18 +216,20 @@ class RVInstVLS nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPLDStrided.Value; let Inst{25} = vm; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVLX nf, RISCVMOP mop, RISCVWidth width, +class RVInstVLX nf, bit mew, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> vs2; @@ -228,19 +238,21 @@ class RVInstVLX nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPLDIndexed.Value; let Inst{25} = vm; let Inst{24-20} = vs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vd; let Opcode = OPC_LOAD_FP.Value; let Uses = [VTYPE, VL]; + let RVVConstraint = VMConstraint; } -class RVInstVSU nf, RISCVMOP mop, RISCVLSUMOP sumop, - RISCVWidth width, dag outs, dag ins, string opcodestr, +class RVInstVSU nf, bit mew, RISCVLSUMOP sumop, + bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> rs1; @@ -248,18 +260,19 @@ class RVInstVSU nf, RISCVMOP mop, RISCVLSUMOP sumop, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPSTUnitStride.Value; let Inst{25} = vm; let Inst{24-20} = sumop.Value; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; let Uses = [VTYPE, VL]; } -class RVInstVSS nf, RISCVMOP mop, RISCVWidth width, +class RVInstVSS nf, bit mew, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> rs2; @@ -268,18 +281,19 @@ class RVInstVSS nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = MOPSTStrided.Value; let Inst{25} = vm; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; let Uses = [VTYPE, VL]; } -class RVInstVSX nf, RISCVMOP mop, RISCVWidth width, +class RVInstVSX nf, bit mew, RISCVMOP mop, bits<3> width, dag outs, dag ins, string opcodestr, string argstr> : RVInst { bits<5> vs2; @@ -288,11 +302,12 @@ class RVInstVSX nf, RISCVMOP mop, RISCVWidth width, bit vm; let Inst{31-29} = nf; - let Inst{28-26} = mop.Value; + let Inst{28} = mew; + let Inst{27-26} = mop.Value; let Inst{25} = vm; let Inst{24-20} = vs2; let Inst{19-15} = rs1; - let Inst{14-12} = width.Value; + let Inst{14-12} = width; let Inst{11-7} = vs3; let Opcode = OPC_STORE_FP.Value; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 21bc508cdc9ca..cd8b6d5fba59c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -138,14 +138,53 @@ namespace RISCV { // Match with the definitions in RISCVInstrFormatsV.td enum 
RVVConstraintType { NoConstraint = 0, - WidenV = 1, - WidenW = 2, - WidenCvt = 3, - Narrow = 4, - Iota = 5, - SlideUp = 6, - Vrgather = 7, - Vcompress = 8, + VS2Constraint = 0b0001, + VS1Constraint = 0b0010, + VMConstraint = 0b0100, + OneInput = 0b1000, + + // Illegal instructions: + // + // * The destination vector register group for a masked vector instruction + // cannot overlap the source mask register (v0), unless the destination vector + // register is being written with a mask value (e.g., comparisons) or the + // scalar result of a reduction. + // + // * Widening: The destination vector register group cannot overlap a source + // vector register group of a different EEW + // + // * Narrowing: The destination vector register group cannot overlap the + // first source vector register group + // + // * For vadc and vsbc, an illegal instruction exception is raised if the + // destination vector register is v0. + // + // * For vmadc and vmsbc, an illegal instruction exception is raised if the + // destination vector register overlaps a source vector register group. + // + // * viota: An illegal instruction exception is raised if the destination + // vector register group overlaps the source vector mask register. If the + // instruction is masked, an illegal instruction exception is issued if the + // destination vector register group overlaps v0. + // + // * v[f]slide[1]up: The destination vector register group for vslideup cannot + // overlap the source vector register group. + // + // * vrgather: The destination vector register group cannot overlap with the + // source vector register groups. + // + // * vcompress: The destination vector register group cannot overlap the + // source vector register group or the source mask register + WidenV = VS2Constraint | VS1Constraint | VMConstraint, + WidenW = VS1Constraint | VMConstraint, + WidenCvt = VS2Constraint | VMConstraint | OneInput, + Narrow = VS2Constraint | VMConstraint, + NarrowCvt = VS2Constraint | VMConstraint | OneInput, + Vmadc = VS2Constraint | VS1Constraint, + Iota = VS2Constraint | VMConstraint | OneInput, + SlideUp = VS2Constraint | VMConstraint, + Vrgather = VS2Constraint | VS1Constraint | VMConstraint, + Vcompress = VS2Constraint | VS1Constraint, ConstraintOffset = 5, ConstraintMask = 0b1111 diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 1c7f53fecb8c9..ae335ddce882a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// /// /// This file describes the RISC-V instructions from the standard 'V' Vector -/// extension, version 0.8. +/// extension, version 0.9. /// This version is still experimental as the 'V' extension hasn't been /// ratified yet. 
/// @@ -96,27 +96,30 @@ def simm5_plus1 : Operand, ImmLeaf - : RVInstVLU<0b000, mop, lumop, width, (outs VRegOp:$vd), +class VUnitStrideLoad + : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0}, + (outs VRegOp:$vd), (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">; // load vd, (rs1), rs2, vm -class VStridedLoad - : RVInstVLS<0b000, mop, width, (outs VRegOp:$vd), +class VStridedLoad + : RVInstVLS<0b000, width.Value{3}, width.Value{2-0}, + (outs VRegOp:$vd), (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $rs2$vm">; // load vd, (rs1), vs2, vm -class VIndexedLoad - : RVInstVLX<0b000, mop, width, (outs VRegOp:$vd), +class VIndexedLoad + : RVInstVLX<0b000, width.Value{3}, width.Value{2-0}, + (outs VRegOp:$vd), (ins GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $vs2$vm">; // vlr.v vd, (rs1) class VWholeLoad nf, string opcodestr> - : RVInstVLU { let vm = 1; let Uses = []; @@ -125,28 +128,28 @@ class VWholeLoad nf, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { // store vd, vs3, (rs1), vm -class VUnitStrideStore - : RVInstVSU<0b000, mop, sumop, width, (outs), - (ins VRegOp:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, + : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0}, + (outs), (ins VRegOp:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, "$vs3, (${rs1})$vm">; // store vd, vs3, (rs1), rs2, vm -class VStridedStore - : RVInstVSS<0b000, mop, width, (outs), +class VStridedStore + : RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs), (ins VRegOp:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $rs2$vm">; // store vd, vs3, (rs1), vs2, vm class VIndexedStore - : RVInstVSX<0b000, mop, width, (outs), + : RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs), (ins VRegOp:$vs3, GPR:$rs1, VRegOp:$vs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $vs2$vm">; // vsr.v vd, (rs1) class VWholeStore nf, string opcodestr> - : RVInstVSU { let vm = 1; let Uses = []; @@ -372,68 +375,79 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLB_V : VUnitStrideLoad; -def VLH_V : VUnitStrideLoad; -def VLW_V : VUnitStrideLoad; - -def VLBU_V : VUnitStrideLoad; -def VLHU_V : VUnitStrideLoad; -def VLWU_V : VUnitStrideLoad; - -def VLE_V : VUnitStrideLoad; - -def VLBFF_V : VUnitStrideLoad; -def VLHFF_V : VUnitStrideLoad; -def VLWFF_V : VUnitStrideLoad; - -def VLBUFF_V : VUnitStrideLoad; -def VLHUFF_V : VUnitStrideLoad; -def VLWUFF_V : VUnitStrideLoad; - -def VLEFF_V : VUnitStrideLoad; - -def VSB_V : VUnitStrideStore; -def VSH_V : VUnitStrideStore; -def VSW_V : VUnitStrideStore; - -def VSE_V : VUnitStrideStore; +def VLE8_V : VUnitStrideLoad; +def VLE16_V : VUnitStrideLoad; +def VLE32_V : VUnitStrideLoad; +def VLE64_V : VUnitStrideLoad; +def VLE128_V : VUnitStrideLoad; +def VLE256_V : VUnitStrideLoad; +def VLE512_V : VUnitStrideLoad; +def VLE1024_V : VUnitStrideLoad; + +def VLE8FF_V : VUnitStrideLoad; +def VLE16FF_V : VUnitStrideLoad; +def VLE32FF_V : VUnitStrideLoad; +def VLE64FF_V : VUnitStrideLoad; +def VLE128FF_V : VUnitStrideLoad; +def VLE256FF_V : VUnitStrideLoad; +def VLE512FF_V : VUnitStrideLoad; +def VLE1024FF_V : VUnitStrideLoad; + +def VSE8_V : VUnitStrideStore; +def VSE16_V : VUnitStrideStore; +def VSE32_V : VUnitStrideStore; +def VSE64_V : VUnitStrideStore; +def VSE128_V : VUnitStrideStore; +def VSE256_V : VUnitStrideStore; +def VSE512_V : VUnitStrideStore; +def VSE1024_V : VUnitStrideStore; // Vector 
Strided Instructions -def VLSB_V : VStridedLoad; -def VLSH_V : VStridedLoad; -def VLSW_V : VStridedLoad; - -def VLSBU_V : VStridedLoad; -def VLSHU_V : VStridedLoad; -def VLSWU_V : VStridedLoad; - -def VLSE_V : VStridedLoad; - -def VSSB_V : VStridedStore; -def VSSH_V : VStridedStore; -def VSSW_V : VStridedStore; -def VSSE_V : VStridedStore; +def VLSE8_V : VStridedLoad; +def VLSE16_V : VStridedLoad; +def VLSE32_V : VStridedLoad; +def VLSE64_V : VStridedLoad; +def VLSE128_V : VStridedLoad; +def VLSE256_V : VStridedLoad; +def VLSE512_V : VStridedLoad; +def VLSE1024_V : VStridedLoad; + +def VSSE8_V : VStridedStore; +def VSSE16_V : VStridedStore; +def VSSE32_V : VStridedStore; +def VSSE64_V : VStridedStore; +def VSSE128_V : VStridedStore; +def VSSE256_V : VStridedStore; +def VSSE512_V : VStridedStore; +def VSSE1024_V : VStridedStore; // Vector Indexed Instructions -def VLXB_V : VIndexedLoad; -def VLXH_V : VIndexedLoad; -def VLXW_V : VIndexedLoad; - -def VLXBU_V : VIndexedLoad; -def VLXHU_V : VIndexedLoad; -def VLXWU_V : VIndexedLoad; - -def VLXE_V : VIndexedLoad; - -def VSXB_V : VIndexedStore; -def VSXH_V : VIndexedStore; -def VSXW_V : VIndexedStore; -def VSXE_V : VIndexedStore; - -def VSUXB_V : VIndexedStore; -def VSUXH_V : VIndexedStore; -def VSUXW_V : VIndexedStore; -def VSUXE_V : VIndexedStore; +def VLXEI8_V : VIndexedLoad; +def VLXEI16_V : VIndexedLoad; +def VLXEI32_V : VIndexedLoad; +def VLXEI64_V : VIndexedLoad; +def VLXEI128_V : VIndexedLoad; +def VLXEI256_V : VIndexedLoad; +def VLXEI512_V : VIndexedLoad; +def VLXEI1024_V : VIndexedLoad; + +def VSXEI8_V : VIndexedStore; +def VSXEI16_V : VIndexedStore; +def VSXEI32_V : VIndexedStore; +def VSXEI64_V : VIndexedStore; +def VSXEI128_V : VIndexedStore; +def VSXEI256_V : VIndexedStore; +def VSXEI512_V : VIndexedStore; +def VSXEI1024_V : VIndexedStore; + +def VSUXEI8_V : VIndexedStore; +def VSUXEI16_V : VIndexedStore; +def VSUXEI32_V : VIndexedStore; +def VSUXEI64_V : VIndexedStore; +def VSUXEI128_V : VIndexedStore; +def VSUXEI256_V : VIndexedStore; +def VSUXEI512_V : VIndexedStore; +def VSUXEI1024_V : VIndexedStore; def VL1R_V : VWholeLoad<0, "vl1r.v">; def VS1R_V : VWholeStore<0, "vs1r.v">; @@ -472,13 +486,25 @@ def : InstAlias<"vwcvt.x.x.v $vd, $vs$vm", def : InstAlias<"vwcvtu.x.x.v $vd, $vs$vm", (VWADDU_VX VRegOp:$vd, VRegOp:$vs, X0, VMaskOp:$vm)>; +// Vector Integer Extension +defm VZEXT_VF8 : VALU_MV_VS2<"vzext.vf8", 0b010010, 0b00010>; +defm VSEXT_VF8 : VALU_MV_VS2<"vsext.vf8", 0b010010, 0b00011>; +defm VZEXT_VF4 : VALU_MV_VS2<"vzext.vf4", 0b010010, 0b00100>; +defm VSEXT_VF4 : VALU_MV_VS2<"vsext.vf4", 0b010010, 0b00101>; +defm VZEXT_VF2 : VALU_MV_VS2<"vzext.vf2", 0b010010, 0b00110>; +defm VSEXT_VF2 : VALU_MV_VS2<"vsext.vf2", 0b010010, 0b00111>; + // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>; +let Constraints = "@earlyclobber $vd", RVVConstraint = Vmadc in { defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>; defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>; +} // Constraints = "@earlyclobber $vd", RVVConstraint = Vmadc defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>; +let Constraints = "@earlyclobber $vd", RVVConstraint = Vmadc in { defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>; defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>; +} // Constraints = "@earlyclobber $vd", RVVConstraint = Vmadc // Vector Bitwise Logical Instructions defm VAND_V : VALU_IV_V_X_I<"vand", 0b001001>; @@ -504,6 +530,7 @@ defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">; } // 
Constraints = "@earlyclobber $vd", RVVConstraint = Narrow // Vector Integer Comparison Instructions +let RVVConstraint = NoConstraint in { defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>; defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>; defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>; @@ -512,6 +539,7 @@ defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>; defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>; defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>; defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>; +} // RVVConstraint = NoConstraint def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm", (VMSLTU_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; @@ -664,7 +692,7 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV // Vector Floating-Point Square-Root Instruction -defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b100011, 0b00000>; +defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>; // Vector Floating-Point MIN/MAX Instructions defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>; @@ -676,12 +704,14 @@ defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>; defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>; // Vector Floating-Point Compare Instructions +let RVVConstraint = NoConstraint in { defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>; defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>; defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>; defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>; defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>; defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>; +} // RVVConstraint = NoConstraint def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm", (VMFLT_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; @@ -689,7 +719,7 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm", (VMFLE_VV VRegOp:$vd, VRegOp:$vb, VRegOp:$va, VMaskOp:$vm), 0>; // Vector Floating-Point Classify Instruction -defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b100011, 0b10000>; +defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { // Vector Floating-Point Merge Instruction @@ -708,31 +738,38 @@ def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VRegOp:$vd), } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Single-Width Floating-Point/Integer Type-Convert Instructions -defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b100010, 0b00000>; -defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b100010, 0b00001>; -defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b100010, 0b00010>; -defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b100010, 0b00011>; +defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>; +defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>; +defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>; +defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>; +defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>; +defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>; // Widening Floating-Point/Integer Type-Convert Instructions let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in { -defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b100010, 0b01000>; -defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b100010, 0b01001>; -defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b100010, 0b01010>; -defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b100010, 0b01011>; -defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b100010, 0b01100>; +defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>; +defm VFWCVT_X_F_V : 
VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>; +defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>; +defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>; +defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>; +defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>; +defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>; } // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt // Narrowing Floating-Point/Integer Type-Convert Instructions -let Constraints = "@earlyclobber $vd", RVVConstraint = Narrow in { -defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b100010, 0b10000>; -defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b100010, 0b10001>; -defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b100010, 0b10010>; -defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b100010, 0b10011>; -defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b100010, 0b10100>; -defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b100010, 0b10101>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = Narrow +let Constraints = "@earlyclobber $vd", RVVConstraint = NarrowCvt in { +defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>; +defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>; +defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>; +defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>; +defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>; +defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>; +defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>; +defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>; +} // Constraints = "@earlyclobber $vd", RVVConstraint = NarrowCvt // Vector Single-Width Integer Reduction Instructions +let RVVConstraint = NoConstraint in { defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>; defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>; defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>; @@ -741,32 +778,35 @@ defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>; defm VREDAND : VALU_MV_V<"vredand", 0b000001>; defm VREDOR : VALU_MV_V<"vredor", 0b000010>; defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>; +} // RVVConstraint = NoConstraint // Vector Widening Integer Reduction Instructions -let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // Set earlyclobber for following instructions for second and mask operands. // This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>; defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>; -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint // Vector Single-Width Floating-Point Reduction Instructions +let RVVConstraint = NoConstraint in { defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>; defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>; defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>; defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>; +} // RVVConstraint = NoConstraint // Vector Widening Floating-Point Reduction Instructions -let Constraints = "@earlyclobber $vd" in { +let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { // Set earlyclobber for following instructions for second and mask operands. 
// This has the downside that the earlyclobber constraint is too coarse and // will impose unnecessary restrictions by not allowing the destination to // overlap with the first (wide) operand. defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>; defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>; -} // Constraints = "@earlyclobber $vd" +} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint // Vector Mask-Register Logical Instructions defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">; @@ -778,7 +818,7 @@ defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">; defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">; defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">; -def : InstAlias<"vmcpy.m $vd, $vs", +def : InstAlias<"vmmv.m $vd, $vs", (VMAND_MM VRegOp:$vd, VRegOp:$vs, VRegOp:$vs)>; def : InstAlias<"vmclr.m $vd", (VMXOR_MM VRegOp:$vd, VRegOp:$vd, VRegOp:$vd)>; @@ -847,8 +887,10 @@ defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>; let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in { defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>; +defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>; } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>; +defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>; // Vector Register Gather Instruction let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in { diff --git a/llvm/test/MC/RISCV/rvv/compare.s b/llvm/test/MC/RISCV/rvv/compare.s index 7bf452f771a7e..f93aeac1796a7 100644 --- a/llvm/test/MC/RISCV/rvv/compare.s +++ b/llvm/test/MC/RISCV/rvv/compare.s @@ -8,6 +8,12 @@ # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +vmslt.vv v0, v4, v20, v0.t +# CHECK-INST: vmslt.vv v0, v4, v20, v0.t +# CHECK-ENCODING: [0x57,0x00,0x4a,0x6c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 00 4a 6c + vmseq.vv v8, v4, v20, v0.t # CHECK-INST: vmseq.vv v8, v4, v20, v0.t # CHECK-ENCODING: [0x57,0x04,0x4a,0x60] diff --git a/llvm/test/MC/RISCV/rvv/convert.s b/llvm/test/MC/RISCV/rvv/convert.s index e9f8860e97b59..bc818523e3e9c 100644 --- a/llvm/test/MC/RISCV/rvv/convert.s +++ b/llvm/test/MC/RISCV/rvv/convert.s @@ -10,180 +10,252 @@ vfcvt.xu.f.v v8, v4, v0.t # CHECK-INST: vfcvt.xu.f.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x40,0x88] +# CHECK-ENCODING: [0x57,0x14,0x40,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 40 88 +# CHECK-UNKNOWN: 57 14 40 48 vfcvt.xu.f.v v8, v4 # CHECK-INST: vfcvt.xu.f.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x40,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x40,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 40 8a +# CHECK-UNKNOWN: 57 14 40 4a vfcvt.x.f.v v8, v4, v0.t # CHECK-INST: vfcvt.x.f.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x40,0x88] +# CHECK-ENCODING: [0x57,0x94,0x40,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 40 88 +# CHECK-UNKNOWN: 57 94 40 48 vfcvt.x.f.v v8, v4 # CHECK-INST: vfcvt.x.f.v v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x40,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x40,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 40 8a +# CHECK-UNKNOWN: 57 94 40 4a vfcvt.f.xu.v v8, v4, v0.t # CHECK-INST: vfcvt.f.xu.v v8, v4, v0.t -# CHECK-ENCODING: 
[0x57,0x14,0x41,0x88] +# CHECK-ENCODING: [0x57,0x14,0x41,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 41 88 +# CHECK-UNKNOWN: 57 14 41 48 vfcvt.f.xu.v v8, v4 # CHECK-INST: vfcvt.f.xu.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x41,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x41,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 41 8a +# CHECK-UNKNOWN: 57 14 41 4a vfcvt.f.x.v v8, v4, v0.t # CHECK-INST: vfcvt.f.x.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x41,0x88] +# CHECK-ENCODING: [0x57,0x94,0x41,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 41 88 +# CHECK-UNKNOWN: 57 94 41 48 vfcvt.f.x.v v8, v4 # CHECK-INST: vfcvt.f.x.v v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x41,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x41,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 41 8a +# CHECK-UNKNOWN: 57 94 41 4a + +vfcvt.rtz.xu.f.v v8, v4, v0.t +# CHECK-INST: vfcvt.rtz.xu.f.v v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x14,0x43,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 43 48 + +vfcvt.rtz.xu.f.v v8, v4 +# CHECK-INST: vfcvt.rtz.xu.f.v v8, v4 +# CHECK-ENCODING: [0x57,0x14,0x43,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 43 4a + +vfcvt.rtz.x.f.v v8, v4, v0.t +# CHECK-INST: vfcvt.rtz.x.f.v v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x94,0x43,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 43 48 + +vfcvt.rtz.x.f.v v8, v4 +# CHECK-INST: vfcvt.rtz.x.f.v v8, v4 +# CHECK-ENCODING: [0x57,0x94,0x43,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 43 4a vfwcvt.xu.f.v v8, v4, v0.t # CHECK-INST: vfwcvt.xu.f.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x44,0x88] +# CHECK-ENCODING: [0x57,0x14,0x44,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 44 88 +# CHECK-UNKNOWN: 57 14 44 48 vfwcvt.xu.f.v v8, v4 # CHECK-INST: vfwcvt.xu.f.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x44,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x44,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 44 8a +# CHECK-UNKNOWN: 57 14 44 4a vfwcvt.x.f.v v8, v4, v0.t # CHECK-INST: vfwcvt.x.f.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x44,0x88] +# CHECK-ENCODING: [0x57,0x94,0x44,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 44 88 +# CHECK-UNKNOWN: 57 94 44 48 vfwcvt.x.f.v v8, v4 # CHECK-INST: vfwcvt.x.f.v v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x44,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x44,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 44 8a +# CHECK-UNKNOWN: 57 94 44 4a vfwcvt.f.xu.v v8, v4, v0.t # CHECK-INST: vfwcvt.f.xu.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x45,0x88] +# CHECK-ENCODING: [0x57,0x14,0x45,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 45 88 +# CHECK-UNKNOWN: 57 14 45 48 vfwcvt.f.xu.v v8, v4 # CHECK-INST: vfwcvt.f.xu.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x45,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x45,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 
45 8a +# CHECK-UNKNOWN: 57 14 45 4a vfwcvt.f.x.v v8, v4, v0.t # CHECK-INST: vfwcvt.f.x.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x45,0x88] +# CHECK-ENCODING: [0x57,0x94,0x45,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 45 88 +# CHECK-UNKNOWN: 57 94 45 48 vfwcvt.f.x.v v8, v4 # CHECK-INST: vfwcvt.f.x.v v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x45,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x45,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 45 8a +# CHECK-UNKNOWN: 57 94 45 4a vfwcvt.f.f.v v8, v4, v0.t # CHECK-INST: vfwcvt.f.f.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x46,0x88] +# CHECK-ENCODING: [0x57,0x14,0x46,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 46 88 +# CHECK-UNKNOWN: 57 14 46 48 vfwcvt.f.f.v v8, v4 # CHECK-INST: vfwcvt.f.f.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x46,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x46,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 46 4a + +vfwcvt.rtz.xu.f.v v8, v4, v0.t +# CHECK-INST: vfwcvt.rtz.xu.f.v v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x14,0x47,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 47 48 + +vfwcvt.rtz.xu.f.v v8, v4 +# CHECK-INST: vfwcvt.rtz.xu.f.v v8, v4 +# CHECK-ENCODING: [0x57,0x14,0x47,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 46 8a +# CHECK-UNKNOWN: 57 14 47 4a + +vfwcvt.rtz.x.f.v v8, v4, v0.t +# CHECK-INST: vfwcvt.rtz.x.f.v v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x94,0x47,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 47 48 + +vfwcvt.rtz.x.f.v v8, v4 +# CHECK-INST: vfwcvt.rtz.x.f.v v8, v4 +# CHECK-ENCODING: [0x57,0x94,0x47,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 47 4a vfncvt.xu.f.w v8, v4, v0.t # CHECK-INST: vfncvt.xu.f.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x48,0x88] +# CHECK-ENCODING: [0x57,0x14,0x48,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 48 88 +# CHECK-UNKNOWN: 57 14 48 48 vfncvt.xu.f.w v8, v4 # CHECK-INST: vfncvt.xu.f.w v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x48,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x48,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 48 8a +# CHECK-UNKNOWN: 57 14 48 4a vfncvt.x.f.w v8, v4, v0.t # CHECK-INST: vfncvt.x.f.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x48,0x88] +# CHECK-ENCODING: [0x57,0x94,0x48,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 48 88 +# CHECK-UNKNOWN: 57 94 48 48 vfncvt.x.f.w v8, v4 # CHECK-INST: vfncvt.x.f.w v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x48,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x48,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 48 8a +# CHECK-UNKNOWN: 57 94 48 4a vfncvt.f.xu.w v8, v4, v0.t # CHECK-INST: vfncvt.f.xu.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x49,0x88] +# CHECK-ENCODING: [0x57,0x14,0x49,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 49 88 +# CHECK-UNKNOWN: 57 14 49 48 vfncvt.f.xu.w v8, v4 # CHECK-INST: vfncvt.f.xu.w v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x49,0x8a] +# 
CHECK-ENCODING: [0x57,0x14,0x49,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 49 8a +# CHECK-UNKNOWN: 57 14 49 4a vfncvt.f.x.w v8, v4, v0.t # CHECK-INST: vfncvt.f.x.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x49,0x88] +# CHECK-ENCODING: [0x57,0x94,0x49,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 49 88 +# CHECK-UNKNOWN: 57 94 49 48 vfncvt.f.x.w v8, v4 # CHECK-INST: vfncvt.f.x.w v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x49,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x49,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 49 8a +# CHECK-UNKNOWN: 57 94 49 4a vfncvt.f.f.w v8, v4, v0.t # CHECK-INST: vfncvt.f.f.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x4a,0x88] +# CHECK-ENCODING: [0x57,0x14,0x4a,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 4a 88 +# CHECK-UNKNOWN: 57 14 4a 48 vfncvt.f.f.w v8, v4 # CHECK-INST: vfncvt.f.f.w v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x4a,0x8a] +# CHECK-ENCODING: [0x57,0x14,0x4a,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 4a 8a +# CHECK-UNKNOWN: 57 14 4a 4a vfncvt.rod.f.f.w v8, v4, v0.t # CHECK-INST: vfncvt.rod.f.f.w v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x94,0x4a,0x88] +# CHECK-ENCODING: [0x57,0x94,0x4a,0x48] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 4a 88 +# CHECK-UNKNOWN: 57 94 4a 48 vfncvt.rod.f.f.w v8, v4 # CHECK-INST: vfncvt.rod.f.f.w v8, v4 -# CHECK-ENCODING: [0x57,0x94,0x4a,0x8a] +# CHECK-ENCODING: [0x57,0x94,0x4a,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 4a 4a + +vfncvt.rtz.xu.f.w v8, v4, v0.t +# CHECK-INST: vfncvt.rtz.xu.f.w v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x14,0x4b,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 4b 48 + +vfncvt.rtz.xu.f.w v8, v4 +# CHECK-INST: vfncvt.rtz.xu.f.w v8, v4 +# CHECK-ENCODING: [0x57,0x14,0x4b,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 14 4b 4a + +vfncvt.rtz.x.f.w v8, v4, v0.t +# CHECK-INST: vfncvt.rtz.x.f.w v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x94,0x4b,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 94 4b 48 + +vfncvt.rtz.x.f.w v8, v4 +# CHECK-INST: vfncvt.rtz.x.f.w v8, v4 +# CHECK-ENCODING: [0x57,0x94,0x4b,0x4a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 94 4a 8a +# CHECK-UNKNOWN: 57 94 4b 4a diff --git a/llvm/test/MC/RISCV/rvv/ext.s b/llvm/test/MC/RISCV/rvv/ext.s new file mode 100644 index 0000000000000..45ac980b47b65 --- /dev/null +++ b/llvm/test/MC/RISCV/rvv/ext.s @@ -0,0 +1,81 @@ +# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \ +# RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ +# RUN: | FileCheck %s --check-prefix=CHECK-ERROR +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-v - \ +# RUN: | FileCheck %s --check-prefix=CHECK-INST +# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ +# RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN + +vzext.vf2 v8, v4, v0.t +# 
CHECK-INST: vzext.vf2 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x24,0x43,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 43 48 + +vzext.vf2 v8, v4 +# CHECK-INST: vzext.vf2 v8, v4 +# CHECK-ENCODING: [0x57,0x24,0x43,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 43 4a + +vsext.vf2 v8, v4, v0.t +# CHECK-INST: vsext.vf2 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0xa4,0x43,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 43 48 + +vsext.vf2 v8, v4 +# CHECK-INST: vsext.vf2 v8, v4 +# CHECK-ENCODING: [0x57,0xa4,0x43,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 43 4a + +vzext.vf4 v8, v4, v0.t +# CHECK-INST: vzext.vf4 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x24,0x42,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 42 48 + +vzext.vf4 v8, v4 +# CHECK-INST: vzext.vf4 v8, v4 +# CHECK-ENCODING: [0x57,0x24,0x42,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 42 4a + +vsext.vf4 v8, v4, v0.t +# CHECK-INST: vsext.vf4 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0xa4,0x42,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 42 48 + +vsext.vf4 v8, v4 +# CHECK-INST: vsext.vf4 v8, v4 +# CHECK-ENCODING: [0x57,0xa4,0x42,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 42 4a + +vzext.vf8 v8, v4, v0.t +# CHECK-INST: vzext.vf8 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0x24,0x41,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 41 48 + +vzext.vf8 v8, v4 +# CHECK-INST: vzext.vf8 v8, v4 +# CHECK-ENCODING: [0x57,0x24,0x41,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 24 41 4a + +vsext.vf8 v8, v4, v0.t +# CHECK-INST: vsext.vf8 v8, v4, v0.t +# CHECK-ENCODING: [0x57,0xa4,0x41,0x48] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 41 48 + +vsext.vf8 v8, v4 +# CHECK-INST: vsext.vf8 v8, v4 +# CHECK-ENCODING: [0x57,0xa4,0x41,0x4a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 a4 41 4a diff --git a/llvm/test/MC/RISCV/rvv/fcompare.s b/llvm/test/MC/RISCV/rvv/fcompare.s index 9ad55dc54a07e..4bb4e2d966f28 100644 --- a/llvm/test/MC/RISCV/rvv/fcompare.s +++ b/llvm/test/MC/RISCV/rvv/fcompare.s @@ -151,3 +151,9 @@ vmfge.vv v8, v20, v4 # CHECK-ENCODING: [0x57,0x14,0x4a,0x66] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 14 4a 66 + +vmfeq.vv v0, v4, v20, v0.t +# CHECK-INST: vmfeq.vv v0, v4, v20, v0.t +# CHECK-ENCODING: [0x57,0x10,0x4a,0x60] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 10 4a 60 diff --git a/llvm/test/MC/RISCV/rvv/fothers.s b/llvm/test/MC/RISCV/rvv/fothers.s index 768800def4248..b9e03842fa2de 100644 --- a/llvm/test/MC/RISCV/rvv/fothers.s +++ b/llvm/test/MC/RISCV/rvv/fothers.s @@ -10,30 +10,54 @@ vfsqrt.v v8, v4, v0.t # CHECK-INST: vfsqrt.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x40,0x8c] +# CHECK-ENCODING: [0x57,0x14,0x40,0x4c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 40 8c +# CHECK-UNKNOWN: 
57 14 40 4c vfsqrt.v v8, v4 # CHECK-INST: vfsqrt.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x40,0x8e] +# CHECK-ENCODING: [0x57,0x14,0x40,0x4e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 40 8e +# CHECK-UNKNOWN: 57 14 40 4e vfclass.v v8, v4, v0.t # CHECK-INST: vfclass.v v8, v4, v0.t -# CHECK-ENCODING: [0x57,0x14,0x48,0x8c] +# CHECK-ENCODING: [0x57,0x14,0x48,0x4c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 48 8c +# CHECK-UNKNOWN: 57 14 48 4c vfclass.v v8, v4 # CHECK-INST: vfclass.v v8, v4 -# CHECK-ENCODING: [0x57,0x14,0x48,0x8e] +# CHECK-ENCODING: [0x57,0x14,0x48,0x4e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 14 48 8e +# CHECK-UNKNOWN: 57 14 48 4e vfmerge.vfm v8, v4, fa0, v0 # CHECK-INST: vfmerge.vfm v8, v4, fa0, v0 # CHECK-ENCODING: [0x57,0x54,0x45,0x5c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 54 45 5c + +vfslide1up.vf v8, v4, fa0, v0.t +# CHECK-INST: vfslide1up.vf v8, v4, fa0, v0.t +# CHECK-ENCODING: [0x57,0x54,0x45,0x38] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 54 45 38 + +vfslide1up.vf v8, v4, fa0 +# CHECK-INST: vfslide1up.vf v8, v4, fa0 +# CHECK-ENCODING: [0x57,0x54,0x45,0x3a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 54 45 3a + +vfslide1down.vf v8, v4, fa0, v0.t +# CHECK-INST: vfslide1down.vf v8, v4, fa0, v0.t +# CHECK-ENCODING: [0x57,0x54,0x45,0x3c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 54 45 3c + +vfslide1down.vf v8, v4, fa0 +# CHECK-INST: vfslide1down.vf v8, v4, fa0 +# CHECK-ENCODING: [0x57,0x54,0x45,0x3e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 54 45 3e diff --git a/llvm/test/MC/RISCV/rvv/freduction.s b/llvm/test/MC/RISCV/rvv/freduction.s index 2131dadff8ca8..a85e676f9a8ca 100644 --- a/llvm/test/MC/RISCV/rvv/freduction.s +++ b/llvm/test/MC/RISCV/rvv/freduction.s @@ -79,3 +79,9 @@ vfwredsum.vs v8, v4, v20 # CHECK-ENCODING: [0x57,0x14,0x4a,0xc6] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 14 4a c6 + +vfredosum.vs v0, v4, v20, v0.t +# CHECK-INST: vfredosum.vs v0, v4, v20, v0.t +# CHECK-ENCODING: [0x57,0x10,0x4a,0x0c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 10 4a 0c diff --git a/llvm/test/MC/RISCV/rvv/invalid.s b/llvm/test/MC/RISCV/rvv/invalid.s index 814093eba5059..615dc08ad67ce 100644 --- a/llvm/test/MC/RISCV/rvv/invalid.s +++ b/llvm/test/MC/RISCV/rvv/invalid.s @@ -2,22 +2,34 @@ # RUN: | FileCheck %s --check-prefix=CHECK-ERROR vsetvli a2, a0, e31 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vsetvli a2, a0, e32,m3 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vsetvli a2, a0, m1,e32 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vsetvli a2, a0, e32,m16 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand 
must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vsetvli a2, a0, e2048,m8 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vsetvli a2, a0, e1,m8 -# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8] +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] + +vsetvli a2, a0, e8,m1,tx +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] + +vsetvli a2, a0, e8,m1,ta,mx +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] + +vsetvli a2, a0, e8,m1,ma +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] + +vsetvli a2, a0, e8,m1,mu +# CHECK-ERROR: operand must be e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu] vadd.vv v1, v3, v2, v4.t # CHECK-ERROR: operand must be v0.t @@ -47,10 +59,6 @@ vfwcvt.xu.f.v v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwcvt.xu.f.v v2, v2 -vfwcvt.xu.f.v v2, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwcvt.xu.f.v v2, v3 - vfwcvt.x.f.v v0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwcvt.x.f.v v0, v2, v0.t @@ -59,10 +67,6 @@ vfwcvt.x.f.v v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwcvt.x.f.v v2, v2 -vfwcvt.x.f.v v2, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwcvt.x.f.v v2, v3 - vfwcvt.f.xu.v v0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwcvt.f.xu.v v0, v2, v0.t @@ -71,10 +75,6 @@ vfwcvt.f.xu.v v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwcvt.f.xu.v v2, v2 -vfwcvt.f.xu.v v2, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwcvt.f.xu.v v2, v3 - vfwcvt.f.x.v v0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwcvt.f.x.v v0, v2, v0.t @@ -83,10 +83,6 @@ vfwcvt.f.x.v v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwcvt.f.x.v v2, v2 -vfwcvt.f.x.v v2, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwcvt.f.x.v v2, v3 - vfwcvt.f.f.v v0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwcvt.f.f.v v0, v2, v0.t @@ -95,10 +91,6 @@ vfwcvt.f.f.v v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwcvt.f.f.v v2, v2 -vfwcvt.f.f.v v2, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
-# CHECK-ERROR-LABEL: vfwcvt.f.f.v v2, v3 - vslideup.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vslideup.vx v0, v2, a0, v0.t @@ -127,10 +119,6 @@ vnsrl.wv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnsrl.wv v2, v2, v4 -vnsrl.wv v3, v2, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vnsrl.wv v3, v2, v4 - vnsrl.wx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnsrl.wx v2, v2, a0 @@ -143,10 +131,6 @@ vnsra.wv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnsra.wv v2, v2, v4 -vnsra.wv v3, v2, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vnsra.wv v3, v2, v4 - vnsra.wx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnsra.wx v2, v2, a0 @@ -159,10 +143,6 @@ vnclipu.wv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnclipu.wv v2, v2, v4 -vnclipu.wv v3, v2, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vnclipu.wv v3, v2, v4 - vnclipu.wx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnclipu.wx v2, v2, a0 @@ -175,10 +155,6 @@ vnclip.wv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnclip.wv v2, v2, v4 -vnclip.wv v3, v2, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vnclip.wv v3, v2, v4 - vnclip.wx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vnclip.wx v2, v2, a0 @@ -191,50 +167,26 @@ vfncvt.xu.f.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfncvt.xu.f.w v2, v2 -vfncvt.xu.f.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.xu.f.w v3, v2 - vfncvt.x.f.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfncvt.x.f.w v2, v2 -vfncvt.x.f.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.x.f.w v3, v2 - vfncvt.f.xu.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfncvt.f.xu.w v2, v2 -vfncvt.f.xu.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.f.xu.w v3, v2 - vfncvt.f.x.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
# CHECK-ERROR-LABEL: vfncvt.f.x.w v2, v2 -vfncvt.f.x.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.f.x.w v3, v2 - vfncvt.f.f.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfncvt.f.f.w v2, v2 -vfncvt.f.f.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.f.f.w v3, v2 - vfncvt.rod.f.f.w v2, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfncvt.rod.f.f.w v2, v2 -vfncvt.rod.f.f.w v3, v2 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfncvt.rod.f.f.w v3, v2 - vrgather.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vrgather.vv v0, v2, v4, v0.t @@ -267,10 +219,6 @@ vwaddu.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwaddu.vv v2, v2, v4 -vwaddu.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwaddu.vv v2, v3, v4 - vwsubu.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwsubu.vv v0, v2, v4, v0.t @@ -279,10 +227,6 @@ vwsubu.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsubu.vv v2, v2, v4 -vwsubu.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsubu.vv v2, v3, v4 - vwadd.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwadd.vv v0, v2, v4, v0.t @@ -291,10 +235,6 @@ vwadd.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwadd.vv v2, v2, v4 -vwadd.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwadd.vv v2, v3, v4 - vwsub.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwsub.vv v0, v2, v4, v0.t @@ -303,10 +243,6 @@ vwsub.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsub.vv v2, v2, v4 -vwsub.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsub.vv v2, v3, v4 - vwmul.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmul.vv v0, v2, v4, v0.t @@ -315,10 +251,6 @@ vwmul.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmul.vv v2, v2, v4 -vwmul.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
-# CHECK-ERROR-LABEL: vwmul.vv v2, v3, v4 - vwmulu.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmulu.vv v0, v2, v4, v0.t @@ -327,10 +259,6 @@ vwmulu.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmulu.vv v2, v2, v4 -vwmulu.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmulu.vv v2, v3, v4 - vwmulsu.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmulsu.vv v0, v2, v4, v0.t @@ -339,10 +267,6 @@ vwmulsu.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmulsu.vv v2, v2, v4 -vwmulsu.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmulsu.vv v2, v3, v4 - vwmaccu.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmaccu.vv v0, v4, v2, v0.t @@ -351,10 +275,6 @@ vwmaccu.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmaccu.vv v2, v4, v2 -vwmaccu.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmaccu.vv v2, v4, v3 - vwmacc.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmacc.vv v0, v4, v2, v0.t @@ -363,10 +283,6 @@ vwmacc.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmacc.vv v2, v4, v2 -vwmacc.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmacc.vv v2, v4, v3 - vwmaccsu.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmaccsu.vv v0, v4, v2, v0.t @@ -375,10 +291,6 @@ vwmaccsu.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmaccsu.vv v2, v4, v2 -vwmaccsu.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmaccsu.vv v2, v4, v3 - vfwadd.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwadd.vv v0, v2, v4, v0.t @@ -387,10 +299,6 @@ vfwadd.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwadd.vv v2, v2, v4 -vfwadd.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwadd.vv v2, v3, v4 - vfwsub.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwsub.vv v0, v2, v4, v0.t @@ -399,10 +307,6 @@ vfwsub.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
# CHECK-ERROR-LABEL: vfwsub.vv v2, v2, v4 -vfwsub.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwsub.vv v2, v3, v4 - vfwmul.vv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmul.vv v0, v2, v4, v0.t @@ -411,10 +315,6 @@ vfwmul.vv v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwmul.vv v2, v2, v4 -vfwmul.vv v2, v3, v4 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmul.vv v2, v3, v4 - vfwmacc.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmacc.vv v0, v4, v2, v0.t @@ -423,10 +323,6 @@ vfwmacc.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwmacc.vv v2, v4, v2 -vfwmacc.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmacc.vv v2, v4, v3 - vfwnmacc.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwnmacc.vv v0, v4, v2, v0.t @@ -435,10 +331,6 @@ vfwnmacc.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwnmacc.vv v2, v4, v2 -vfwnmacc.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwnmacc.vv v2, v4, v3 - vfwmsac.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmsac.vv v0, v4, v2, v0.t @@ -447,10 +339,6 @@ vfwmsac.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwmsac.vv v2, v4, v2 -vfwmsac.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmsac.vv v2, v4, v3 - vfwnmsac.vv v0, v4, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwnmsac.vv v0, v4, v2, v0.t @@ -459,10 +347,6 @@ vfwnmsac.vv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwnmsac.vv v2, v4, v2 -vfwnmsac.vv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwnmsac.vv v2, v4, v3 - vwaddu.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwaddu.vx v0, v2, a0, v0.t @@ -471,10 +355,6 @@ vwaddu.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwaddu.vx v2, v2, a0 -vwaddu.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwaddu.vx v2, v3, a0 - vwsubu.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. 
# CHECK-ERROR-LABEL: vwsubu.vx v0, v2, a0, v0.t @@ -483,10 +363,6 @@ vwsubu.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsubu.vx v2, v2, a0 -vwsubu.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsubu.vx v2, v3, a0 - vwadd.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwadd.vx v0, v2, a0, v0.t @@ -495,10 +371,6 @@ vwadd.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwadd.vx v2, v2, a0 -vwadd.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwadd.vx v2, v3, a0 - vwsub.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwsub.vx v0, v2, a0, v0.t @@ -507,10 +379,6 @@ vwsub.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsub.vx v2, v2, a0 -vwsub.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsub.vx v2, v3, a0 - vwmul.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmul.vx v0, v2, a0, v0.t @@ -519,10 +387,6 @@ vwmul.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmul.vx v2, v2, a0 -vwmul.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmul.vx v2, v3, a0 - vwmulu.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmulu.vx v0, v2, a0, v0.t @@ -531,10 +395,6 @@ vwmulu.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmulu.vx v2, v2, a0 -vwmulu.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmulu.vx v2, v3, a0 - vwmulsu.vx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmulsu.vx v0, v2, a0, v0.t @@ -543,10 +403,6 @@ vwmulsu.vx v2, v2, a0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmulsu.vx v2, v2, a0 -vwmulsu.vx v2, v3, a0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmulsu.vx v2, v3, a0 - vwmaccu.vx v0, a0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmaccu.vx v0, a0, v2, v0.t @@ -555,10 +411,6 @@ vwmaccu.vx v2, a0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmaccu.vx v2, a0, v2 -vwmaccu.vx v2, a0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
-# CHECK-ERROR-LABEL: vwmaccu.vx v2, a0, v3 - vwmacc.vx v0, a0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmacc.vx v0, a0, v2, v0.t @@ -567,10 +419,6 @@ vwmacc.vx v2, a0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmacc.vx v2, a0, v2 -vwmacc.vx v2, a0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmacc.vx v2, a0, v3 - vwmaccsu.vx v0, a0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmaccsu.vx v0, a0, v2, v0.t @@ -579,10 +427,6 @@ vwmaccsu.vx v2, a0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmaccsu.vx v2, a0, v2 -vwmaccsu.vx v2, a0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmaccsu.vx v2, a0, v3 - vwmaccus.vx v0, a0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwmaccus.vx v0, a0, v2, v0.t @@ -591,10 +435,6 @@ vwmaccus.vx v2, a0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwmaccus.vx v2, a0, v2 -vwmaccus.vx v2, a0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwmaccus.vx v2, a0, v3 - vfwadd.vf v0, v2, fa0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwadd.vf v0, v2, fa0, v0.t @@ -603,10 +443,6 @@ vfwadd.vf v2, v2, fa0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwadd.vf v2, v2, fa0 -vfwadd.vf v2, v3, fa0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwadd.vf v2, v3, fa0 - vfwsub.vf v0, v2, fa0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwsub.vf v0, v2, fa0, v0.t @@ -615,10 +451,6 @@ vfwsub.vf v2, v2, fa0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwsub.vf v2, v2, fa0 -vfwsub.vf v2, v3, fa0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwsub.vf v2, v3, fa0 - vfwmul.vf v0, v2, fa0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmul.vf v0, v2, fa0, v0.t @@ -627,10 +459,6 @@ vfwmul.vf v2, v2, fa0 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwmul.vf v2, v2, fa0 -vfwmul.vf v2, v3, fa0 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmul.vf v2, v3, fa0 - vfwmacc.vf v0, fa0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmacc.vf v0, fa0, v2, v0.t @@ -639,10 +467,6 @@ vfwmacc.vf v2, fa0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. 
# CHECK-ERROR-LABEL: vfwmacc.vf v2, fa0, v2 -vfwmacc.vf v2, fa0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmacc.vf v2, fa0, v3 - vfwnmacc.vf v0, fa0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwnmacc.vf v0, fa0, v2, v0.t @@ -651,10 +475,6 @@ vfwnmacc.vf v2, fa0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwnmacc.vf v2, fa0, v2 -vfwnmacc.vf v2, fa0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwnmacc.vf v2, fa0, v3 - vfwmsac.vf v0, fa0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwmsac.vf v0, fa0, v2, v0.t @@ -663,10 +483,6 @@ vfwmsac.vf v2, fa0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwmsac.vf v2, fa0, v2 -vfwmsac.vf v2, fa0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwmsac.vf v2, fa0, v3 - vfwnmsac.vf v0, fa0, v2, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwnmsac.vf v0, fa0, v2, v0.t @@ -675,10 +491,6 @@ vfwnmsac.vf v2, fa0, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwnmsac.vf v2, fa0, v2 -vfwnmsac.vf v2, fa0, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwnmsac.vf v2, fa0, v3 - vcompress.vm v2, v2, v4 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vcompress.vm v2, v2, v4 @@ -691,10 +503,6 @@ vwaddu.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwaddu.wv v2, v4, v2 -vwaddu.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwaddu.wv v2, v4, v3 - vwsubu.wv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwsubu.wv v0, v2, v4, v0.t @@ -703,10 +511,6 @@ vwsubu.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsubu.wv v2, v4, v2 -vwsubu.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsubu.wv v2, v4, v3 - vwadd.wv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwadd.wv v0, v2, v4, v0.t @@ -715,10 +519,6 @@ vwadd.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwadd.wv v2, v4, v2 -vwadd.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwadd.wv v2, v4, v3 - vwsub.wv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. 
# CHECK-ERROR-LABEL: vwsub.wv v0, v2, v4, v0.t @@ -727,10 +527,6 @@ vwsub.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vwsub.wv v2, v4, v2 -vwsub.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vwsub.wv v2, v4, v3 - vfwadd.wv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwadd.wv v0, v2, v4, v0.t @@ -739,10 +535,6 @@ vfwadd.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwadd.wv v2, v4, v2 -vfwadd.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwadd.wv v2, v4, v3 - vfwsub.wv v0, v2, v4, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwsub.wv v0, v2, v4, v0.t @@ -751,10 +543,6 @@ vfwsub.wv v2, v4, v2 # CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. # CHECK-ERROR-LABEL: vfwsub.wv v2, v4, v2 -vfwsub.wv v2, v4, v3 -# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. -# CHECK-ERROR-LABEL: vfwsub.wv v2, v4, v3 - vwaddu.wx v0, v2, a0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vwaddu.wx v0, v2, a0, v0.t @@ -778,3 +566,27 @@ vfwadd.wf v0, v2, fa0, v0.t vfwsub.wf v0, v2, fa0, v0.t # CHECK-ERROR: The destination vector register group cannot overlap the mask register. # CHECK-ERROR-LABEL: vfwsub.wf v0, v2, fa0, v0.t + +vadc.vvm v0, v2, v4, v0 +# CHECK-ERROR: The destination vector register group cannot be V0. +# CHECK-ERROR-LABEL: vadc.vvm v0, v2, v4, v0 + +vmadc.vvm v2, v2, v4, v0 +# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. +# CHECK-ERROR-LABEL: vmadc.vvm v2, v2, v4, v0 + +vmadc.vvm v4, v2, v4, v0 +# CHECK-ERROR: The destination vector register group cannot overlap the source vector register group. +# CHECK-ERROR-LABEL: vmadc.vvm v4, v2, v4, v0 + +vadd.vv v0, v2, v4, v0.t +# CHECK-ERROR: The destination vector register group cannot overlap the mask register. +# CHECK-ERROR-LABEL: vadd.vv v0, v2, v4, v0.t + +vadd.vx v0, v2, a0, v0.t +# CHECK-ERROR: The destination vector register group cannot overlap the mask register. +# CHECK-ERROR-LABEL: vadd.vx v0, v2, a0, v0.t + +vadd.vi v0, v2, 1, v0.t +# CHECK-ERROR: The destination vector register group cannot overlap the mask register. 
+# CHECK-ERROR-LABEL: vadd.vi v0, v2, 1, v0.t diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s index c8284b561f4af..eac3191c2d6b1 100644 --- a/llvm/test/MC/RISCV/rvv/load.s +++ b/llvm/test/MC/RISCV/rvv/load.s @@ -8,332 +8,392 @@ # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN -vlb.v v8, (a0), v0.t -# CHECK-INST: vlb.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x04,0x05,0x10] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 05 10 - -vlb.v v8, (a0) -# CHECK-INST: vlb.v v8, (a0) -# CHECK-ENCODING: [0x07,0x04,0x05,0x12] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 05 12 - -vlh.v v8, (a0), v0.t -# CHECK-INST: vlh.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x54,0x05,0x10] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 05 10 - -vlh.v v8, (a0) -# CHECK-INST: vlh.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x05,0x12] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 05 12 - -vlw.v v8, (a0), v0.t -# CHECK-INST: vlw.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x64,0x05,0x10] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 05 10 - -vlw.v v8, (a0) -# CHECK-INST: vlw.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x05,0x12] -# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 05 12 - -vlbu.v v8, (a0), v0.t -# CHECK-INST: vlbu.v v8, (a0), v0.t +vle8.v v8, (a0), v0.t +# CHECK-INST: vle8.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x04,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 05 00 -vlbu.v v8, (a0) -# CHECK-INST: vlbu.v v8, (a0) +vle8.v v8, (a0) +# CHECK-INST: vle8.v v8, (a0) # CHECK-ENCODING: [0x07,0x04,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 05 02 -vlhu.v v8, (a0), v0.t -# CHECK-INST: vlhu.v v8, (a0), v0.t +vle16.v v8, (a0), v0.t +# CHECK-INST: vle16.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x54,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 05 00 -vlhu.v v8, (a0) -# CHECK-INST: vlhu.v v8, (a0) +vle16.v v8, (a0) +# CHECK-INST: vle16.v v8, (a0) # CHECK-ENCODING: [0x07,0x54,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 05 02 -vlwu.v v8, (a0), v0.t -# CHECK-INST: vlwu.v v8, (a0), v0.t +vle32.v v8, (a0), v0.t +# CHECK-INST: vle32.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x64,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 05 00 -vlwu.v v8, (a0) -# CHECK-INST: vlwu.v v8, (a0) +vle32.v v8, (a0) +# CHECK-INST: vle32.v v8, (a0) # CHECK-ENCODING: [0x07,0x64,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 05 02 -vlbff.v v8, (a0), v0.t -# CHECK-INST: vlbff.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x04,0x05,0x11] +vle64.v v8, (a0), v0.t +# CHECK-INST: vle64.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x74,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 05 11 +# CHECK-UNKNOWN: 07 74 05 00 -vlbff.v v8, (a0) -# CHECK-INST: vlbff.v v8, (a0) -# 
CHECK-ENCODING: [0x07,0x04,0x05,0x13] +vle64.v v8, (a0) +# CHECK-INST: vle64.v v8, (a0) +# CHECK-ENCODING: [0x07,0x74,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 05 13 +# CHECK-UNKNOWN: 07 74 05 02 -vlhff.v v8, (a0), v0.t -# CHECK-INST: vlhff.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x54,0x05,0x11] +vle128.v v8, (a0), v0.t +# CHECK-INST: vle128.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x04,0x05,0x10] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 05 11 +# CHECK-UNKNOWN: 07 04 05 10 -vlhff.v v8, (a0) -# CHECK-INST: vlhff.v v8, (a0) -# CHECK-ENCODING: [0x07,0x54,0x05,0x13] +vle128.v v8, (a0) +# CHECK-INST: vle128.v v8, (a0) +# CHECK-ENCODING: [0x07,0x04,0x05,0x12] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 05 13 +# CHECK-UNKNOWN: 07 04 05 12 -vlwff.v v8, (a0), v0.t -# CHECK-INST: vlwff.v v8, (a0), v0.t -# CHECK-ENCODING: [0x07,0x64,0x05,0x11] +vle256.v v8, (a0), v0.t +# CHECK-INST: vle256.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x54,0x05,0x10] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 05 11 +# CHECK-UNKNOWN: 07 54 05 10 -vlwff.v v8, (a0) -# CHECK-INST: vlwff.v v8, (a0) -# CHECK-ENCODING: [0x07,0x64,0x05,0x13] +vle256.v v8, (a0) +# CHECK-INST: vle256.v v8, (a0) +# CHECK-ENCODING: [0x07,0x54,0x05,0x12] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 05 13 +# CHECK-UNKNOWN: 07 54 05 12 -vlbuff.v v8, (a0), v0.t -# CHECK-INST: vlbuff.v v8, (a0), v0.t +vle512.v v8, (a0), v0.t +# CHECK-INST: vle512.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x64,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 64 05 10 + +vle512.v v8, (a0) +# CHECK-INST: vle512.v v8, (a0) +# CHECK-ENCODING: [0x07,0x64,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 64 05 12 + +vle1024.v v8, (a0), v0.t +# CHECK-INST: vle1024.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x74,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 05 10 + +vle1024.v v8, (a0) +# CHECK-INST: vle1024.v v8, (a0) +# CHECK-ENCODING: [0x07,0x74,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 05 12 + +vle8ff.v v8, (a0), v0.t +# CHECK-INST: vle8ff.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x04,0x05,0x01] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 05 01 -vlbuff.v v8, (a0) -# CHECK-INST: vlbuff.v v8, (a0) +vle8ff.v v8, (a0) +# CHECK-INST: vle8ff.v v8, (a0) # CHECK-ENCODING: [0x07,0x04,0x05,0x03] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 05 03 -vlhuff.v v8, (a0), v0.t -# CHECK-INST: vlhuff.v v8, (a0), v0.t +vle16ff.v v8, (a0), v0.t +# CHECK-INST: vle16ff.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x54,0x05,0x01] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 05 01 -vlhuff.v v8, (a0) -# CHECK-INST: vlhuff.v v8, (a0) +vle16ff.v v8, (a0) +# CHECK-INST: vle16ff.v v8, (a0) # CHECK-ENCODING: [0x07,0x54,0x05,0x03] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 05 03 -vlwuff.v v8, (a0), v0.t -# CHECK-INST: vlwuff.v v8, (a0), v0.t 
+vle32ff.v v8, (a0), v0.t +# CHECK-INST: vle32ff.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x64,0x05,0x01] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 05 01 -vlwuff.v v8, (a0) -# CHECK-INST: vlwuff.v v8, (a0) +vle32ff.v v8, (a0) +# CHECK-INST: vle32ff.v v8, (a0) # CHECK-ENCODING: [0x07,0x64,0x05,0x03] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 05 03 -vleff.v v8, (a0), v0.t -# CHECK-INST: vleff.v v8, (a0), v0.t +vle64ff.v v8, (a0), v0.t +# CHECK-INST: vle64ff.v v8, (a0), v0.t # CHECK-ENCODING: [0x07,0x74,0x05,0x01] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 05 01 -vleff.v v8, (a0) -# CHECK-INST: vleff.v v8, (a0) +vle64ff.v v8, (a0) +# CHECK-INST: vle64ff.v v8, (a0) # CHECK-ENCODING: [0x07,0x74,0x05,0x03] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 05 03 -vlsb.v v8, (a0), a1, v0.t -# CHECK-INST: vlsb.v v8, (a0), a1, v0.t -# CHECK-ENCODING: [0x07,0x04,0xb5,0x18] +vle128ff.v v8, (a0), v0.t +# CHECK-INST: vle128ff.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x04,0x05,0x11] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 b5 18 +# CHECK-UNKNOWN: 07 04 05 11 -vlsb.v v8, (a0), a1 -# CHECK-INST: vlsb.v v8, (a0), a1 -# CHECK-ENCODING: [0x07,0x04,0xb5,0x1a] +vle128ff.v v8, (a0) +# CHECK-INST: vle128ff.v v8, (a0) +# CHECK-ENCODING: [0x07,0x04,0x05,0x13] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 b5 1a +# CHECK-UNKNOWN: 07 04 05 13 -vlsh.v v8, (a0), a1, v0.t -# CHECK-INST: vlsh.v v8, (a0), a1, v0.t -# CHECK-ENCODING: [0x07,0x54,0xb5,0x18] +vle256ff.v v8, (a0), v0.t +# CHECK-INST: vle256ff.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x54,0x05,0x11] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 b5 18 +# CHECK-UNKNOWN: 07 54 05 11 -vlsh.v v8, (a0), a1 -# CHECK-INST: vlsh.v v8, (a0), a1 -# CHECK-ENCODING: [0x07,0x54,0xb5,0x1a] +vle256ff.v v8, (a0) +# CHECK-INST: vle256ff.v v8, (a0) +# CHECK-ENCODING: [0x07,0x54,0x05,0x13] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 b5 1a +# CHECK-UNKNOWN: 07 54 05 13 -vlsw.v v8, (a0), a1, v0.t -# CHECK-INST: vlsw.v v8, (a0), a1, v0.t -# CHECK-ENCODING: [0x07,0x64,0xb5,0x18] +vle512ff.v v8, (a0), v0.t +# CHECK-INST: vle512ff.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x64,0x05,0x11] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 b5 18 +# CHECK-UNKNOWN: 07 64 05 11 -vlsw.v v8, (a0), a1 -# CHECK-INST: vlsw.v v8, (a0), a1 -# CHECK-ENCODING: [0x07,0x64,0xb5,0x1a] +vle512ff.v v8, (a0) +# CHECK-INST: vle512ff.v v8, (a0) +# CHECK-ENCODING: [0x07,0x64,0x05,0x13] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 b5 1a +# CHECK-UNKNOWN: 07 64 05 13 + +vle1024ff.v v8, (a0), v0.t +# CHECK-INST: vle1024ff.v v8, (a0), v0.t +# CHECK-ENCODING: [0x07,0x74,0x05,0x11] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 05 11 + +vle1024ff.v v8, (a0) +# CHECK-INST: vle1024ff.v v8, (a0) +# CHECK-ENCODING: [0x07,0x74,0x05,0x13] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 05 13 -vlsbu.v v8, (a0), a1, v0.t -# CHECK-INST: vlsbu.v v8, (a0), a1, v0.t +vlse8.v 
v8, (a0), a1, v0.t +# CHECK-INST: vlse8.v v8, (a0), a1, v0.t # CHECK-ENCODING: [0x07,0x04,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 b5 08 -vlsbu.v v8, (a0), a1 -# CHECK-INST: vlsbu.v v8, (a0), a1 +vlse8.v v8, (a0), a1 +# CHECK-INST: vlse8.v v8, (a0), a1 # CHECK-ENCODING: [0x07,0x04,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 b5 0a -vlshu.v v8, (a0), a1, v0.t -# CHECK-INST: vlshu.v v8, (a0), a1, v0.t +vlse16.v v8, (a0), a1, v0.t +# CHECK-INST: vlse16.v v8, (a0), a1, v0.t # CHECK-ENCODING: [0x07,0x54,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 b5 08 -vlshu.v v8, (a0), a1 -# CHECK-INST: vlshu.v v8, (a0), a1 +vlse16.v v8, (a0), a1 +# CHECK-INST: vlse16.v v8, (a0), a1 # CHECK-ENCODING: [0x07,0x54,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 b5 0a -vlswu.v v8, (a0), a1, v0.t -# CHECK-INST: vlswu.v v8, (a0), a1, v0.t +vlse32.v v8, (a0), a1, v0.t +# CHECK-INST: vlse32.v v8, (a0), a1, v0.t # CHECK-ENCODING: [0x07,0x64,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 b5 08 -vlswu.v v8, (a0), a1 -# CHECK-INST: vlswu.v v8, (a0), a1 +vlse32.v v8, (a0), a1 +# CHECK-INST: vlse32.v v8, (a0), a1 # CHECK-ENCODING: [0x07,0x64,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 b5 0a -vlse.v v8, (a0), a1, v0.t -# CHECK-INST: vlse.v v8, (a0), a1, v0.t +vlse64.v v8, (a0), a1, v0.t +# CHECK-INST: vlse64.v v8, (a0), a1, v0.t # CHECK-ENCODING: [0x07,0x74,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 b5 08 -vlse.v v8, (a0), a1 -# CHECK-INST: vlse.v v8, (a0), a1 +vlse64.v v8, (a0), a1 +# CHECK-INST: vlse64.v v8, (a0), a1 # CHECK-ENCODING: [0x07,0x74,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 b5 0a -vlxb.v v8, (a0), v4, v0.t -# CHECK-INST: vlxb.v v8, (a0), v4, v0.t -# CHECK-ENCODING: [0x07,0x04,0x45,0x1c] +vlse128.v v8, (a0), a1, v0.t +# CHECK-INST: vlse128.v v8, (a0), a1, v0.t +# CHECK-ENCODING: [0x07,0x04,0xb5,0x18] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 45 1c +# CHECK-UNKNOWN: 07 04 b5 18 -vlxb.v v8, (a0), v4 -# CHECK-INST: vlxb.v v8, (a0), v4 -# CHECK-ENCODING: [0x07,0x04,0x45,0x1e] +vlse128.v v8, (a0), a1 +# CHECK-INST: vlse128.v v8, (a0), a1 +# CHECK-ENCODING: [0x07,0x04,0xb5,0x1a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 04 45 1e +# CHECK-UNKNOWN: 07 04 b5 1a -vlxh.v v8, (a0), v4, v0.t -# CHECK-INST: vlxh.v v8, (a0), v4, v0.t -# CHECK-ENCODING: [0x07,0x54,0x45,0x1c] +vlse256.v v8, (a0), a1, v0.t +# CHECK-INST: vlse256.v v8, (a0), a1, v0.t +# CHECK-ENCODING: [0x07,0x54,0xb5,0x18] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 45 1c +# CHECK-UNKNOWN: 07 54 b5 18 -vlxh.v v8, (a0), v4 -# CHECK-INST: vlxh.v v8, (a0), v4 -# CHECK-ENCODING: [0x07,0x54,0x45,0x1e] +vlse256.v v8, (a0), a1 +# CHECK-INST: vlse256.v v8, (a0), a1 +# CHECK-ENCODING: [0x07,0x54,0xb5,0x1a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 54 45 1e +# CHECK-UNKNOWN: 07 54 b5 1a -vlxw.v v8, (a0), v4, v0.t -# 
CHECK-INST: vlxw.v v8, (a0), v4, v0.t -# CHECK-ENCODING: [0x07,0x64,0x45,0x1c] +vlse512.v v8, (a0), a1, v0.t +# CHECK-INST: vlse512.v v8, (a0), a1, v0.t +# CHECK-ENCODING: [0x07,0x64,0xb5,0x18] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 45 1c +# CHECK-UNKNOWN: 07 64 b5 18 -vlxw.v v8, (a0), v4 -# CHECK-INST: vlxw.v v8, (a0), v4 -# CHECK-ENCODING: [0x07,0x64,0x45,0x1e] +vlse512.v v8, (a0), a1 +# CHECK-INST: vlse512.v v8, (a0), a1 +# CHECK-ENCODING: [0x07,0x64,0xb5,0x1a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 64 45 1e +# CHECK-UNKNOWN: 07 64 b5 1a + +vlse1024.v v8, (a0), a1, v0.t +# CHECK-INST: vlse1024.v v8, (a0), a1, v0.t +# CHECK-ENCODING: [0x07,0x74,0xb5,0x18] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 b5 18 -vlxbu.v v8, (a0), v4, v0.t -# CHECK-INST: vlxbu.v v8, (a0), v4, v0.t +vlse1024.v v8, (a0), a1 +# CHECK-INST: vlse1024.v v8, (a0), a1 +# CHECK-ENCODING: [0x07,0x74,0xb5,0x1a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 b5 1a + +vlxei8.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei8.v v8, (a0), v4, v0.t # CHECK-ENCODING: [0x07,0x04,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 45 0c -vlxbu.v v8, (a0), v4 -# CHECK-INST: vlxbu.v v8, (a0), v4 +vlxei8.v v8, (a0), v4 +# CHECK-INST: vlxei8.v v8, (a0), v4 # CHECK-ENCODING: [0x07,0x04,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 04 45 0e -vlxhu.v v8, (a0), v4, v0.t -# CHECK-INST: vlxhu.v v8, (a0), v4, v0.t +vlxei16.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei16.v v8, (a0), v4, v0.t # CHECK-ENCODING: [0x07,0x54,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 45 0c -vlxhu.v v8, (a0), v4 -# CHECK-INST: vlxhu.v v8, (a0), v4 +vlxei16.v v8, (a0), v4 +# CHECK-INST: vlxei16.v v8, (a0), v4 # CHECK-ENCODING: [0x07,0x54,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 54 45 0e -vlxwu.v v8, (a0), v4, v0.t -# CHECK-INST: vlxwu.v v8, (a0), v4, v0.t +vlxei32.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei32.v v8, (a0), v4, v0.t # CHECK-ENCODING: [0x07,0x64,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 45 0c -vlxwu.v v8, (a0), v4 -# CHECK-INST: vlxwu.v v8, (a0), v4 +vlxei32.v v8, (a0), v4 +# CHECK-INST: vlxei32.v v8, (a0), v4 # CHECK-ENCODING: [0x07,0x64,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 64 45 0e -vlxe.v v8, (a0), v4, v0.t -# CHECK-INST: vlxe.v v8, (a0), v4, v0.t +vlxei64.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei64.v v8, (a0), v4, v0.t # CHECK-ENCODING: [0x07,0x74,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 45 0c -vlxe.v v8, (a0), v4 -# CHECK-INST: vlxe.v v8, (a0), v4 +vlxei64.v v8, (a0), v4 +# CHECK-INST: vlxei64.v v8, (a0), v4 # CHECK-ENCODING: [0x07,0x74,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 07 74 45 0e +vlxei128.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei128.v v8, (a0), v4, v0.t +# CHECK-ENCODING: [0x07,0x04,0x45,0x1c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 04 45 1c + 
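+# Note on the v0.9 naming used in this file: vle<EEW>.v is a unit-stride load,
+# vlse<EEW>.v a strided load, and vlxei<EEW>.v an indexed load, where EEW is
+# the element width (for vlxei, the width of the index elements) in bits.
+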
+vlxei128.v v8, (a0), v4 +# CHECK-INST: vlxei128.v v8, (a0), v4 +# CHECK-ENCODING: [0x07,0x04,0x45,0x1e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 04 45 1e + +vlxei256.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei256.v v8, (a0), v4, v0.t +# CHECK-ENCODING: [0x07,0x54,0x45,0x1c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 54 45 1c + +vlxei256.v v8, (a0), v4 +# CHECK-INST: vlxei256.v v8, (a0), v4 +# CHECK-ENCODING: [0x07,0x54,0x45,0x1e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 54 45 1e + +vlxei512.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei512.v v8, (a0), v4, v0.t +# CHECK-ENCODING: [0x07,0x64,0x45,0x1c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 64 45 1c + +vlxei512.v v8, (a0), v4 +# CHECK-INST: vlxei512.v v8, (a0), v4 +# CHECK-ENCODING: [0x07,0x64,0x45,0x1e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 64 45 1e + +vlxei1024.v v8, (a0), v4, v0.t +# CHECK-INST: vlxei1024.v v8, (a0), v4, v0.t +# CHECK-ENCODING: [0x07,0x74,0x45,0x1c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 45 1c + +vlxei1024.v v8, (a0), v4 +# CHECK-INST: vlxei1024.v v8, (a0), v4 +# CHECK-ENCODING: [0x07,0x74,0x45,0x1e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 07 74 45 1e + vl1r.v v8, (a0) # CHECK-INST: vl1r.v v8, (a0) -# CHECK-ENCODING: [0x07,0x74,0x85,0x02] +# CHECK-ENCODING: [0x07,0x04,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 07 74 85 02 +# CHECK-UNKNOWN: 07 04 85 02 diff --git a/llvm/test/MC/RISCV/rvv/mask.s b/llvm/test/MC/RISCV/rvv/mask.s index d2157bc215bf3..ef029388da9c3 100644 --- a/llvm/test/MC/RISCV/rvv/mask.s +++ b/llvm/test/MC/RISCV/rvv/mask.s @@ -140,8 +140,8 @@ vid.v v8 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 a4 08 52 -vmcpy.m v8, v4 -# CHECK-INST: vmcpy.m v8, v4 +vmmv.m v8, v4 +# CHECK-INST: vmmv.m v8, v4 # CHECK-ENCODING: [0x57,0x24,0x42,0x66] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 42 66 diff --git a/llvm/test/MC/RISCV/rvv/reduction.s b/llvm/test/MC/RISCV/rvv/reduction.s index dc5adec0a5668..7599c603363f4 100644 --- a/llvm/test/MC/RISCV/rvv/reduction.s +++ b/llvm/test/MC/RISCV/rvv/reduction.s @@ -127,3 +127,9 @@ vwredsum.vs v8, v4, v20 # CHECK-ENCODING: [0x57,0x04,0x4a,0xc6] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 04 4a c6 + +vredsum.vs v0, v4, v20, v0.t +# CHECK-INST: vredsum.vs v0, v4, v20, v0.t +# CHECK-ENCODING: [0x57,0x20,0x4a,0x00] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 20 4a 00 diff --git a/llvm/test/MC/RISCV/rvv/snippet.s b/llvm/test/MC/RISCV/rvv/snippet.s index fd38631c4ba7e..94f6556ea10c5 100644 --- a/llvm/test/MC/RISCV/rvv/snippet.s +++ b/llvm/test/MC/RISCV/rvv/snippet.s @@ -5,28 +5,28 @@ # RUN: | FileCheck %s --check-prefix=CHECK-INST loop: - vsetvli a3, a0, e16,m4 # vtype = 16-bit integer vectors -# CHECK-INST: d7 76 65 00 vsetvli a3, a0, e16,m4 - vlh.v v4, (a1) # Get 16b vector -# CHECK-INST: 07 d2 05 12 vlh.v v4, (a1) - slli t1, a3, 1 # Multiply length by two bytes/element + vsetvli a3, a0, e16,m4,ta,ma # vtype = 16-bit 
integer vectors +# CHECK-INST: d7 76 65 0c vsetvli a3, a0, e16,m4,ta,ma + vle16.v v4, (a1) # Get 16b vector +# CHECK-INST: 07 d2 05 02 vle16.v v4, (a1) + slli t1, a3, 1 # Multiply length by two bytes/element # CHECK-INST: 13 93 16 00 slli t1, a3, 1 - add a1, a1, t1 # Bump pointer + add a1, a1, t1 # Bump pointer # CHECK-INST: b3 85 65 00 add a1, a1, t1 - vwmul.vx v8, v4, x10 # 32b in + vwmul.vx v8, v4, x10 # 32b in # CHECK-INST: 57 64 45 ee vwmul.vx v8, v4, a0 - vsetvli x0, a0, e32,m8 # Operate on 32b values -# CHECK-INST: 57 70 b5 00 vsetvli zero, a0, e32,m8 + vsetvli x0, a0, e32,m8,ta,ma # Operate on 32b values +# CHECK-INST: 57 70 b5 0c vsetvli zero, a0, e32,m8,ta,ma vsrl.vi v8, v8, 3 # CHECK-INST: 57 b4 81 a2 vsrl.vi v8, v8, 3 - vsw.v v8, (a2) # Store vector of 32b -# CHECK-INST: 27 64 06 02 vsw.v v8, (a2) - slli t1, a3, 2 # Multiply length by four bytes/element + vse32.v v8, (a2) # Store vector of 32b +# CHECK-INST: 27 64 06 02 vse32.v v8, (a2) + slli t1, a3, 2 # Multiply length by four bytes/element # CHECK-INST: 13 93 26 00 slli t1, a3, 2 - add a2, a2, t1 # Bump pointer + add a2, a2, t1 # Bump pointer # CHECK-INST: 33 06 66 00 add a2, a2, t1 - sub a0, a0, a3 # Decrement count + sub a0, a0, a3 # Decrement count # CHECK-INST: 33 05 d5 40 sub a0, a0, a3 - bnez a0, loop # Any more? + bnez a0, loop # Any more? # CHECK-INST: e3 1a 05 fc bnez a0, -44 diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s index 9d25b593ef824..75dacb9d19e5d 100644 --- a/llvm/test/MC/RISCV/rvv/store.s +++ b/llvm/test/MC/RISCV/rvv/store.s @@ -8,200 +8,296 @@ # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN -vsb.v v24, (a0), v0.t -# CHECK-INST: vsb.v v24, (a0), v0.t +vse8.v v24, (a0), v0.t +# CHECK-INST: vse8.v v24, (a0), v0.t # CHECK-ENCODING: [0x27,0x0c,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 05 00 -vsb.v v24, (a0) -# CHECK-INST: vsb.v v24, (a0) +vse8.v v24, (a0) +# CHECK-INST: vse8.v v24, (a0) # CHECK-ENCODING: [0x27,0x0c,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 05 02 -vsh.v v24, (a0), v0.t -# CHECK-INST: vsh.v v24, (a0), v0.t +vse16.v v24, (a0), v0.t +# CHECK-INST: vse16.v v24, (a0), v0.t # CHECK-ENCODING: [0x27,0x5c,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 05 00 -vsh.v v24, (a0) -# CHECK-INST: vsh.v v24, (a0) +vse16.v v24, (a0) +# CHECK-INST: vse16.v v24, (a0) # CHECK-ENCODING: [0x27,0x5c,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 05 02 -vsw.v v24, (a0), v0.t -# CHECK-INST: vsw.v v24, (a0), v0.t +vse32.v v24, (a0), v0.t +# CHECK-INST: vse32.v v24, (a0), v0.t # CHECK-ENCODING: [0x27,0x6c,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 05 00 -vsw.v v24, (a0) -# CHECK-INST: vsw.v v24, (a0) +vse32.v v24, (a0) +# CHECK-INST: vse32.v v24, (a0) # CHECK-ENCODING: [0x27,0x6c,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 05 02 -vse.v v24, (a0), v0.t -# CHECK-INST: vse.v v24, (a0), v0.t +vse64.v v24, (a0), v0.t +# CHECK-INST: vse64.v v24, (a0), v0.t # CHECK-ENCODING: [0x27,0x7c,0x05,0x00] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 05 00 -vse.v 
v24, (a0) -# CHECK-INST: vse.v v24, (a0) +vse64.v v24, (a0) +# CHECK-INST: vse64.v v24, (a0) # CHECK-ENCODING: [0x27,0x7c,0x05,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 05 02 -vssb.v v24, (a0), a1, v0.t -# CHECK-INST: vssb.v v24, (a0), a1, v0.t +vse128.v v24, (a0), v0.t +# CHECK-INST: vse128.v v24, (a0), v0.t +# CHECK-ENCODING: [0x27,0x0c,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 0c 05 10 + +vse128.v v24, (a0) +# CHECK-INST: vse128.v v24, (a0) +# CHECK-ENCODING: [0x27,0x0c,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 0c 05 12 + +vse256.v v24, (a0), v0.t +# CHECK-INST: vse256.v v24, (a0), v0.t +# CHECK-ENCODING: [0x27,0x5c,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 5c 05 10 + +vse256.v v24, (a0) +# CHECK-INST: vse256.v v24, (a0) +# CHECK-ENCODING: [0x27,0x5c,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 5c 05 12 + +vse512.v v24, (a0), v0.t +# CHECK-INST: vse512.v v24, (a0), v0.t +# CHECK-ENCODING: [0x27,0x6c,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 6c 05 10 + +vse512.v v24, (a0) +# CHECK-INST: vse512.v v24, (a0) +# CHECK-ENCODING: [0x27,0x6c,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 6c 05 12 + +vse1024.v v24, (a0), v0.t +# CHECK-INST: vse1024.v v24, (a0), v0.t +# CHECK-ENCODING: [0x27,0x7c,0x05,0x10] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 7c 05 10 + +vse1024.v v24, (a0) +# CHECK-INST: vse1024.v v24, (a0) +# CHECK-ENCODING: [0x27,0x7c,0x05,0x12] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 7c 05 12 + +vsse8.v v24, (a0), a1, v0.t +# CHECK-INST: vsse8.v v24, (a0), a1, v0.t # CHECK-ENCODING: [0x27,0x0c,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c b5 08 -vssb.v v24, (a0), a1 -# CHECK-INST: vssb.v v24, (a0), a1 +vsse8.v v24, (a0), a1 +# CHECK-INST: vsse8.v v24, (a0), a1 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c b5 0a -vssh.v v24, (a0), a1, v0.t -# CHECK-INST: vssh.v v24, (a0), a1, v0.t +vsse16.v v24, (a0), a1, v0.t +# CHECK-INST: vsse16.v v24, (a0), a1, v0.t # CHECK-ENCODING: [0x27,0x5c,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c b5 08 -vssh.v v24, (a0), a1 -# CHECK-INST: vssh.v v24, (a0), a1 +vsse16.v v24, (a0), a1 +# CHECK-INST: vsse16.v v24, (a0), a1 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c b5 0a -vssw.v v24, (a0), a1, v0.t -# CHECK-INST: vssw.v v24, (a0), a1, v0.t +vsse32.v v24, (a0), a1, v0.t +# CHECK-INST: vsse32.v v24, (a0), a1, v0.t # CHECK-ENCODING: [0x27,0x6c,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c b5 08 -vssw.v v24, (a0), a1 -# CHECK-INST: vssw.v v24, (a0), a1 +vsse32.v v24, (a0), a1 +# CHECK-INST: vsse32.v v24, (a0), a1 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector 
Instructions) # CHECK-UNKNOWN: 27 6c b5 0a -vsse.v v24, (a0), a1, v0.t -# CHECK-INST: vsse.v v24, (a0), a1, v0.t +vsse64.v v24, (a0), a1, v0.t +# CHECK-INST: vsse64.v v24, (a0), a1, v0.t # CHECK-ENCODING: [0x27,0x7c,0xb5,0x08] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c b5 08 -vsse.v v24, (a0), a1 -# CHECK-INST: vsse.v v24, (a0), a1 +vsse64.v v24, (a0), a1 +# CHECK-INST: vsse64.v v24, (a0), a1 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x0a] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c b5 0a -vsxb.v v24, (a0), v4, v0.t -# CHECK-INST: vsxb.v v24, (a0), v4, v0.t +vsse128.v v24, (a0), a1, v0.t +# CHECK-INST: vsse128.v v24, (a0), a1, v0.t +# CHECK-ENCODING: [0x27,0x0c,0xb5,0x18] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 0c b5 18 + +vsse128.v v24, (a0), a1 +# CHECK-INST: vsse128.v v24, (a0), a1 +# CHECK-ENCODING: [0x27,0x0c,0xb5,0x1a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 0c b5 1a + +vsse256.v v24, (a0), a1, v0.t +# CHECK-INST: vsse256.v v24, (a0), a1, v0.t +# CHECK-ENCODING: [0x27,0x5c,0xb5,0x18] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 5c b5 18 + +vsse256.v v24, (a0), a1 +# CHECK-INST: vsse256.v v24, (a0), a1 +# CHECK-ENCODING: [0x27,0x5c,0xb5,0x1a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 5c b5 1a + +vsse512.v v24, (a0), a1, v0.t +# CHECK-INST: vsse512.v v24, (a0), a1, v0.t +# CHECK-ENCODING: [0x27,0x6c,0xb5,0x18] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 6c b5 18 + +vsse512.v v24, (a0), a1 +# CHECK-INST: vsse512.v v24, (a0), a1 +# CHECK-ENCODING: [0x27,0x6c,0xb5,0x1a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 6c b5 1a + +vsse1024.v v24, (a0), a1, v0.t +# CHECK-INST: vsse1024.v v24, (a0), a1, v0.t +# CHECK-ENCODING: [0x27,0x7c,0xb5,0x18] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 7c b5 18 + +vsse1024.v v24, (a0), a1 +# CHECK-INST: vsse1024.v v24, (a0), a1 +# CHECK-ENCODING: [0x27,0x7c,0xb5,0x1a] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 27 7c b5 1a + +vsxei8.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei8.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x0c,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 45 0c -vsxb.v v24, (a0), v4 -# CHECK-INST: vsxb.v v24, (a0), v4 +vsxei8.v v24, (a0), v4 +# CHECK-INST: vsxei8.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x0c,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 45 0e -vsxh.v v24, (a0), v4, v0.t -# CHECK-INST: vsxh.v v24, (a0), v4, v0.t +vsxei16.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei16.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x5c,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 45 0c -vsxh.v v24, (a0), v4 -# CHECK-INST: vsxh.v v24, (a0), v4 +vsxei16.v v24, (a0), v4 +# CHECK-INST: vsxei16.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x5c,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 45 0e -vsxw.v v24, (a0), v4, v0.t -# CHECK-INST: vsxw.v v24, (a0), v4, 
v0.t +vsxei32.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei32.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x6c,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 45 0c -vsxw.v v24, (a0), v4 -# CHECK-INST: vsxw.v v24, (a0), v4 +vsxei32.v v24, (a0), v4 +# CHECK-INST: vsxei32.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x6c,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 45 0e -vsxe.v v24, (a0), v4, v0.t -# CHECK-INST: vsxe.v v24, (a0), v4, v0.t +vsxei64.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei64.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x7c,0x45,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 45 0c -vsxe.v v24, (a0), v4 -# CHECK-INST: vsxe.v v24, (a0), v4 +vsxei64.v v24, (a0), v4 +# CHECK-INST: vsxei64.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x7c,0x45,0x0e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 45 0e -vsuxb.v v24, (a0), v4, v0.t -# CHECK-INST: vsuxb.v v24, (a0), v4, v0.t +vsxei128.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei128.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x0c,0x45,0x1c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 45 1c -vsuxb.v v24, (a0), v4 -# CHECK-INST: vsuxb.v v24, (a0), v4 +vsxei128.v v24, (a0), v4 +# CHECK-INST: vsxei128.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x0c,0x45,0x1e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 0c 45 1e -vsuxh.v v24, (a0), v4, v0.t -# CHECK-INST: vsuxh.v v24, (a0), v4, v0.t +vsxei256.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei256.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x5c,0x45,0x1c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 45 1c -vsuxh.v v24, (a0), v4 -# CHECK-INST: vsuxh.v v24, (a0), v4 +vsxei256.v v24, (a0), v4 +# CHECK-INST: vsxei256.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x5c,0x45,0x1e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 5c 45 1e -vsuxw.v v24, (a0), v4, v0.t -# CHECK-INST: vsuxw.v v24, (a0), v4, v0.t +vsxei512.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei512.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x6c,0x45,0x1c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 45 1c -vsuxw.v v24, (a0), v4 -# CHECK-INST: vsuxw.v v24, (a0), v4 +vsxei512.v v24, (a0), v4 +# CHECK-INST: vsxei512.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x6c,0x45,0x1e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 6c 45 1e -vsuxe.v v24, (a0), v4, v0.t -# CHECK-INST: vsuxe.v v24, (a0), v4, v0.t +vsxei1024.v v24, (a0), v4, v0.t +# CHECK-INST: vsxei1024.v v24, (a0), v4, v0.t # CHECK-ENCODING: [0x27,0x7c,0x45,0x1c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 45 1c -vsuxe.v v24, (a0), v4 -# CHECK-INST: vsuxe.v v24, (a0), v4 +vsxei1024.v v24, (a0), v4 +# CHECK-INST: vsxei1024.v v24, (a0), v4 # CHECK-ENCODING: [0x27,0x7c,0x45,0x1e] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 27 7c 45 1e vs1r.v v24, (a0) # CHECK-INST: vs1r.v v24, (a0) -# CHECK-ENCODING: [0x27,0x7c,0x85,0x02] +# CHECK-ENCODING: [0x27,0x0c,0x85,0x02] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# 
CHECK-UNKNOWN: 27 7c 85 02 +# CHECK-UNKNOWN: 27 0c 85 02 diff --git a/llvm/test/MC/RISCV/rvv/vsetvl.s b/llvm/test/MC/RISCV/rvv/vsetvl.s index 784533445f0c6..351d3febdf039 100644 --- a/llvm/test/MC/RISCV/rvv/vsetvl.s +++ b/llvm/test/MC/RISCV/rvv/vsetvl.s @@ -8,11 +8,71 @@ # RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \ # RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN -vsetvli a2, a0, e32,m4 -# CHECK-INST: vsetvli a2, a0, e32,m4 -# CHECK-ENCODING: [0x57,0x76,0xa5,0x00] +vsetvli a2, a0, e32,m1,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,m1,ta,ma +# CHECK-ENCODING: [0x57,0x76,0x85,0x0c] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) -# CHECK-UNKNOWN: 57 76 a5 00 +# CHECK-UNKNOWN: 57 76 85 0c + +vsetvli a2, a0, e32,m2,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,m2,ta,ma +# CHECK-ENCODING: [0x57,0x76,0x95,0x0c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 95 0c + +vsetvli a2, a0, e32,m4,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,m4,ta,ma +# CHECK-ENCODING: [0x57,0x76,0xa5,0x0c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 a5 0c + +vsetvli a2, a0, e32,m8,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,m8,ta,ma +# CHECK-ENCODING: [0x57,0x76,0xb5,0x0c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 b5 0c + +vsetvli a2, a0, e32,mf2,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,mf2,ta,ma +# CHECK-ENCODING: [0x57,0x76,0xb5,0x0e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 b5 0e + +vsetvli a2, a0, e32,mf4,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,mf4,ta,ma +# CHECK-ENCODING: [0x57,0x76,0xa5,0x0e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 a5 0e + +vsetvli a2, a0, e32,mf8,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,mf8,ta,ma +# CHECK-ENCODING: [0x57,0x76,0x95,0x0e] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 95 0e + +vsetvli a2, a0, e32,m1,ta,ma +# CHECK-INST: vsetvli a2, a0, e32,m1,ta,ma +# CHECK-ENCODING: [0x57,0x76,0x85,0x0c] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 85 0c + +vsetvli a2, a0, e32,m1,tu,ma +# CHECK-INST: vsetvli a2, a0, e32,m1,tu,ma +# CHECK-ENCODING: [0x57,0x76,0x85,0x08] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 85 08 + +vsetvli a2, a0, e32,m1,ta,mu +# CHECK-INST: vsetvli a2, a0, e32,m1,ta,mu +# CHECK-ENCODING: [0x57,0x76,0x85,0x04] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 85 04 + +vsetvli a2, a0, e32,m1,tu,mu +# CHECK-INST: vsetvli a2, a0, e32,m1 +# CHECK-ENCODING: [0x57,0x76,0x85,0x00] +# CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) +# CHECK-UNKNOWN: 57 76 85 00 vsetvl a2, a0, a1 # CHECK-INST: vsetvl a2, a0, a1 From 721d93fc5aa8c9f9fc9b86a9d3d1a58c6790213e Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Fri, 5 Jun 2020 03:34:01 +0800 Subject: [PATCH 069/600] Support experimental v extension v0.9. 
Differential revision: https://reviews.llvm.org/D81213 --- clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 2 +- clang/test/Driver/riscv-arch.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 09ae4538b3acc..7ca05a1f3a395 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) { Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc") return RISCVExtensionVersion{"0", "92"}; if (Ext == "v") - return RISCVExtensionVersion{"0", "8"}; + return RISCVExtensionVersion{"0", "9"}; return None; } diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 725201a77ba7c..8b630b1846c97 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -380,6 +380,6 @@ // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1' // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension -// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p8 -menable-experimental-extensions -### %s -c 2>&1 | \ +// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p9 -menable-experimental-extensions -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v" From ff756f5231cc2ee9457129404e78420fa2791c7b Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Fri, 31 Jul 2020 16:09:13 -0700 Subject: [PATCH 070/600] [compiler-rt][Darwin] Fix linker errors for check-asan A recent change broke `ninja check-asan` on Darwin by causing an error during linking of ASan unit tests [1]. Move the addition of `-ObjC` compiler flag outside of the new `if(COMPILER_RT_STANDALONE_BUILD)` block. It doesn't add any global flags (e.g, `${CMAKE_CXX_FLAGS}`) and the decision to add is based solely on source paths (`${source_rpath}`). [1] 8b2fcc42b895, https://reviews.llvm.org/D84466 Differential Revision: https://reviews.llvm.org/D85057 --- compiler-rt/cmake/Modules/CompilerRTCompile.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/compiler-rt/cmake/Modules/CompilerRTCompile.cmake b/compiler-rt/cmake/Modules/CompilerRTCompile.cmake index 3330038f80688..0b679dbf68fae 100644 --- a/compiler-rt/cmake/Modules/CompilerRTCompile.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTCompile.cmake @@ -73,7 +73,6 @@ function(clang_compile object_file source) if(COMPILER_RT_STANDALONE_BUILD) # Only add global flags in standalone build. string(REGEX MATCH "[.](cc|cpp)$" is_cxx ${source_rpath}) - string(REGEX MATCH "[.](m|mm)$" is_objc ${source_rpath}) if(is_cxx) string(REPLACE " " ";" global_flags "${CMAKE_CXX_FLAGS}") else() @@ -87,9 +86,6 @@ function(clang_compile object_file source) if (APPLE) set(global_flags ${OSX_SYSROOT_FLAG} ${global_flags}) endif() - if (is_objc) - list(APPEND global_flags -ObjC) - endif() # Ignore unknown warnings. CMAKE_CXX_FLAGS may contain GCC-specific options # which are not supported by Clang. 
@@ -98,6 +94,12 @@ function(clang_compile object_file source) else() set(compile_flags ${SOURCE_CFLAGS}) endif() + + string(REGEX MATCH "[.](m|mm)$" is_objc ${source_rpath}) + if (is_objc) + list(APPEND compile_flags "-ObjC") + endif() + add_custom_command( OUTPUT ${object_file} COMMAND ${COMPILER_RT_TEST_COMPILER} ${compile_flags} -c From 5954755939febabcf1edb52b53214f25f06ce584 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 31 Jul 2020 19:59:29 -0400 Subject: [PATCH 071/600] [libc] [obvious] Fix strchr and strrchr tests so that constness is actually verified. --- libc/test/src/string/strchr_test.cpp | 19 +++++++------------ libc/test/src/string/strrchr_test.cpp | 12 ++++-------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/libc/test/src/string/strchr_test.cpp b/libc/test/src/string/strchr_test.cpp index 37b3733857997..dda930c0c1552 100644 --- a/libc/test/src/string/strchr_test.cpp +++ b/libc/test/src/string/strchr_test.cpp @@ -11,42 +11,38 @@ TEST(StrChrTest, FindsFirstCharacter) { const char *src = "abcde"; - const char *src_copy = src; // Should return original string since 'a' is the first character. ASSERT_STREQ(__llvm_libc::strchr(src, 'a'), "abcde"); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrChrTest, FindsMiddleCharacter) { const char *src = "abcde"; - const char *src_copy = src; // Should return characters after (and including) 'c'. ASSERT_STREQ(__llvm_libc::strchr(src, 'c'), "cde"); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrChrTest, FindsLastCharacterThatIsNotNullTerminator) { const char *src = "abcde"; - const char *src_copy = src; // Should return 'e' and null-terminator. ASSERT_STREQ(__llvm_libc::strchr(src, 'e'), "e"); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrChrTest, FindsNullTerminator) { const char *src = "abcde"; - const char *src_copy = src; // Should return null terminator. ASSERT_STREQ(__llvm_libc::strchr(src, '\0'), ""); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrChrTest, CharacterNotWithinStringShouldReturnNullptr) { @@ -56,16 +52,15 @@ TEST(StrChrTest, CharacterNotWithinStringShouldReturnNullptr) { TEST(StrChrTest, TheSourceShouldNotChange) { const char *src = "abcde"; - const char *src_copy = src; // When the character is found, the source string should not change. __llvm_libc::strchr(src, 'd'); - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); // Same case for when the character is not found. __llvm_libc::strchr(src, 'z'); - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); // Same case for when looking for nullptr. __llvm_libc::strchr(src, '\0'); - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrChrTest, ShouldFindFirstOfDuplicates) { diff --git a/libc/test/src/string/strrchr_test.cpp b/libc/test/src/string/strrchr_test.cpp index cf29de220d498..5ed83aa64bfbc 100644 --- a/libc/test/src/string/strrchr_test.cpp +++ b/libc/test/src/string/strrchr_test.cpp @@ -11,42 +11,38 @@ TEST(StrRChrTest, FindsFirstCharacter) { const char *src = "abcde"; - const char *src_copy = src; // Should return original string since 'a' is the first character. ASSERT_STREQ(__llvm_libc::strrchr(src, 'a'), "abcde"); // Source string should not change. 
- ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrRChrTest, FindsMiddleCharacter) { const char *src = "abcde"; - const char *src_copy = src; // Should return characters after (and including) 'c'. ASSERT_STREQ(__llvm_libc::strrchr(src, 'c'), "cde"); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrRChrTest, FindsLastCharacterThatIsNotNullTerminator) { const char *src = "abcde"; - const char *src_copy = src; // Should return 'e' and null-terminator. ASSERT_STREQ(__llvm_libc::strrchr(src, 'e'), "e"); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrRChrTest, FindsNullTerminator) { const char *src = "abcde"; - const char *src_copy = src; // Should return null terminator. ASSERT_STREQ(__llvm_libc::strrchr(src, '\0'), ""); // Source string should not change. - ASSERT_STREQ(src, src_copy); + ASSERT_STREQ(src, "abcde"); } TEST(StrRChrTest, FindsLastBehindFirstNullTerminator) { From 1b35c4fed29d6136ce241a692ce0a7165e59bf81 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Fri, 31 Jul 2020 20:14:34 -0400 Subject: [PATCH 072/600] [libc] [obvious] In strrchr, remove cast to unsigned char before comparison. --- libc/src/string/strrchr.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/libc/src/string/strrchr.cpp b/libc/src/string/strrchr.cpp index 28716c28a2664..374a802fbb9e2 100644 --- a/libc/src/string/strrchr.cpp +++ b/libc/src/string/strrchr.cpp @@ -13,16 +13,13 @@ namespace __llvm_libc { char *LLVM_LIBC_ENTRYPOINT(strrchr)(const char *src, int c) { - unsigned char *str = - const_cast(reinterpret_cast(src)); - const unsigned char ch = c; - - unsigned char *last_occurrence = nullptr; + const char ch = c; + char *last_occurrence = nullptr; do { - if (*str == ch) - last_occurrence = str; - } while (*str++); - return reinterpret_cast(last_occurrence); + if (*src == ch) + last_occurrence = const_cast(src); + } while (*src++); + return last_occurrence; } } // namespace __llvm_libc From 93c678a79b0a8914954d77973593ea36706db5d5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Jul 2020 17:07:20 -0700 Subject: [PATCH 073/600] [X86] Simplify vpternlog immediate selection. Rather than hardcoding immediate values for 12 different combinations in a nested pair of switches, we can perform the matched logic operation on 3 magic constants to calculate the immediate. Special thanks to this tweet https://twitter.com/rygorous/status/1187034321992871936 for making me realize I could do this. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 49 +++++++++---------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 697c160f5bffd..3b333496dd748 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3974,39 +3974,26 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { SDValue B = FoldableOp.getOperand(0); SDValue C = FoldableOp.getOperand(1); - unsigned Opc1 = N->getOpcode(); - unsigned Opc2 = FoldableOp.getOpcode(); + // We can build the appropriate control immediate by performing the logic + // operation we're matching using these constants for A, B, and C. 
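+  // For example, matching and(A, xor(B, C)): the inner XOR gives
+  // Imm = TernlogMagicB ^ TernlogMagicC = 0xcc ^ 0xaa = 0x66, and the outer
+  // AND then applies Imm &= TernlogMagicA, i.e. 0x66 & 0xf0 = 0x60, which is
+  // exactly the value the old nested switch hardcoded for this pairing.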
+ const uint8_t TernlogMagicA = 0xf0; + const uint8_t TernlogMagicB = 0xcc; + const uint8_t TernlogMagicC = 0xaa; + + uint8_t Imm; + switch (FoldableOp.getOpcode()) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break; + case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break; + case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break; + case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; + } - uint64_t Imm; - switch (Opc1) { + switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0x80; break; - case ISD::OR: Imm = 0xe0; break; - case ISD::XOR: Imm = 0x60; break; - case X86ISD::ANDNP: Imm = 0x20; break; - } - break; - case ISD::OR: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0xf8; break; - case ISD::OR: Imm = 0xfe; break; - case ISD::XOR: Imm = 0xf6; break; - case X86ISD::ANDNP: Imm = 0xf2; break; - } - break; - case ISD::XOR: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0x78; break; - case ISD::OR: Imm = 0x1e; break; - case ISD::XOR: Imm = 0x96; break; - case X86ISD::ANDNP: Imm = 0xd2; break; - } - break; + case ISD::AND: Imm &= TernlogMagicA; break; + case ISD::OR: Imm |= TernlogMagicA; break; + case ISD::XOR: Imm ^= TernlogMagicA; break; } auto tryFoldLoadOrBCast = From 234f51a65a45b79402996ac6f0abcbb5793814bf Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Fri, 31 Jul 2020 17:07:36 -0700 Subject: [PATCH 074/600] Don't crash if we deserialize a pack expansion type whose pattern contains no packs. Fixes a regression from 740a164dec483225cbd02ab6c82199e2747ffacb. 
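(Background, inferred from the change itself: ASTContext::getPackExpansionType now asserts by default that the pattern contains an unexpanded pack. A PackExpansionType read back from a PCH, or copied by the ASTImporter, can legitimately have a pack-free pattern, e.g. `int (...arr)[sizeof(sizeof(T))]` from the new test, whose array bound is non-dependent, so these callers pass ExpectPackInType=false to skip that check.)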
--- clang/include/clang/AST/TypeProperties.td | 3 ++- clang/lib/AST/ASTImporter.cpp | 3 ++- clang/test/PCH/cxx-variadic-templates.cpp | 5 +++++ clang/test/PCH/cxx-variadic-templates.h | 5 +++++ clang/test/PCH/cxx1y-lambdas.mm | 4 ++++ clang/test/PCH/cxx2a-constraints.cpp | 5 ++++- 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 4540ea0e1952a..ed91670829b8b 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -722,7 +722,8 @@ let Class = PackExpansionType in { } def : Creator<[{ - return ctx.getPackExpansionType(pattern, numExpansions); + return ctx.getPackExpansionType(pattern, numExpansions, + /*ExpectPackInType*/false); }]>; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 12dcd14c06bfa..ee6daf45b7c30 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1498,7 +1498,8 @@ ASTNodeImporter::VisitPackExpansionType(const PackExpansionType *T) { return ToPatternOrErr.takeError(); return Importer.getToContext().getPackExpansionType(*ToPatternOrErr, - T->getNumExpansions()); + T->getNumExpansions(), + /*ExpactPack=*/false); } ExpectedType ASTNodeImporter::VisitDependentTemplateSpecializationType( diff --git a/clang/test/PCH/cxx-variadic-templates.cpp b/clang/test/PCH/cxx-variadic-templates.cpp index 87b101d73c142..b1eed5adb647b 100644 --- a/clang/test/PCH/cxx-variadic-templates.cpp +++ b/clang/test/PCH/cxx-variadic-templates.cpp @@ -19,3 +19,8 @@ shared_ptr spi = shared_ptr::allocate_shared(1, 2); template struct A {}; template struct B {}; outer::inner<1, 2, A, B> i(A<1>{}, B<2>{}); + +void test_nondependent_pack() { + take_nondependent_pack(nullptr, nullptr); + take_nondependent_pack_2({}); +} diff --git a/clang/test/PCH/cxx-variadic-templates.h b/clang/test/PCH/cxx-variadic-templates.h index 50596cdf5dbf9..45395e9ae84aa 100644 --- a/clang/test/PCH/cxx-variadic-templates.h +++ b/clang/test/PCH/cxx-variadic-templates.h @@ -23,3 +23,8 @@ template struct outer { }; }; template struct outer; + +template void take_nondependent_pack(int (...arr)[sizeof(sizeof(T))]); + +template using hide = int; +template void take_nondependent_pack_2(outer...>); diff --git a/clang/test/PCH/cxx1y-lambdas.mm b/clang/test/PCH/cxx1y-lambdas.mm index f140a15215b8f..9c4c11970473b 100644 --- a/clang/test/PCH/cxx1y-lambdas.mm +++ b/clang/test/PCH/cxx1y-lambdas.mm @@ -39,6 +39,8 @@ int init_capture(T t) { return [&, x(t)] { return sizeof(x); }; } +auto with_pack = [](auto ...xs){}; + #else // CHECK-PRINT: T add_slowly @@ -55,4 +57,6 @@ int add(int x, int y) { // CHECK-PRINT: init_capture // CHECK-PRINT: [&, x(t)] +void use_with_pack() { with_pack(1, 2, 3); } + #endif diff --git a/clang/test/PCH/cxx2a-constraints.cpp b/clang/test/PCH/cxx2a-constraints.cpp index d8b79337c8f18..3f3b5e536cc93 100644 --- a/clang/test/PCH/cxx2a-constraints.cpp +++ b/clang/test/PCH/cxx2a-constraints.cpp @@ -24,6 +24,8 @@ template T> void h(T) {} template T> void i(T) {} template void i(T) {} +void j(SizedLike auto ...ints) {} + #else /*included pch*/ int main() { @@ -35,6 +37,7 @@ int main() { (void)h(1); (void)i('1'); (void)i(1); + (void)j(1, 2, 3); } -#endif // HEADER \ No newline at end of file +#endif // HEADER From 01bfe2e494027e473ba920ef324b1929af16936e Mon Sep 17 00:00:00 2001 From: Huihui Zhang Date: Fri, 31 Jul 2020 17:08:17 -0700 Subject: [PATCH 075/600] [AArch64][SVE] Allow vector of pointers as legal type for 
masked load/store. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refer to LangRef http://llvm.org/docs/LangRef.html#llvm-masked-load-intrinsics 'llvm.masked.load/store.*’ intrinsics are overloaded intrinsic, which allow the load/store data to be a vector of any integer, floating-point or pointer data type. Therefore, allow pointer data type when checking 'isLegalMaskedLoadStore()'. Reviewed By: paulwalker-arm Differential Revision: https://reviews.llvm.org/D85045 --- .../AArch64/AArch64TargetTransformInfo.h | 3 + .../CodeGen/AArch64/sve-masked-ldst-nonext.ll | 102 ++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 5d1371f13fb3e..05b7f70f2335c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -166,6 +166,9 @@ class AArch64TTIImpl : public BasicTTIImplBase { return false; Type *Ty = cast(DataType)->getElementType(); + if (Ty->isPointerTy()) + return true; + if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll index f5047a7bcbaff..5e48ad8b628e0 100644 --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-nonext.ll @@ -188,6 +188,94 @@ define void @masked_store_nxv8bf16( *%a, @masked.load.nxv2p0i8(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0i8: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0i16(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0i16: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0i32(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0i32: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0i64(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0i64: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(* %vector_ptr, i32 8, %mask, undef) + ret %v +} + +; Pointer of floating-point type + +define @masked.load.nxv2p0bf16(* %vector_ptr, %mask) nounwind #0 { +; CHECK-LABEL: masked.load.nxv2p0bf16: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0f16(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0f16: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0f32(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0f32: +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %v = call @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(* %vector_ptr, i32 8, %mask, undef) + ret %v +} +define @masked.load.nxv2p0f64(* %vector_ptr, %mask) nounwind { +; CHECK-LABEL: masked.load.nxv2p0f64: +; CHECK-NEXT: ld1d { z0.d 
+; CHECK-NEXT: ret + %v = call <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double*> undef) + ret <vscale x 2 x double*> %v +} + +; Pointer of array type + +define void @masked.store.nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind { +; CHECK-LABEL: masked.store.nxv2p0a64i16: +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*> %data, <vscale x 2 x [64 x i16]*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask) + ret void +} + +; Pointer of struct type + +%struct = type { i8*, i32 } +define void @masked.store.nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, <vscale x 2 x i1> %mask) nounwind { +; CHECK-LABEL: masked.store.nxv2p0s_struct: +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*> %data, <vscale x 2 x %struct*>* %vector_ptr, i32 8, <vscale x 2 x i1> %mask) + ret void +} + + declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>) declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>) declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>) @@ -214,5 +302,19 @@ declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>) declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>) declare void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>) +declare <vscale x 2 x i8*> @llvm.masked.load.nxv2p0i8.p0nxv2p0i8(<vscale x 2 x i8*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i8*>) +declare <vscale x 2 x i16*> @llvm.masked.load.nxv2p0i16.p0nxv2p0i16(<vscale x 2 x i16*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i16*>) +declare <vscale x 2 x i32*> @llvm.masked.load.nxv2p0i32.p0nxv2p0i32(<vscale x 2 x i32*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32*>) +declare <vscale x 2 x i64*> @llvm.masked.load.nxv2p0i64.p0nxv2p0i64(<vscale x 2 x i64*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i64*>) + +declare <vscale x 2 x bfloat*> @llvm.masked.load.nxv2p0bf16.p0nxv2p0bf16(<vscale x 2 x bfloat*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x bfloat*>) +declare <vscale x 2 x half*> @llvm.masked.load.nxv2p0f16.p0nxv2p0f16(<vscale x 2 x half*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x half*>) +declare <vscale x 2 x float*> @llvm.masked.load.nxv2p0f32.p0nxv2p0f32(<vscale x 2 x float*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x float*>) +declare <vscale x 2 x double*> @llvm.masked.load.nxv2p0f64.p0nxv2p0f64(<vscale x 2 x double*>*, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x double*>) + +declare void @llvm.masked.store.nxv2p0a64i16.p0nxv2p0a64i16(<vscale x 2 x [64 x i16]*>, <vscale x 2 x [64 x i16]*>*, i32 immarg, <vscale x 2 x i1>) + +declare void @llvm.masked.store.nxv2p0s_struct.p0nxv2p0s_struct(<vscale x 2 x %struct*>, <vscale x 2 x %struct*>*, i32 immarg, <vscale x 2 x i1>) + ; +bf16 is required for the bfloat version. attributes #0 = { "target-features"="+sve,+bf16" } From 1fd2049e38daf0992f63883d68609b85dfb9cb26 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 1 Aug 2020 01:45:33 +0100 Subject: [PATCH 076/600] [clang-tidy][NFC] Added convenience methods for getting optional options These methods abstract away Error handling when reading options: if an option cannot be parsed, they log the error automatically and return None.
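For illustration (not part of the patch), a check could consume the new accessors roughly as below. This is a minimal sketch; the MyCheck class and the "StrictMode" option name are hypothetical, and only Options.getOptional<T>() comes from this change:

    // Hypothetical clang-tidy check reading an optional option.
    class MyCheck : public ClangTidyCheck {
    public:
      MyCheck(StringRef Name, ClangTidyContext *Context)
          : ClangTidyCheck(Name, Context),
            // llvm::None if "StrictMode" is absent or not parsable as bool.
            StrictMode(Options.getOptional<bool>("StrictMode")) {}

    private:
      llvm::Optional<bool> StrictMode;
    };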
Reviewed By: gribozavr2 Differential Revision: https://reviews.llvm.org/D84812 --- .../clang-tidy/ClangTidyCheck.cpp | 39 +++++++++++---- clang-tools-extra/clang-tidy/ClangTidyCheck.h | 49 +++++++++++++++++-- 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp index 737d85e092d97..c99931e0aa3a2 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" +#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" namespace clang { @@ -126,7 +127,7 @@ bool ClangTidyCheck::OptionsView::get(StringRef LocalName, llvm::Expected<bool> ValueOr = get<bool>(LocalName); if (ValueOr) return *ValueOr; - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } @@ -145,7 +146,7 @@ bool ClangTidyCheck::OptionsView::getLocalOrGlobal(StringRef LocalName, llvm::Expected<bool> ValueOr = getLocalOrGlobal<bool>(LocalName); if (ValueOr) return *ValueOr; - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } @@ -204,13 +205,33 @@ llvm::Expected<int64_t> ClangTidyCheck::OptionsView::getEnumInt( Iter->getValue().Value); } -void ClangTidyCheck::OptionsView::logErrToStdErr(llvm::Error &&Err) { - llvm::logAllUnhandledErrors( - llvm::handleErrors(std::move(Err), - [](const MissingOptionError &) -> llvm::Error { - return llvm::Error::success(); - }), - llvm::errs(), "warning: "); +void ClangTidyCheck::OptionsView::logIfOptionParsingError(llvm::Error &&Err) { + if (auto RemainingErrors = + llvm::handleErrors(std::move(Err), [](const MissingOptionError &) {})) + llvm::logAllUnhandledErrors(std::move(RemainingErrors), + llvm::WithColor::warning()); } + +template <> +Optional<bool> ClangTidyCheck::OptionsView::getOptional<bool>( + StringRef LocalName) const { + if (auto ValueOr = get<bool>(LocalName)) + return *ValueOr; + else + consumeError(ValueOr.takeError()); + return llvm::None; +} + +template <> +Optional<bool> +ClangTidyCheck::OptionsView::getOptionalLocalOrGlobal<bool>( + StringRef LocalName) const { + if (auto ValueOr = getLocalOrGlobal<bool>(LocalName)) + return *ValueOr; + else + consumeError(ValueOr.takeError()); + return llvm::None; +} + } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h index 4df8071c841e0..6237e216656be 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h @@ -268,7 +268,7 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { if (llvm::Expected<T> ValueOr = get<T>(LocalName)) return *ValueOr; else - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } @@ -314,7 +314,7 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { if (llvm::Expected<T> ValueOr = getLocalOrGlobal<T>(LocalName)) return *ValueOr; else - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } @@ -353,7 +353,7 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { if (auto ValueOr = get<T>(LocalName, IgnoreCase)) return *ValueOr; else - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } @@ -395,10 +395,35 @@ class ClangTidyCheck : public
ast_matchers::MatchFinder::MatchCallback { if (auto ValueOr = getLocalOrGlobal<T>(LocalName, IgnoreCase)) return *ValueOr; else - logErrToStdErr(ValueOr.takeError()); + logIfOptionParsingError(ValueOr.takeError()); return Default; } + /// Returns the value for the option \p LocalName represented as a ``T``. + /// If the option is missing, returns None; if the option can't be parsed + /// as a ``T``, logs that to stderr and returns None. + template <typename T> + llvm::Optional<T> getOptional(StringRef LocalName) const { + if (auto ValueOr = get<T>(LocalName)) + return *ValueOr; + else + logIfOptionParsingError(ValueOr.takeError()); + return llvm::None; + } + + /// Returns the value for the local or global option \p LocalName + /// represented as a ``T``. + /// If the option is missing, returns None; if the + /// option can't be parsed as a ``T``, logs that to stderr and returns None. + template <typename T> + llvm::Optional<T> getOptionalLocalOrGlobal(StringRef LocalName) const { + if (auto ValueOr = getLocalOrGlobal<T>(LocalName)) + return *ValueOr; + else + logIfOptionParsingError(ValueOr.takeError()); + return llvm::None; + } + /// Stores an option with the check-local name \p LocalName with /// string value \p Value to \p Options. void store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, @@ -456,7 +481,8 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { void storeInt(ClangTidyOptions::OptionMap &Options, StringRef LocalName, int64_t Value) const; - static void logErrToStdErr(llvm::Error &&Err); + /// Logs an Error to stderr if \p Err is not a MissingOptionError. + static void logIfOptionParsingError(llvm::Error &&Err); std::string NamePrefix; const ClangTidyOptions::OptionMap &CheckOptions; @@ -524,6 +550,19 @@ void ClangTidyCheck::OptionsView::store( ClangTidyOptions::OptionMap &Options, StringRef LocalName, bool Value) const; +/// Returns the value for the option \p LocalName. +/// If the option is missing returns None. +template <> +Optional<bool> ClangTidyCheck::OptionsView::getOptional<bool>( + StringRef LocalName) const; + +/// Returns the value for the local or global option \p LocalName. +/// If the option is missing returns None. +template <> +Optional<bool> +ClangTidyCheck::OptionsView::getOptionalLocalOrGlobal<bool>( + StringRef LocalName) const; + } // namespace tidy } // namespace clang From 605fd4d77ce19c4d8c331732b490ef436ab093c2 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Sat, 11 Jul 2020 16:01:13 +0900 Subject: [PATCH 077/600] [VE] Change calling convention to follow ABI Change to expand all arguments and return values to i64 to follow the ABI. Also update the regression tests.
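To make the new layout concrete (this sketch is not part of the patch): a float argument or return value now travels bitcast into the upper 32 bits of a 64-bit register slot, with zeroes in the lower half, matching the "| float | 0 |" diagrams in the diff below. Hypothetical C++ helpers showing the bit placement:

    #include <cstdint>
    #include <cstring>

    // Place a float's bits into bits 63..32 of a 64-bit slot,
    // zeroing the low half (the "| float | 0 |" layout).
    uint64_t packF32(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // bit-cast float -> uint32_t
      return static_cast<uint64_t>(Bits) << 32;
    }

    // Recover the float from the upper 32 bits of the slot.
    float unpackF32(uint64_t Slot) {
      uint32_t Bits = static_cast<uint32_t>(Slot >> 32);
      float F;
      std::memcpy(&F, &Bits, sizeof(F)); // bit-cast uint32_t -> float
      return F;
    }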
Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D84581 --- llvm/lib/Target/VE/VECallingConv.td | 62 +++---- llvm/lib/Target/VE/VEISelLowering.cpp | 96 +++++++--- llvm/test/CodeGen/VE/addition.ll | 34 +++- llvm/test/CodeGen/VE/bitcast.ll | 5 +- llvm/test/CodeGen/VE/bitreverse.ll | 12 -- llvm/test/CodeGen/VE/branch1.ll | 126 +++++++------ llvm/test/CodeGen/VE/bswap.ll | 17 +- llvm/test/CodeGen/VE/call.ll | 34 ++-- llvm/test/CodeGen/VE/cast.ll | 98 +++++----- llvm/test/CodeGen/VE/constants.ll | 3 - llvm/test/CodeGen/VE/ctlz.ll | 3 +- llvm/test/CodeGen/VE/ctpop.ll | 7 +- llvm/test/CodeGen/VE/cttz.ll | 6 +- llvm/test/CodeGen/VE/div.ll | 30 +++- llvm/test/CodeGen/VE/fp_to_int.ll | 10 +- llvm/test/CodeGen/VE/int_to_fp.ll | 14 +- llvm/test/CodeGen/VE/left_shift.ll | 40 +++-- llvm/test/CodeGen/VE/load_off.ll | 2 +- llvm/test/CodeGen/VE/max.ll | 26 ++- llvm/test/CodeGen/VE/min.ll | 31 ++-- llvm/test/CodeGen/VE/multiply.ll | 35 +++- llvm/test/CodeGen/VE/nnd.ll | 29 +++ llvm/test/CodeGen/VE/or.ll | 10 ++ .../test/CodeGen/VE/pic_access_static_data.ll | 2 +- llvm/test/CodeGen/VE/rem.ll | 34 +++- llvm/test/CodeGen/VE/right_shift.ll | 38 ++-- llvm/test/CodeGen/VE/rotl.ll | 4 +- llvm/test/CodeGen/VE/rotr.ll | 4 +- llvm/test/CodeGen/VE/select.ll | 20 ++- llvm/test/CodeGen/VE/selectccf32.ll | 28 --- llvm/test/CodeGen/VE/selectccf32c.ll | 32 ++-- llvm/test/CodeGen/VE/selectccf32i.ll | 28 --- llvm/test/CodeGen/VE/selectccf64c.ll | 18 +- llvm/test/CodeGen/VE/selectcci32.ll | 154 ++++++++-------- llvm/test/CodeGen/VE/selectcci32c.ll | 85 +++++---- llvm/test/CodeGen/VE/selectcci32i.ll | 168 +++++++++--------- llvm/test/CodeGen/VE/selectcci64c.ll | 18 +- llvm/test/CodeGen/VE/setccf32.ll | 112 ++++++------ llvm/test/CodeGen/VE/setccf32i.ll | 112 ++++++------ llvm/test/CodeGen/VE/setccf64.ll | 112 ++++++------ llvm/test/CodeGen/VE/setccf64i.ll | 112 ++++++------ llvm/test/CodeGen/VE/setcci32.ll | 100 ++++++----- llvm/test/CodeGen/VE/setcci32i.ll | 90 +++++----- llvm/test/CodeGen/VE/setcci64.ll | 80 ++++----- llvm/test/CodeGen/VE/setcci64i.ll | 80 ++++----- llvm/test/CodeGen/VE/sext_zext_load.ll | 12 +- llvm/test/CodeGen/VE/subtraction.ll | 34 +++- llvm/test/CodeGen/VE/truncstore.ll | 2 - llvm/test/CodeGen/VE/va_caller.ll | 28 ++- llvm/test/CodeGen/VE/xor.ll | 13 ++ 50 files changed, 1221 insertions(+), 1029 deletions(-) diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td index 4f04dae884ab5..acdae68323fdd 100644 --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -14,13 +14,6 @@ // Aurora VE //===----------------------------------------------------------------------===// def CC_VE_C_Stack: CallingConv<[ - // float --> need special handling like below. - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, - // All of the rest are assigned to the stack in 8-byte aligned units. CCAssignToStack<0, 8> ]>; @@ -28,20 +21,18 @@ def CC_VE_C_Stack: CallingConv<[ def CC_VE : CallingConv<[ // All arguments get passed in generic registers if there is space. - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, + // Promote i1/i8/i16/i32 arguments to i64.
+ CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, - - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float arguments to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, @@ -52,31 +43,32 @@ // All arguments get passed in stack for varargs function or non-prototyped // function. def CC_VE2 : CallingConv<[ - // float --> need special handling like below. - // 0 4 + // Promote i1/i8/i16/i32 arguments to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, + + // Convert float arguments to i64 with padding. + // 63 31 0 // +------+------+ - // | empty| float| + // | float| 0 | // +------+------+ - CCIfType<[f32], CCCustom<"allocateFloat">>, + CCIfType<[f32], CCBitConvertToType<i64>>, CCAssignToStack<0, 8> ]>; def RetCC_VE : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, - - // bool, char, int, enum, long --> generic integer 32 bit registers - CCIfType<[i32], CCAssignToRegWithShadow< - [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Promote i1/i8/i16/i32 return values to i64. + CCIfType<[i1, i8, i16, i32], CCPromoteToType<i64>>, - // float --> generic floating point 32 bit registers - CCIfType<[f32], CCAssignToRegWithShadow< - [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], - [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // Convert float return values to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + CCIfType<[f32], CCBitConvertToType<i64>>, - // long long/double --> generic 64 bit registers + // bool, char, int, enum, long, long long, float, double + // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, ]>; diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index ab720545dd831..e2232f4500e35 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -38,28 +38,6 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// -static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - switch (LocVT.SimpleTy) { - case MVT::f32: { - // Allocate stack like below - // 0 4 - // +------+------+ - // | empty| float| - // +------+------+ - // Use align=8 for dummy area to align the beginning of these 2 area. - State.AllocateStack(4, Align(8)); // for empty area - // Use align=4 for value to place it at just after the dummy area.
- unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return true; - } - default: - return false; - } -} - #include "VEGenCallingConv.inc" bool VETargetLowering::CanLowerReturn( @@ -109,6 +87,22 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::AExt: OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); break; + case CCValAssign::BCvt: { + // Convert a float return value to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, OutVal, Sub_f32), + 0); + break; + } default: llvm_unreachable("Unknown loc info!"); } @@ -179,6 +173,20 @@ SDValue VETargetLowering::LowerFormalArguments( Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float argument from i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, Arg, Sub_f32), + 0); + break; + } default: break; } @@ -197,6 +205,20 @@ SDValue VETargetLowering::LowerFormalArguments( // beginning of the arguments area at %fp+176. unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + + // Adjust offset for a float argument by adding 4 since the argument is + // stored in 8 bytes buffer with offset like below. LLVM generates + // 4 bytes load instruction, so need to adjust offset here. This + // adjustment is required in only LowerFormalArguments. In LowerCall, + // a float argument is converted to i64 first, and stored as 8 bytes + // data, which is required by ABI, so no need for adjustment. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + if (VA.getValVT() == MVT::f32) + Offset += 4; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); InVals.push_back( DAG.getLoad(VA.getValVT(), DL, Chain, @@ -371,6 +393,22 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::BCvt: { + // Convert a float argument to i64 with padding. + // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Undef = SDValue( + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + MVT::i64, Undef, Arg, Sub_f32), + 0); + break; + } } if (VA.isRegLoc()) { @@ -488,6 +526,20 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, DAG.getValueType(VA.getValVT())); break; + case CCValAssign::BCvt: { + // Extract a float return value from i64 with padding. 
+ // 63 31 0 + // +------+------+ + // | float| 0 | + // +------+------+ + assert(VA.getLocVT() == MVT::i64); + assert(VA.getValVT() == MVT::f32); + SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32); + RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + MVT::f32, RV, Sub_f32), + 0); + break; + } default: break; } diff --git a/llvm/test/CodeGen/VE/addition.ll b/llvm/test/CodeGen/VE/addition.ll index 730776ec534b8..54275e9e0e267 100644 --- a/llvm/test/CodeGen/VE/addition.ll +++ b/llvm/test/CodeGen/VE/addition.ll @@ -3,9 +3,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i8 %1, %0 ret i8 %3 @@ -14,9 +16,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { define signext i16 @func2(i16 signext %0, i16 signext %1) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i16 %1, %0 ret i16 %3 @@ -25,6 +29,8 @@ define signext i16 @func2(i16 signext %0, i16 signext %1) { define i32 @func3(i32 %0, i32 %1) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add nsw i32 %1, %0 @@ -43,6 +49,8 @@ define i64 @func4(i64 %0, i64 %1) { define zeroext i8 @func6(i8 zeroext %0, i8 zeroext %1) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -53,6 +61,8 @@ define zeroext i8 @func6(i8 zeroext %0, i8 zeroext %1) { define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -63,6 +73,8 @@ define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { define i32 @func8(i32 %0, i32 %1) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i32 %1, %0 @@ -81,9 +93,10 @@ define i64 @func9(i64 %0, i64 %1) { define signext i8 @func13(i8 signext %0) { ; CHECK-LABEL: func13: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = add i8 %0, 5 ret i8 %2 @@ -92,9 +105,10 @@ define signext i8 @func13(i8 signext %0) { define signext i16 @func14(i16 signext %0) { ; CHECK-LABEL: func14: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 -; CHECK-NEXT: sla.w.sx %s0, 
%s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %2 = add i16 %0, 5 ret i16 %2 @@ -103,6 +117,7 @@ define signext i16 @func14(i16 signext %0) { define i32 @func15(i32 %0) { ; CHECK-LABEL: func15: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = add nsw i32 %0, 5 @@ -121,6 +136,7 @@ define i64 @func16(i64 %0) { define zeroext i8 @func18(i8 zeroext %0) { ; CHECK-LABEL: func18: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -131,6 +147,7 @@ define zeroext i8 @func18(i8 zeroext %0) { define zeroext i16 @func19(i16 zeroext %0) { ; CHECK-LABEL: func19: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -141,6 +158,7 @@ define zeroext i16 @func19(i16 zeroext %0) { define i32 @func20(i32 %0) { ; CHECK-LABEL: func20: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, 5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = add i32 %0, 5 diff --git a/llvm/test/CodeGen/VE/bitcast.ll b/llvm/test/CodeGen/VE/bitcast.ll index dacc8f189e966..d7c09cd46b613 100644 --- a/llvm/test/CodeGen/VE/bitcast.ll +++ b/llvm/test/CodeGen/VE/bitcast.ll @@ -22,9 +22,8 @@ define dso_local double @bitcastl2d(i64 %x) { define dso_local float @bitcastw2f(i32 %x) { ; CHECK-LABEL: bitcastw2f: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sll %s0, %s0, 32 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = bitcast i32 %x to float ret float %r @@ -34,9 +33,7 @@ define dso_local float @bitcastw2f(i32 %x) { define dso_local i32 @bitcastf2w(float %x) { ; CHECK-LABEL: bitcastf2w: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf0 killed $sf0 def $sx0 ; CHECK-NEXT: sra.l %s0, %s0, 32 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = bitcast float %x to i32 ret i32 %r diff --git a/llvm/test/CodeGen/VE/bitreverse.ll b/llvm/test/CodeGen/VE/bitreverse.ll index fce969af657e2..af58afe38fd97 100644 --- a/llvm/test/CodeGen/VE/bitreverse.ll +++ b/llvm/test/CodeGen/VE/bitreverse.ll @@ -14,10 +14,8 @@ declare i64 @llvm.bitreverse.i64(i64) define i32 @func2(i32 %p) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: srl %s0, %s0, 32 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.bitreverse.i32(i32 %p) ret i32 %r @@ -28,10 +26,8 @@ declare i32 @llvm.bitreverse.i32(i32) define signext i16 @func3(i16 signext %p) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: sra.l %s0, %s0, 48 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.bitreverse.i16(i16 %p) ret i16 %r @@ -42,10 +38,8 @@ declare i16 @llvm.bitreverse.i16(i16) define signext i8 @func4(i8 signext %p) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: sra.l %s0, %s0, 56 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 
; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.bitreverse.i8(i8 %p) ret i8 %r @@ -65,10 +59,8 @@ define i64 @func5(i64 %p) { define i32 @func6(i32 %p) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: srl %s0, %s0, 32 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.bitreverse.i32(i32 %p) ret i32 %r @@ -77,10 +69,8 @@ define i32 @func6(i32 %p) { define zeroext i16 @func7(i16 zeroext %p) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: srl %s0, %s0, 48 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.bitreverse.i16(i16 %p) ret i16 %r @@ -89,10 +79,8 @@ define zeroext i16 @func7(i16 zeroext %p) { define zeroext i8 @func8(i8 zeroext %p) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: srl %s0, %s0, 56 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.bitreverse.i8(i8 %p) ret i8 %r diff --git a/llvm/test/CodeGen/VE/branch1.ll b/llvm/test/CodeGen/VE/branch1.ll index c9f0a22c4c0a0..5561284c992e6 100644 --- a/llvm/test/CodeGen/VE/branch1.ll +++ b/llvm/test/CodeGen/VE/branch1.ll @@ -3,19 +3,22 @@ define signext i8 @func1(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brle.w %s0, %s1, .LBB0_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: brle.w %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB0_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp sgt i8 %a, %b @@ -36,17 +39,20 @@ declare i32 @ret(i32) define i32 @func2(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brle.w %s0, %s1, .LBB1_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: brle.w %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB1_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp sgt i16 %a, %b @@ -64,17 +70,20 @@ join: define i32 @func3(i32 %a, i32 %b) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brle.w %s0, %s1, .LBB2_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: 
brle.w %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB2_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp sgt i32 %a, %b @@ -92,17 +101,18 @@ join: define i32 @func4(i64 %a, i64 %b) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brle.l %s0, %s1, .LBB3_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brle.l %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB3_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp sgt i64 %a, %b @@ -120,18 +130,21 @@ join: define i32 @func5(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: func5: ; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: cmpu.w %s0, %s1, %s0 -; CHECK-NEXT: brle.w 0, %s0, .LBB4_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brle.w 0, %s0, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB4_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp ugt i8 %a, %b @@ -149,18 +162,21 @@ join: define i32 @func6(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: cmpu.w %s0, %s1, %s0 -; CHECK-NEXT: brle.w 0, %s0, .LBB5_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brle.w 0, %s0, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB5_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp ugt i16 %a, %b @@ -178,18 +194,21 @@ join: define i32 @func7(i32 %a, i32 %b) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: cmpu.w %s0, %s1, %s0 -; CHECK-NEXT: brle.w 0, %s0, .LBB6_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brle.w 0, %s0, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; 
CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB6_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = icmp ugt i32 %a, %b @@ -207,17 +226,18 @@ join: define i32 @func8(float %a, float %b) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brlenan.s %s0, %s1, .LBB7_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brlenan.s %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB7_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = fcmp ogt float %a, %b @@ -235,17 +255,18 @@ join: define i32 @func9(double %a, double %b) { ; CHECK-LABEL: func9: ; CHECK: .LBB{{[0-9]+}}_5: -; CHECK-NEXT: brlenan.d %s0, %s1, .LBB8_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brlenan.d %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB8_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = fcmp ogt double %a, %b @@ -264,17 +285,18 @@ define i32 @func10(double %a, double %b) { ; CHECK-LABEL: func10: ; CHECK: .LBB{{[0-9]+}}_5: ; CHECK-NEXT: lea.sl %s1, 1075052544 -; CHECK-NEXT: brlenan.d %s0, %s1, .LBB9_1 -; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: brlenan.d %s0, %s1, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %on.true ; CHECK-NEXT: lea %s0, ret@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, ret@hi(, %s0) ; CHECK-NEXT: or %s0, 2, (0)1 ; CHECK-NEXT: bsic %s10, (, %s12) -; CHECK-NEXT: br.l.t .LBB9_3 -; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: br.l.t .LBB{{[0-9]+}}_3 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: ; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %join ; CHECK-NEXT: or %s11, 0, %s9 entry: %cmp = fcmp ogt double %a, 5.000000e+00 diff --git a/llvm/test/CodeGen/VE/bswap.ll b/llvm/test/CodeGen/VE/bswap.ll index 274085462856f..39569d8889c5a 100644 --- a/llvm/test/CodeGen/VE/bswap.ll +++ b/llvm/test/CodeGen/VE/bswap.ll @@ -14,9 +14,8 @@ declare i64 @llvm.bswap.i64(i64) define i32 @func2(i32 %p) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: bswp %s0, %s0, 1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.bswap.i32(i32 %p) ret i32 %r @@ -27,9 +26,12 @@ declare i32 @llvm.bswap.i32(i32) define signext i16 @func3(i16 signext %p) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # 
kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: bswp %s0, %s0, 1 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: srl %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.bswap.i16(i16 %p) ret i16 %r @@ -49,9 +51,8 @@ define i64 @func4(i64 %p) { define i32 @func5(i32 %p) { ; CHECK-LABEL: func5: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: bswp %s0, %s0, 1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.bswap.i32(i32 %p) ret i32 %r @@ -60,11 +61,11 @@ define i32 @func5(i32 %p) { define zeroext i16 @func6(i16 zeroext %p) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: bswp %s0, %s0, 1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, 16 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.bswap.i16(i16 %p) ret i16 %r diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll index 9e9f22b6d8233..386a5fd74bf97 100644 --- a/llvm/test/CodeGen/VE/call.ll +++ b/llvm/test/CodeGen/VE/call.ll @@ -20,7 +20,7 @@ define i32 @stack_call_int() { ; CHECK-LABEL: stack_call_int: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s0, 10, (0)1 -; CHECK-NEXT: stl %s0, 248(, %s11) +; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: or %s34, 9, (0)1 ; CHECK-NEXT: lea %s0, stack_callee_int@lo ; CHECK-NEXT: and %s0, %s0, (32)0 @@ -33,7 +33,7 @@ define i32 @stack_call_int() { ; CHECK-NEXT: or %s5, 6, (0)1 ; CHECK-NEXT: or %s6, 7, (0)1 ; CHECK-NEXT: or %s7, 8, (0)1 -; CHECK-NEXT: stl %s34, 240(, %s11) +; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10) @@ -46,7 +46,7 @@ define i32 @stack_call_int_szext() { ; CHECK-LABEL: stack_call_int_szext: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s0, -1, (0)1 -; CHECK-NEXT: stl %s0, 248(, %s11) +; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: lea %s34, 65535 ; CHECK-NEXT: lea %s1, stack_callee_int_szext@lo ; CHECK-NEXT: and %s1, %s1, (32)0 @@ -58,7 +58,7 @@ define i32 @stack_call_int_szext() { ; CHECK-NEXT: or %s5, 6, (0)1 ; CHECK-NEXT: or %s6, 7, (0)1 ; CHECK-NEXT: or %s7, 8, (0)1 -; CHECK-NEXT: stl %s34, 240(, %s11) +; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1) @@ -70,12 +70,9 @@ declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, define float @stack_call_float() { ; CHECK-LABEL: stack_call_float: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, 1092616192 -; CHECK-NEXT: stl %s0, 252(, %s11) -; CHECK-NEXT: lea %s34, 1091567616 -; CHECK-NEXT: lea %s0, stack_callee_float@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s0) +; CHECK-NEXT: lea.sl %s0, 1092616192 +; CHECK-NEXT: st %s0, 248(, %s11) +; CHECK-NEXT: lea.sl %s34, 1091567616 ; CHECK-NEXT: lea.sl %s0, 1065353216 ; CHECK-NEXT: lea.sl %s1, 1073741824 ; CHECK-NEXT: lea.sl %s2, 1077936128 @@ -84,15 +81,10 
@@ define float @stack_call_float() { ; CHECK-NEXT: lea.sl %s5, 1086324736 ; CHECK-NEXT: lea.sl %s6, 1088421888 ; CHECK-NEXT: lea.sl %s7, 1090519040 -; CHECK-NEXT: stl %s34, 244(, %s11) -; CHECK-NEXT: # kill: def $sf0 killed $sf0 killed $sx0 -; CHECK-NEXT: # kill: def $sf1 killed $sf1 killed $sx1 -; CHECK-NEXT: # kill: def $sf2 killed $sf2 killed $sx2 -; CHECK-NEXT: # kill: def $sf3 killed $sf3 killed $sx3 -; CHECK-NEXT: # kill: def $sf4 killed $sf4 killed $sx4 -; CHECK-NEXT: # kill: def $sf5 killed $sf5 killed $sx5 -; CHECK-NEXT: # kill: def $sf6 killed $sf6 killed $sx6 -; CHECK-NEXT: # kill: def $sf7 killed $sf7 killed $sx7 +; CHECK-NEXT: lea %s35, stack_callee_float@lo +; CHECK-NEXT: and %s35, %s35, (32)0 +; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s35) +; CHECK-NEXT: st %s34, 240(, %s11) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0) @@ -104,11 +96,11 @@ declare float @stack_callee_float(float, float, float, float, float, float, floa define float @stack_call_float2(float %p0) { ; CHECK-LABEL: stack_call_float2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: stu %s0, 252(, %s11) +; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: lea %s1, stack_callee_float@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(, %s1) -; CHECK-NEXT: stu %s0, 244(, %s11) +; CHECK-NEXT: st %s0, 240(, %s11) ; CHECK-NEXT: or %s1, 0, %s0 ; CHECK-NEXT: or %s2, 0, %s0 ; CHECK-NEXT: or %s3, 0, %s0 diff --git a/llvm/test/CodeGen/VE/cast.ll b/llvm/test/CodeGen/VE/cast.ll index 51126e123ac67..07ad969a1bd3e 100644 --- a/llvm/test/CodeGen/VE/cast.ll +++ b/llvm/test/CodeGen/VE/cast.ll @@ -4,6 +4,7 @@ define i32 @i() { ; CHECK-LABEL: i: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea %s0, -2147483648 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 ret i32 -2147483648 } @@ -12,6 +13,7 @@ define i32 @ui() { ; CHECK-LABEL: ui: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea %s0, -2147483648 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 ret i32 -2147483648 } @@ -37,6 +39,7 @@ define signext i8 @d2c(double %x) { ; CHECK-LABEL: d2c: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptosi double %x to i8 ret i8 %r @@ -46,6 +49,7 @@ define zeroext i8 @d2uc(double %x) { ; CHECK-LABEL: d2uc: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui double %x to i8 ret i8 %r @@ -55,6 +59,7 @@ define signext i16 @d2s(double %x) { ; CHECK-LABEL: d2s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptosi double %x to i16 ret i16 %r @@ -64,6 +69,7 @@ define zeroext i16 @d2us(double %x) { ; CHECK-LABEL: d2us: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui double %x to i16 ret i16 %r @@ -82,7 +88,6 @@ define i32 @d2ui(double %x) { ; CHECK-LABEL: d2ui: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.l.d.rz %s0, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui double %x to i32 ret i32 %r @@ -133,6 +138,7 @@ define signext i8 @f2c(float %x) { ; CHECK-LABEL: f2c: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz 
%s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptosi float %x to i8 ret i8 %r @@ -142,6 +148,7 @@ define zeroext i8 @f2uc(float %x) { ; CHECK-LABEL: f2uc: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui float %x to i8 ret i8 %r @@ -151,6 +158,7 @@ define signext i16 @f2s(float %x) { ; CHECK-LABEL: f2s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptosi float %x to i16 ret i16 %r @@ -160,6 +168,7 @@ define zeroext i16 @f2us(float %x) { ; CHECK-LABEL: f2us: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui float %x to i16 ret i16 %r @@ -179,7 +188,6 @@ define i32 @f2ui(float %x) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.d.s %s0, %s0 ; CHECK-NEXT: cvt.l.d.rz %s0, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = fptoui float %x to i32 ret i32 %r @@ -234,7 +242,6 @@ define signext i8 @ll2c(i64 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: sra.l %s0, %s0, 56 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i8 ret i8 %2 @@ -254,7 +261,6 @@ define signext i16 @ll2s(i64 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: sra.l %s0, %s0, 48 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i16 ret i16 %2 @@ -272,7 +278,6 @@ define zeroext i16 @ll2us(i64 %0) { define i32 @ll2i(i64 %0) { ; CHECK-LABEL: ll2i: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i32 ret i32 %2 @@ -281,7 +286,6 @@ define i32 @ll2i(i64 %0) { define i32 @ll2ui(i64 %0) { ; CHECK-LABEL: ll2ui: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i32 ret i32 %2 @@ -325,7 +329,6 @@ define signext i8 @ull2c(i64 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: sra.l %s0, %s0, 56 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i8 ret i8 %2 @@ -345,7 +348,6 @@ define signext i16 @ull2s(i64 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: sra.l %s0, %s0, 48 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i16 ret i16 %2 @@ -363,7 +365,6 @@ define zeroext i16 @ull2us(i64 %0) { define i32 @ull2i(i64 %0) { ; CHECK-LABEL: ull2i: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i32 ret i32 %2 @@ -372,7 +373,6 @@ define i32 @ull2i(i64 %0) { define i32 @ull2ui(i64 %0) { ; CHECK-LABEL: ull2ui: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i64 %0 to i32 ret i32 %2 @@ -433,8 +433,8 @@ define double @ull2d(i64 %x) { define signext i8 @i2c(i32 %0) { ; CHECK-LABEL: i2c: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i32 %0 to i8 ret i8 %2 @@ -452,8 +452,8 @@ define zeroext i8 @i2uc(i32 %0) { define signext i16 @i2s(i32 %0) { ; 
CHECK-LABEL: i2s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i32 %0 to i16 ret i16 %2 @@ -503,6 +503,7 @@ define i64 @i2ull(i32 %0) { define float @i2f(i32 %x) { ; CHECK-LABEL: i2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i32 %x to float @@ -512,6 +513,7 @@ define float @i2f(i32 %x) { define double @i2d(i32 %x) { ; CHECK-LABEL: i2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i32 %x to double @@ -521,8 +523,8 @@ define double @i2d(i32 %x) { define signext i8 @ui2c(i32 %0) { ; CHECK-LABEL: ui2c: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i32 %0 to i8 ret i8 %2 @@ -540,8 +542,8 @@ define zeroext i8 @ui2uc(i32 %0) { define signext i16 @ui2s(i32 %0) { ; CHECK-LABEL: ui2s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i32 %0 to i16 ret i16 %2 @@ -573,7 +575,7 @@ define i32 @ui2ui(i32 returned %0) { define i64 @ui2ll(i32 %0) { ; CHECK-LABEL: ui2ll: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i32 %0 to i64 ret i64 %2 @@ -582,7 +584,7 @@ define i64 @ui2ll(i32 %0) { define i64 @ui2ull(i32 %0) { ; CHECK-LABEL: ui2ull: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i32 %0 to i64 ret i64 %2 @@ -591,7 +593,7 @@ define i64 @ui2ull(i32 %0) { define float @ui2f(i32 %x) { ; CHECK-LABEL: ui2f: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cvt.d.l %s0, %s0 ; CHECK-NEXT: cvt.s.d %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -602,7 +604,7 @@ define float @ui2f(i32 %x) { define double @ui2d(i32 %x) { ; CHECK-LABEL: ui2d: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cvt.d.l %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = uitofp i32 %x to double @@ -612,8 +614,8 @@ define double @ui2d(i32 %x) { define signext i8 @s2c(i16 signext %0) { ; CHECK-LABEL: s2c: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i16 %0 to i8 ret i8 %2 @@ -662,7 +664,6 @@ define i32 @s2ui(i16 signext %0) { define i64 @s2ll(i16 signext %0) { ; CHECK-LABEL: s2ll: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i16 %0 to i64 ret i64 %2 @@ -671,7 +672,6 @@ define i64 @s2ll(i16 signext %0) { define i64 @s2ull(i16 signext %0) { ; CHECK-LABEL: s2ull: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i16 %0 to i64 ret i64 %2 @@ -680,6 +680,7 @@ define i64 @s2ull(i16 signext %0) { define float @s2f(i16 signext %x) { ; CHECK-LABEL: s2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: 
adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i16 %x to float @@ -689,6 +690,7 @@ define float @s2f(i16 signext %x) { define double @s2d(i16 signext %x) { ; CHECK-LABEL: s2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i16 %x to double @@ -698,8 +700,8 @@ define double @s2d(i16 signext %x) { define signext i8 @us2c(i16 zeroext %0) { ; CHECK-LABEL: us2c: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i16 %0 to i8 ret i8 %2 @@ -717,8 +719,8 @@ define zeroext i8 @us2uc(i16 zeroext %0) { define signext i16 @us2s(i16 returned zeroext %0) { ; CHECK-LABEL: us2s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 ret i16 %0 } @@ -749,7 +751,6 @@ define i32 @us2ui(i16 zeroext %0) { define i64 @us2ll(i16 zeroext %0) { ; CHECK-LABEL: us2ll: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i16 %0 to i64 ret i64 %2 @@ -758,7 +759,6 @@ define i64 @us2ll(i16 zeroext %0) { define i64 @us2ull(i16 zeroext %0) { ; CHECK-LABEL: us2ull: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i16 %0 to i64 ret i64 %2 @@ -767,6 +767,7 @@ define i64 @us2ull(i16 zeroext %0) { define float @us2f(i16 zeroext %x) { ; CHECK-LABEL: us2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = uitofp i16 %x to float @@ -776,6 +777,7 @@ define float @us2f(i16 zeroext %x) { define double @us2d(i16 zeroext %x) { ; CHECK-LABEL: us2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = uitofp i16 %x to double @@ -833,7 +835,6 @@ define i32 @c2ui(i8 signext %0) { define i64 @c2ll(i8 signext %0) { ; CHECK-LABEL: c2ll: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i8 %0 to i64 ret i64 %2 @@ -842,7 +843,6 @@ define i64 @c2ll(i8 signext %0) { define i64 @c2ull(i8 signext %0) { ; CHECK-LABEL: c2ull: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i8 %0 to i64 ret i64 %2 @@ -851,6 +851,7 @@ define i64 @c2ull(i8 signext %0) { define float @c2f(i8 signext %x) { ; CHECK-LABEL: c2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i8 %x to float @@ -860,6 +861,7 @@ define float @c2f(i8 signext %x) { define double @c2d(i8 signext %x) { ; CHECK-LABEL: c2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sitofp i8 %x to double @@ -869,8 +871,8 @@ define double @c2d(i8 signext %x) { define signext i8 @uc2c(i8 returned zeroext %0) { ; CHECK-LABEL: uc2c: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 ret i8 %0 } @@ -917,7 +919,6 @@ define i32 @uc2ui(i8 zeroext %0) { define i64 @uc2ll(i8 zeroext 
%0) { ; CHECK-LABEL: uc2ll: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i8 %0 to i64 ret i64 %2 @@ -926,7 +927,6 @@ define i64 @uc2ll(i8 zeroext %0) { define i64 @uc2ull(i8 zeroext %0) { ; CHECK-LABEL: uc2ull: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i8 %0 to i64 ret i64 %2 @@ -935,6 +935,7 @@ define i64 @uc2ull(i8 zeroext %0) { define float @uc2f(i8 zeroext %x) { ; CHECK-LABEL: uc2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = uitofp i8 %x to float @@ -944,6 +945,7 @@ define float @uc2f(i8 zeroext %x) { define double @uc2d(i8 zeroext %x) { ; CHECK-LABEL: uc2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = uitofp i8 %x to double @@ -976,7 +978,6 @@ define signext i8 @i1282c(i128 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: sra.l %s0, %s0, 56 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i8 ret i8 %2 @@ -988,7 +989,6 @@ define signext i8 @ui1282c(i128 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 56 ; CHECK-NEXT: sra.l %s0, %s0, 56 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i8 ret i8 %2 @@ -1020,7 +1020,6 @@ define signext i16 @i1282s(i128 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: sra.l %s0, %s0, 48 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i16 ret i16 %2 @@ -1032,7 +1031,6 @@ define signext i16 @ui1282s(i128 %0) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: sll %s0, %s0, 48 ; CHECK-NEXT: sra.l %s0, %s0, 48 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i16 ret i16 %2 @@ -1062,7 +1060,6 @@ define zeroext i16 @ui1282us(i128 %0) { define i32 @i1282i(i128 %0) { ; CHECK-LABEL: i1282i: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i32 ret i32 %2 @@ -1072,7 +1069,6 @@ define i32 @i1282i(i128 %0) { define i32 @ui1282i(i128 %0) { ; CHECK-LABEL: ui1282i: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i32 ret i32 %2 @@ -1082,7 +1078,6 @@ define i32 @ui1282i(i128 %0) { define i32 @i1282ui(i128 %0) { ; CHECK-LABEL: i1282ui: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i32 ret i32 %2 @@ -1092,7 +1087,6 @@ define i32 @i1282ui(i128 %0) { define i32 @ui1282ui(i128 %0) { ; CHECK-LABEL: ui1282ui: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = trunc i128 %0 to i32 ret i32 %2 @@ -1216,7 +1210,7 @@ define i128 @i2ui128(i32 %0) { define i128 @ui2i128(i32 %0) { ; CHECK-LABEL: ui2i128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s1, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i32 %0 to i128 @@ -1227,7 +1221,7 @@ define i128 @ui2i128(i32 %0) { define i128 @ui2ui128(i32 %0) { ; CHECK-LABEL: ui2ui128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s1, 0, (0)1 ; 
CHECK-NEXT: or %s11, 0, %s9 %2 = zext i32 %0 to i128 @@ -1238,7 +1232,6 @@ define i128 @ui2ui128(i32 %0) { define i128 @s2i128(i16 signext %0) { ; CHECK-LABEL: s2i128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.l %s1, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i16 %0 to i128 @@ -1249,7 +1242,6 @@ define i128 @s2i128(i16 signext %0) { define i128 @s2ui128(i16 signext %0) { ; CHECK-LABEL: s2ui128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.l %s1, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i16 %0 to i128 @@ -1260,7 +1252,6 @@ define i128 @s2ui128(i16 signext %0) { define i128 @us2i128(i16 zeroext %0) { ; CHECK-LABEL: us2i128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i16 %0 to i128 @@ -1271,7 +1262,6 @@ define i128 @us2i128(i16 zeroext %0) { define i128 @us2ui128(i16 zeroext %0) { ; CHECK-LABEL: us2ui128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i16 %0 to i128 @@ -1282,7 +1272,6 @@ define i128 @us2ui128(i16 zeroext %0) { define i128 @c2i128(i8 signext %0) { ; CHECK-LABEL: c2i128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.l %s1, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i8 %0 to i128 @@ -1293,7 +1282,6 @@ define i128 @c2i128(i8 signext %0) { define i128 @char2ui128(i8 signext %0) { ; CHECK-LABEL: char2ui128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.l %s1, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %2 = sext i8 %0 to i128 @@ -1304,7 +1292,6 @@ define i128 @char2ui128(i8 signext %0) { define i128 @uc2i128(i8 zeroext %0) { ; CHECK-LABEL: uc2i128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i8 %0 to i128 @@ -1315,7 +1302,6 @@ define i128 @uc2i128(i8 zeroext %0) { define i128 @uc2ui128(i8 zeroext %0) { ; CHECK-LABEL: uc2ui128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = zext i8 %0 to i128 diff --git a/llvm/test/CodeGen/VE/constants.ll b/llvm/test/CodeGen/VE/constants.ll index b7a43605ae347..baebf5ef3621e 100644 --- a/llvm/test/CodeGen/VE/constants.ll +++ b/llvm/test/CodeGen/VE/constants.ll @@ -304,7 +304,6 @@ define float @m5f32() { ; CHECK-LABEL: m5f32: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea.sl %s0, -1063256064 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 ret float -5.000000e+00 } @@ -321,7 +320,6 @@ define float @p2p3f32() { ; CHECK-LABEL: p2p3f32: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea.sl %s0, 1075000115 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 ret float 0x4002666660000000 ; 2.3 } @@ -339,7 +337,6 @@ define float @p128p3f32() { ; CHECK-LABEL: p128p3f32: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea.sl %s0, 1124093133 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 ret float 0x40600999A0000000 ; 128.3 } diff --git a/llvm/test/CodeGen/VE/ctlz.ll b/llvm/test/CodeGen/VE/ctlz.ll index de44790014a07..5853851ac9c97 100644 --- a/llvm/test/CodeGen/VE/ctlz.ll +++ b/llvm/test/CodeGen/VE/ctlz.ll @@ -14,10 +14,9 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i32 @func2(i32 %p) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; 
CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sll %s0, %s0, 32 ; CHECK-NEXT: ldz %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true) ret i32 %r diff --git a/llvm/test/CodeGen/VE/ctpop.ll b/llvm/test/CodeGen/VE/ctpop.ll index 3d25909ab25cb..8fee9104ed077 100644 --- a/llvm/test/CodeGen/VE/ctpop.ll +++ b/llvm/test/CodeGen/VE/ctpop.ll @@ -14,10 +14,9 @@ declare i64 @llvm.ctpop.i64(i64 %p) define i32 @func2(i32 %p) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.ctpop.i32(i32 %p) ret i32 %r @@ -29,9 +28,7 @@ define i16 @func3(i16 %p) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (48)0 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.ctpop.i16(i16 %p) ret i16 %r @@ -43,9 +40,7 @@ define i8 @func4(i8 %p) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (56)0 -; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.ctpop.i8(i8 %p) ret i8 %r diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll index 4b79a0f988e86..46bb52d29102b 100644 --- a/llvm/test/CodeGen/VE/cttz.ll +++ b/llvm/test/CodeGen/VE/cttz.ll @@ -16,12 +16,12 @@ declare i64 @llvm.cttz.i64(i64, i1) define i32 @func2(i32 %p) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.cttz.i32(i32 %p, i1 true) ret i32 %r @@ -32,12 +32,12 @@ declare i32 @llvm.cttz.i32(i32, i1) define i16 @func3(i16 %p) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.cttz.i16(i16 %p, i1 true) ret i16 %r @@ -48,12 +48,12 @@ declare i16 @llvm.cttz.i16(i16, i1) define i8 @func4(i8 %p) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.cttz.i8(i8 %p, i1 true) ret i8 %r diff --git a/llvm/test/CodeGen/VE/div.ll b/llvm/test/CodeGen/VE/div.ll index 8d4a0ddd2c156..ed434a9a3c7b5 100644 --- a/llvm/test/CodeGen/VE/div.ll +++ b/llvm/test/CodeGen/VE/div.ll @@ -14,6 +14,8 @@ define i64 @divi64(i64 %a, i64 %b) { define i32 @divi32(i32 %a, i32 %b) { ; CHECK-LABEL: divi32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, 
%s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %r = sdiv i32 %a, %b @@ -34,6 +36,8 @@ define i64 @divu64(i64 %a, i64 %b) { define i32 @divu32(i32 %a, i32 %b) { ; CHECK-LABEL: divu32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %r = udiv i32 %a, %b @@ -44,9 +48,11 @@ define i32 @divu32(i32 %a, i32 %b) { define signext i16 @divi16(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: divi16: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %a32 = sext i16 %a to i32 %b32 = sext i16 %b to i32 @@ -59,7 +65,10 @@ define signext i16 @divi16(i16 signext %a, i16 signext %b) { define zeroext i16 @divu16(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: divu16: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = udiv i16 %a, %b ret i16 %r @@ -69,9 +78,11 @@ define zeroext i16 @divu16(i16 zeroext %a, i16 zeroext %b) { define signext i8 @divi8(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: divi8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %a32 = sext i8 %a to i32 %b32 = sext i8 %b to i32 @@ -84,7 +95,10 @@ define signext i8 @divi8(i8 signext %a, i8 signext %b) { define zeroext i8 @divu8(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: divu8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = udiv i8 %a, %b ret i8 %r @@ -104,6 +118,7 @@ define i64 @divi64ri(i64 %a, i64 %b) { define i32 @divi32ri(i32 %a, i32 %b) { ; CHECK-LABEL: divi32ri: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s0, %s0, (62)0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sdiv i32 %a, 3 @@ -124,6 +139,7 @@ define i64 @divu64ri(i64 %a, i64 %b) { define i32 @divu32ri(i32 %a, i32 %b) { ; CHECK-LABEL: divu32ri: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s0, %s0, (62)0 ; CHECK-NEXT: or %s11, 0, %s9 %r = udiv i32 %a, 3 @@ -144,7 +160,8 @@ define i64 @divi64li(i64 %a, i64 %b) { define i32 @divi32li(i32 %a, i32 %b) { ; CHECK-LABEL: divi32li: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: divs.w.sx %s0, 3, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: divs.w.sx %s0, 3, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = sdiv i32 3, %b ret i32 %r @@ -164,7 +181,8 @@ define i64 @divu64li(i64 %a, i64 %b) { define i32 @divu32li(i32 %a, i32 %b) { ; CHECK-LABEL: divu32li: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: divu.w %s0, 3, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: divu.w %s0, 3, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = udiv i32 3, %b ret i32 %r diff --git a/llvm/test/CodeGen/VE/fp_to_int.ll 
b/llvm/test/CodeGen/VE/fp_to_int.ll index 9a1a7e35c1190..d9b1926ea9d5d 100644 --- a/llvm/test/CodeGen/VE/fp_to_int.ll +++ b/llvm/test/CodeGen/VE/fp_to_int.ll @@ -5,6 +5,7 @@ define signext i8 @f2c(float %a) { ; CHECK-LABEL: f2c: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptosi float %a to i8 @@ -16,6 +17,7 @@ define signext i16 @f2s(float %a) { ; CHECK-LABEL: f2s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptosi float %a to i16 @@ -50,6 +52,7 @@ define zeroext i8 @f2uc(float %a) { ; CHECK-LABEL: f2uc: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui float %a to i8 @@ -61,6 +64,7 @@ define zeroext i16 @f2us(float %a) { ; CHECK-LABEL: f2us: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.s.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui float %a to i16 @@ -73,7 +77,6 @@ define i32 @f2ui(float %a) { ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.d.s %s0, %s0 ; CHECK-NEXT: cvt.l.d.rz %s0, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui float %a to i32 @@ -105,6 +108,7 @@ define signext i8 @d2c(double %a) { ; CHECK-LABEL: d2c: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptosi double %a to i8 @@ -116,6 +120,7 @@ define signext i16 @d2s(double %a) { ; CHECK-LABEL: d2s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptosi double %a to i16 @@ -149,6 +154,7 @@ define zeroext i8 @d2uc(double %a) { ; CHECK-LABEL: d2uc: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui double %a to i8 @@ -160,6 +166,7 @@ define zeroext i16 @d2us(double %a) { ; CHECK-LABEL: d2us: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.w.d.sx.rz %s0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui double %a to i16 @@ -171,7 +178,6 @@ define i32 @d2ui(double %a) { ; CHECK-LABEL: d2ui: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: cvt.l.d.rz %s0, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 entry: %conv = fptoui double %a to i32 diff --git a/llvm/test/CodeGen/VE/int_to_fp.ll b/llvm/test/CodeGen/VE/int_to_fp.ll index 5069a0ca0d7d5..2e850142e2e9f 100644 --- a/llvm/test/CodeGen/VE/int_to_fp.ll +++ b/llvm/test/CodeGen/VE/int_to_fp.ll @@ -4,6 +4,7 @@ define float @c2f(i8 signext %a) { ; CHECK-LABEL: c2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -15,6 +16,7 @@ entry: define float @s2f(i16 signext %a) { ; CHECK-LABEL: s2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -26,6 +28,7 @@ entry: define float @i2f(i32 %a) { ; CHECK-LABEL: i2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -49,6 +52,7 @@ entry: define float @uc2f(i8 zeroext %a) { ; CHECK-LABEL: uc2f: ; CHECK: 
.LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -60,6 +64,7 @@ entry: define float @us2f(i16 zeroext %a) { ; CHECK-LABEL: us2f: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.s.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -71,7 +76,7 @@ entry: define float @ui2f(i32 %a) { ; CHECK-LABEL: ui2f: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cvt.d.l %s0, %s0 ; CHECK-NEXT: cvt.s.d %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -106,6 +111,7 @@ entry: define double @c2d(i8 signext %a) { ; CHECK-LABEL: c2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -117,6 +123,7 @@ entry: define double @s2d(i16 signext %a) { ; CHECK-LABEL: s2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -128,6 +135,7 @@ entry: define double @i2d(i32 %a) { ; CHECK-LABEL: i2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -150,6 +158,7 @@ entry: define double @uc2d(i8 zeroext %a) { ; CHECK-LABEL: uc2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -161,6 +170,7 @@ entry: define double @us2d(i16 zeroext %a) { ; CHECK-LABEL: us2d: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cvt.d.w %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: @@ -172,7 +182,7 @@ entry: define double @ui2d(i32 %a) { ; CHECK-LABEL: ui2d: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: cvt.d.l %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 entry: diff --git a/llvm/test/CodeGen/VE/left_shift.ll b/llvm/test/CodeGen/VE/left_shift.ll index d568846974dd9..fa595a916e529 100644 --- a/llvm/test/CodeGen/VE/left_shift.ll +++ b/llvm/test/CodeGen/VE/left_shift.ll @@ -3,9 +3,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sext i8 %0 to i32 %4 = sext i8 %1 to i32 @@ -17,9 +19,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { define signext i16 @func2(i16 signext %0, i16 signext %1) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sext i16 %0 to i32 %4 = sext i16 %1 to i32 @@ -31,6 +35,8 @@ define signext i16 @func2(i16 signext %0, i16 signext %1) { define i32 @func3(i32 %0, i32 %1) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = shl i32 %0, %1 @@ -50,6 +56,8 @@ define i64 @func4(i64 %0, i64 %1) { define zeroext i8 @func6(i8 zeroext 
%0, i8 zeroext %1) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -63,6 +71,8 @@ define zeroext i8 @func6(i8 zeroext %0, i8 zeroext %1) { define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -76,6 +86,8 @@ define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { define i32 @func8(i32 %0, i32 %1) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = shl i32 %0, %1 @@ -95,9 +107,10 @@ define i64 @func9(i64 %0, i64 %1) { define signext i8 @func11(i8 signext %0) { ; CHECK-LABEL: func11: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i8 %0, 5 ret i8 %2 @@ -106,9 +119,10 @@ define signext i8 @func11(i8 signext %0) { define signext i16 @func12(i16 signext %0) { ; CHECK-LABEL: func12: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i16 %0, 5 ret i16 %2 @@ -117,6 +131,7 @@ define signext i16 @func12(i16 signext %0) { define i32 @func13(i32 %0) { ; CHECK-LABEL: func13: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i32 %0, 5 @@ -135,8 +150,10 @@ define i64 @func14(i64 %0) { define zeroext i8 @func16(i8 zeroext %0) { ; CHECK-LABEL: func16: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 -; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: lea %s1, 224 +; CHECK-NEXT: and %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i8 %0, 5 ret i8 %2 @@ -145,8 +162,10 @@ define zeroext i8 @func16(i8 zeroext %0) { define zeroext i16 @func17(i16 zeroext %0) { ; CHECK-LABEL: func17: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 -; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: lea %s1, 65504 +; CHECK-NEXT: and %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i16 %0, 5 ret i16 %2 @@ -155,6 +174,7 @@ define zeroext i16 @func17(i16 zeroext %0) { define i32 @func18(i32 %0) { ; CHECK-LABEL: func18: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s0, %s0, 5 ; CHECK-NEXT: or %s11, 0, %s9 %2 = shl i32 %0, 5 diff --git a/llvm/test/CodeGen/VE/load_off.ll b/llvm/test/CodeGen/VE/load_off.ll index cc3da7a3a1cd5..2ebd616e533cc 100644 --- a/llvm/test/CodeGen/VE/load_off.ll +++ b/llvm/test/CodeGen/VE/load_off.ll @@ -118,7 +118,7 @@ define zeroext i32 @loadi32z() { ; CHECK-NEXT: lea %s0, bufi32+8@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s0, bufi32+8@hi(, %s0) -; CHECK-NEXT: ldl.sx %s0, (, %s0) +; CHECK-NEXT: ldl.zx %s0, (, %s0) ; CHECK-NEXT: 
or %s11, 0, %s9 entry: %0 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bufi32, i64 0, i64 2), align 4 diff --git a/llvm/test/CodeGen/VE/max.ll b/llvm/test/CodeGen/VE/max.ll index 67f68b02b4510..2c342faa1f052 100644 --- a/llvm/test/CodeGen/VE/max.ll +++ b/llvm/test/CodeGen/VE/max.ll @@ -69,8 +69,6 @@ define float @max2f32(float, float) { define float @maxuf32(float, float) { ; CHECK-LABEL: maxuf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf1 killed $sf1 def $sx1 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 def $sx0 ; CHECK-NEXT: fcmp.s %s2, %s0, %s1 ; CHECK-NEXT: cmov.s.gtnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 @@ -83,8 +81,6 @@ define float @maxuf32(float, float) { define float @max2uf32(float, float) { ; CHECK-LABEL: max2uf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf1 killed $sf1 def $sx1 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 def $sx0 ; CHECK-NEXT: fcmp.s %s2, %s0, %s1 ; CHECK-NEXT: cmov.s.genan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 @@ -141,6 +137,8 @@ define i64 @max2u64(i64, i64) { define i32 @maxi32(i32, i32) { ; CHECK-LABEL: maxi32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sgt i32 %0, %1 @@ -151,6 +149,8 @@ define i32 @maxi32(i32, i32) { define i32 @max2i32(i32, i32) { ; CHECK-LABEL: max2i32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: maxs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sge i32 %0, %1 @@ -161,11 +161,10 @@ define i32 @max2i32(i32, i32) { define i32 @maxu32(i32, i32) { ; CHECK-LABEL: maxu32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 -; CHECK-NEXT: cmpu.w %s2, %s0, %s1 -; CHECK-NEXT: cmov.w.gt %s1, %s0, %s2 -; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: cmpu.w %s1, %s2, %s0 +; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ugt i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 @@ -175,11 +174,10 @@ define i32 @maxu32(i32, i32) { define i32 @max2u32(i32, i32) { ; CHECK-LABEL: max2u32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 -; CHECK-NEXT: cmpu.w %s2, %s0, %s1 -; CHECK-NEXT: cmov.w.ge %s1, %s0, %s2 -; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: cmpu.w %s1, %s2, %s0 +; CHECK-NEXT: cmov.w.ge %s0, %s2, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp uge i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 diff --git a/llvm/test/CodeGen/VE/min.ll b/llvm/test/CodeGen/VE/min.ll index 3e28f757d9e98..dd6ad8460c806 100644 --- a/llvm/test/CodeGen/VE/min.ll +++ b/llvm/test/CodeGen/VE/min.ll @@ -67,8 +67,6 @@ define float @min2f32(float, float) { define float @minuf32(float, float) { ; CHECK-LABEL: minuf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf1 killed $sf1 def $sx1 -; CHECK-NEXT: # kill: def $sf0 killed $sf0 def $sx0 ; CHECK-NEXT: fcmp.s %s2, %s0, %s1 ; CHECK-NEXT: cmov.s.ltnan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 @@ -81,8 +79,6 @@ define float @minuf32(float, float) { define float @min2uf32(float, float) { ; CHECK-LABEL: min2uf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf1 killed $sf1 def $sx1 -; 
CHECK-NEXT: # kill: def $sf0 killed $sf0 def $sx0 ; CHECK-NEXT: fcmp.s %s2, %s0, %s1 ; CHECK-NEXT: cmov.s.lenan %s1, %s0, %s2 ; CHECK-NEXT: or %s0, 0, %s1 @@ -139,6 +135,8 @@ define i64 @min2u64(i64, i64) { define i32 @mini32(i32, i32) { ; CHECK-LABEL: mini32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp slt i32 %0, %1 @@ -149,6 +147,8 @@ define i32 @mini32(i32, i32) { define i32 @min2i32(i32, i32) { ; CHECK-LABEL: min2i32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: mins.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sle i32 %0, %1 @@ -159,11 +159,10 @@ define i32 @min2i32(i32, i32) { define i32 @minu32(i32, i32) { ; CHECK-LABEL: minu32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 -; CHECK-NEXT: cmpu.w %s2, %s0, %s1 -; CHECK-NEXT: cmov.w.lt %s1, %s0, %s2 -; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: cmpu.w %s1, %s2, %s0 +; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ult i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 @@ -173,11 +172,10 @@ define i32 @minu32(i32, i32) { define i32 @min2u32(i32, i32) { ; CHECK-LABEL: min2u32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 -; CHECK-NEXT: cmpu.w %s2, %s0, %s1 -; CHECK-NEXT: cmov.w.le %s1, %s0, %s2 -; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: cmpu.w %s1, %s2, %s0 +; CHECK-NEXT: cmov.w.le %s0, %s2, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ule i32 %0, %1 %4 = select i1 %3, i32 %0, i32 %1 @@ -187,10 +185,11 @@ define i32 @min2u32(i32, i32) { define zeroext i1 @mini1(i1 zeroext, i1 zeroext) { ; CHECK-LABEL: mini1: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: and %s2, %s1, %s0 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s2, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = xor i1 %0, true %4 = and i1 %3, %1 diff --git a/llvm/test/CodeGen/VE/multiply.ll b/llvm/test/CodeGen/VE/multiply.ll index dabb6cf85d12f..83b7a67ff453b 100644 --- a/llvm/test/CodeGen/VE/multiply.ll +++ b/llvm/test/CodeGen/VE/multiply.ll @@ -3,9 +3,11 @@ define signext i8 @func1(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul i8 %b, %a ret i8 %r @@ -14,9 +16,11 @@ define signext i8 @func1(i8 signext %a, i8 signext %b) { define signext i16 @func2(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 
0, %s9 %r = mul i16 %b, %a ret i16 %r @@ -25,6 +29,8 @@ define signext i16 @func2(i16 signext %a, i16 signext %b) { define i32 @func3(i32 %a, i32 %b) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul nsw i32 %b, %a @@ -43,6 +49,8 @@ define i64 @func4(i64 %a, i64 %b) { define zeroext i8 @func5(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: func5: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -53,6 +61,8 @@ define zeroext i8 @func5(i8 zeroext %a, i8 zeroext %b) { define zeroext i16 @func6(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -63,6 +73,8 @@ define zeroext i16 @func6(i16 zeroext %a, i16 zeroext %b) { define i32 @func7(i32 %a, i32 %b) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul i32 %b, %a @@ -81,9 +93,10 @@ define i64 @func8(i64 %a, i64 %b) { define signext i8 @func9(i8 signext %a) { ; CHECK-LABEL: func9: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul i8 %a, 5 ret i8 %r @@ -92,9 +105,10 @@ define signext i8 @func9(i8 signext %a) { define signext i16 @func10(i16 signext %a) { ; CHECK-LABEL: func10: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul i16 %a, 5 ret i16 %r @@ -103,6 +117,7 @@ define signext i16 @func10(i16 signext %a) { define i32 @func11(i32 %a) { ; CHECK-LABEL: func11: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul nsw i32 %a, 5 @@ -121,6 +136,7 @@ define i64 @func12(i64 %a) { define zeroext i8 @func13(i8 zeroext %a) { ; CHECK-LABEL: func13: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -131,6 +147,7 @@ define zeroext i8 @func13(i8 zeroext %a) { define zeroext i16 @func14(i16 zeroext %a) { ; CHECK-LABEL: func14: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -141,6 +158,7 @@ define zeroext i16 @func14(i16 zeroext %a) { define i32 @func15(i32 %a) { ; CHECK-LABEL: func15: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: muls.w.sx %s0, 5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = mul i32 %a, 5 @@ -159,6 +177,7 @@ define i64 @func16(i64 %a) { define i32 @func17(i32 %a) { ; CHECK-LABEL: func17: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 
; CHECK-NEXT: sla.w.sx %s0, %s0, 31 ; CHECK-NEXT: or %s11, 0, %s9 %r = shl i32 %a, 31 diff --git a/llvm/test/CodeGen/VE/nnd.ll b/llvm/test/CodeGen/VE/nnd.ll index aea10d4834cdd..aedb85050f301 100644 --- a/llvm/test/CodeGen/VE/nnd.ll +++ b/llvm/test/CodeGen/VE/nnd.ll @@ -3,8 +3,11 @@ define signext i8 @func8s(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: func8s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i8 %a, -1 %res = and i8 %not, %b @@ -14,8 +17,11 @@ define signext i8 @func8s(i8 signext %a, i8 signext %b) { define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: func8z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s1, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i8 %a, -1 %res = and i8 %b, %not @@ -25,8 +31,10 @@ define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) { define signext i8 @funci8s(i8 signext %a) { ; CHECK-LABEL: funci8s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, 5, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i8 %a, -1 %res = and i8 %not, 5 @@ -36,6 +44,7 @@ define signext i8 @funci8s(i8 signext %a) { define zeroext i8 @funci8z(i8 zeroext %a) { ; CHECK-LABEL: funci8z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: lea %s1, 251 ; CHECK-NEXT: and %s0, %s0, %s1 @@ -48,8 +57,11 @@ define zeroext i8 @funci8z(i8 zeroext %a) { define signext i16 @func16s(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: func16s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i16 %a, -1 %res = and i16 %not, %b @@ -59,8 +71,11 @@ define signext i16 @func16s(i16 signext %a, i16 signext %b) { define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: func16z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s1, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i16 %a, -1 %res = and i16 %b, %not @@ -70,7 +85,9 @@ define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) { define signext i16 @funci16s(i16 signext %a) { ; CHECK-LABEL: funci16s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i16 %a, -1 %res = and i16 %not, 65535 @@ -80,8 +97,10 @@ define signext i16 @funci16s(i16 signext %a) { define zeroext i16 @funci16z(i16 zeroext %a) { ; CHECK-LABEL: funci16z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, (52)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i16 %a, -1 %res = and i16 4095, %not @@ -91,8 +110,11 @@ define zeroext i16 @funci16z(i16 zeroext %a) { define signext i32 @func32s(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: 
func32s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i32 %a, -1 %res = and i32 %not, %b @@ -102,8 +124,11 @@ define signext i32 @func32s(i32 signext %a, i32 signext %b) { define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) { ; CHECK-LABEL: func32z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i32 %a, -1 %res = and i32 %not, %b @@ -113,8 +138,10 @@ define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) { define signext i32 @funci32s(i32 signext %a) { ; CHECK-LABEL: funci32s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i32 %a, -1 %res = and i32 %not, 268435455 @@ -124,8 +151,10 @@ define signext i32 @funci32s(i32 signext %a) { define zeroext i32 @funci32z(i32 zeroext %a) { ; CHECK-LABEL: funci32z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %not = xor i32 %a, -1 %res = and i32 %not, 268435455 diff --git a/llvm/test/CodeGen/VE/or.ll b/llvm/test/CodeGen/VE/or.ll index 1f8c35012f811..8ddb1b5fbf80a 100644 --- a/llvm/test/CodeGen/VE/or.ll +++ b/llvm/test/CodeGen/VE/or.ll @@ -21,7 +21,9 @@ define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) { define signext i8 @funci8s(i8 signext %a) { ; CHECK-LABEL: funci8s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s0, 5, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = or i8 %a, 5 ret i8 %res @@ -30,8 +32,10 @@ define signext i8 @funci8s(i8 signext %a) { define zeroext i8 @funci8z(i8 zeroext %a) { ; CHECK-LABEL: funci8z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: lea %s1, 251 ; CHECK-NEXT: or %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = or i8 -5, %a ret i8 %res @@ -67,7 +71,9 @@ define signext i16 @funci16s(i16 signext %a) { define zeroext i16 @funci16z(i16 zeroext %a) { ; CHECK-LABEL: funci16z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s0, %s0, (52)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = or i16 4095, %a ret i16 %res @@ -94,7 +100,9 @@ define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) { define signext i32 @funci32s(i32 signext %a) { ; CHECK-LABEL: funci32s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = or i32 %a, 268435455 ret i32 %res @@ -103,7 +111,9 @@ define signext i32 @funci32s(i32 signext %a) { define zeroext i32 @funci32z(i32 zeroext %a) { ; CHECK-LABEL: funci32z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = or i32 %a, 268435455 ret i32 %res diff --git a/llvm/test/CodeGen/VE/pic_access_static_data.ll 
b/llvm/test/CodeGen/VE/pic_access_static_data.ll index e0741724172e9..892aa8465d99f 100644 --- a/llvm/test/CodeGen/VE/pic_access_static_data.ll +++ b/llvm/test/CodeGen/VE/pic_access_static_data.ll @@ -52,7 +52,7 @@ define i32 @main() { ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s0, dst@gotoff_hi(, %s0) ; CHECK-NEXT: ldl.sx %s1, (%s0, %s15) -; CHECK-NEXT: stl %s1, 184(, %s11) +; CHECK-NEXT: st %s1, 184(, %s11) ; CHECK-NEXT: lea %s0, .L.str@gotoff_lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s0, .L.str@gotoff_hi(, %s0) diff --git a/llvm/test/CodeGen/VE/rem.ll b/llvm/test/CodeGen/VE/rem.ll index 9fa558f5ca3cd..52ac3c3a3c9e7 100644 --- a/llvm/test/CodeGen/VE/rem.ll +++ b/llvm/test/CodeGen/VE/rem.ll @@ -16,6 +16,8 @@ define i64 @remi64(i64 %a, i64 %b) { define i32 @remi32(i32 %a, i32 %b) { ; CHECK-LABEL: remi32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 @@ -40,6 +42,8 @@ define i64 @remu64(i64 %a, i64 %b) { define i32 @remu32(i32 %a, i32 %b) { ; CHECK-LABEL: remu32: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 @@ -52,11 +56,13 @@ define i32 @remu32(i32 %a, i32 %b) { define signext i16 @remi16(i16 signext %a, i16 signext %b) { ; CHECK-LABEL: remi16: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %a32 = sext i16 %a to i32 %b32 = sext i16 %b to i32 @@ -69,9 +75,12 @@ define signext i16 @remi16(i16 signext %a, i16 signext %b) { define zeroext i16 @remu16(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: remu16: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = urem i16 %a, %b ret i16 %r @@ -81,11 +90,13 @@ define zeroext i16 @remu16(i16 zeroext %a, i16 zeroext %b) { define signext i8 @remi8(i8 signext %a, i8 signext %b) { ; CHECK-LABEL: remi8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %a32 = sext i8 %a to i32 %b32 = sext i8 %b to i32 @@ -98,9 +109,12 @@ define signext i8 @remi8(i8 signext %a, i8 signext %b) { define zeroext i8 @remu8(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: remu8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s2, %s0, %s1 ; CHECK-NEXT: muls.w.sx %s1, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = urem i8 %a, %b ret i8 %r @@ -122,6 
+136,7 @@ define i64 @remi64ri(i64 %a, i64 %b) { define i32 @remi32ri(i32 %a, i32 %b) { ; CHECK-LABEL: remi32ri: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divs.w.sx %s1, %s0, (62)0 ; CHECK-NEXT: muls.w.sx %s1, 3, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 @@ -146,6 +161,7 @@ define i64 @remu64ri(i64 %a, i64 %b) { define i32 @remu32ri(i32 %a, i32 %b) { ; CHECK-LABEL: remu32ri: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: divu.w %s1, %s0, (62)0 ; CHECK-NEXT: muls.w.sx %s1, 3, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 @@ -170,8 +186,9 @@ define i64 @remi64li(i64 %a, i64 %b) { define i32 @remi32li(i32 %a, i32 %b) { ; CHECK-LABEL: remi32li: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: divs.w.sx %s0, 3, %s1 -; CHECK-NEXT: muls.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: divs.w.sx %s1, 3, %s0 +; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: subs.w.sx %s0, 3, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = srem i32 3, %b @@ -194,8 +211,9 @@ define i64 @remu64li(i64 %a, i64 %b) { define i32 @remu32li(i32 %a, i32 %b) { ; CHECK-LABEL: remu32li: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: divu.w %s0, 3, %s1 -; CHECK-NEXT: muls.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s1, (0)1 +; CHECK-NEXT: divu.w %s1, 3, %s0 +; CHECK-NEXT: muls.w.sx %s0, %s1, %s0 ; CHECK-NEXT: subs.w.sx %s0, 3, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = urem i32 3, %b diff --git a/llvm/test/CodeGen/VE/right_shift.ll b/llvm/test/CodeGen/VE/right_shift.ll index faad722a30531..87ac6df7e62ed 100644 --- a/llvm/test/CodeGen/VE/right_shift.ll +++ b/llvm/test/CodeGen/VE/right_shift.ll @@ -3,7 +3,10 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sext i8 %0 to i32 %4 = sext i8 %1 to i32 @@ -15,7 +18,10 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { define signext i16 @func2(i16 signext %0, i16 signext %1) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, %s1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sext i16 %0 to i32 %4 = sext i16 %1 to i32 @@ -27,6 +33,8 @@ define signext i16 @func2(i16 signext %0, i16 signext %1) { define i32 @func3(i32 %0, i32 %1) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = ashr i32 %0, %1 @@ -46,10 +54,11 @@ define i64 @func4(i64 %0, i64 %1) { define zeroext i8 @func7(i8 zeroext %0, i8 zeroext %1) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = zext i8 %0 to i32 %4 = zext i8 %1 to i32 @@ -61,10 +70,11 @@ define zeroext i8 @func7(i8 zeroext %0, i8 zeroext %1) { define zeroext i16 @func8(i16 zeroext %0, i16 zeroext %1) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: 
adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = zext i16 %0 to i32 %4 = zext i16 %1 to i32 @@ -76,10 +86,10 @@ define zeroext i16 @func8(i16 zeroext %0, i16 zeroext %1) { define i32 @func9(i32 %0, i32 %1) { ; CHECK-LABEL: func9: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %3 = lshr i32 %0, %1 ret i32 %3 @@ -98,7 +108,9 @@ define i64 @func10(i64 %0, i64 %1) { define signext i8 @func12(i8 signext %0) { ; CHECK-LABEL: func12: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, 5 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = ashr i8 %0, 5 ret i8 %2 @@ -107,7 +119,9 @@ define signext i8 @func12(i8 signext %0) { define signext i16 @func13(i16 signext %0) { ; CHECK-LABEL: func13: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, 5 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = ashr i16 %0, 5 ret i16 %2 @@ -116,6 +130,7 @@ define signext i16 @func13(i16 signext %0) { define i32 @func14(i32 %0) { ; CHECK-LABEL: func14: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sra.w.sx %s0, %s0, 5 ; CHECK-NEXT: or %s11, 0, %s9 %2 = ashr i32 %0, 5 @@ -134,10 +149,10 @@ define i64 @func15(i64 %0) { define zeroext i8 @func17(i8 zeroext %0) { ; CHECK-LABEL: func17: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, 5 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = lshr i8 %0, 5 ret i8 %2 @@ -146,10 +161,10 @@ define zeroext i8 @func17(i8 zeroext %0) { define zeroext i16 @func18(i16 zeroext %0) { ; CHECK-LABEL: func18: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, 5 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = lshr i16 %0, 5 ret i16 %2 @@ -158,10 +173,9 @@ define zeroext i16 @func18(i16 zeroext %0) { define i32 @func19(i32 %0) { ; CHECK-LABEL: func19: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: srl %s0, %s0, 5 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 ; CHECK-NEXT: or %s11, 0, %s9 %2 = lshr i32 %0, 5 ret i32 %2 diff --git a/llvm/test/CodeGen/VE/rotl.ll b/llvm/test/CodeGen/VE/rotl.ll index e7c498f1d34d2..cc5e004478ab5 100644 --- a/llvm/test/CodeGen/VE/rotl.ll +++ b/llvm/test/CodeGen/VE/rotl.ll @@ -3,6 +3,7 @@ define i64 @func1(i64 %a, i32 %b) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: sll %s2, %s0, %s1 ; CHECK-NEXT: lea %s3, 64 ; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 @@ -21,7 +22,8 @@ define i64 @func1(i64 %a, i32 %b) { define i32 
@func2(i32 %a, i32 %b) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: sla.w.sx %s2, %s0, %s1 ; CHECK-NEXT: subs.w.sx %s1, 32, %s1 ; CHECK-NEXT: and %s0, %s0, (32)0 diff --git a/llvm/test/CodeGen/VE/rotr.ll b/llvm/test/CodeGen/VE/rotr.ll index 40734a3d5178a..93dcbbc7e0a82 100644 --- a/llvm/test/CodeGen/VE/rotr.ll +++ b/llvm/test/CodeGen/VE/rotr.ll @@ -3,6 +3,7 @@ define i64 @func1(i64 %a, i32 %b) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 ; CHECK-NEXT: srl %s2, %s0, %s1 ; CHECK-NEXT: lea %s3, 64 ; CHECK-NEXT: subs.w.sx %s1, %s3, %s1 @@ -21,7 +22,8 @@ define i64 @func1(i64 %a, i32 %b) { define i32 @func2(i32 %a, i32 %b) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s2, %s0, (32)0 ; CHECK-NEXT: srl %s2, %s2, %s1 ; CHECK-NEXT: subs.w.sx %s1, 32, %s1 diff --git a/llvm/test/CodeGen/VE/select.ll b/llvm/test/CodeGen/VE/select.ll index 6efe073381d51..81234d3d955cc 100644 --- a/llvm/test/CodeGen/VE/select.ll +++ b/llvm/test/CodeGen/VE/select.ll @@ -3,6 +3,7 @@ define double @selectf64(i1 zeroext, double, double) { ; CHECK-LABEL: selectf64: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: or %s11, 0, %s9 @@ -13,8 +14,7 @@ define double @selectf64(i1 zeroext, double, double) { define float @selectf32(i1 zeroext, float, float) { ; CHECK-LABEL: selectf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2 -; CHECK-NEXT: # kill: def $sf1 killed $sf1 def $sx1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: or %s11, 0, %s9 @@ -25,6 +25,7 @@ define float @selectf32(i1 zeroext, float, float) { define i64 @selecti64(i1 zeroext, i64, i64) { ; CHECK-LABEL: selecti64: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 ; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: or %s11, 0, %s9 @@ -35,10 +36,10 @@ define i64 @selecti64(i1 zeroext, i64, i64) { define i32 @selecti32(i1 zeroext, i32, i32) { ; CHECK-LABEL: selecti32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2 -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 -; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: adds.w.sx %s3, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s2, (0)1 +; CHECK-NEXT: cmov.w.ne %s0, %s1, %s3 ; CHECK-NEXT: or %s11, 0, %s9 %4 = select i1 %0, i32 %1, i32 %2 ret i32 %4 @@ -47,10 +48,11 @@ define i32 @selecti32(i1 zeroext, i32, i32) { define zeroext i1 @selecti1(i1 zeroext, i1 zeroext, i1 zeroext) { ; CHECK-LABEL: selecti1: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2 -; CHECK-NEXT: # kill: def $sw1 killed $sw1 def $sx1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1 ; CHECK-NEXT: cmov.w.ne %s2, %s1, %s0 -; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: adds.w.zx %s0, %s2, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %4 = select i1 %0, i1 %1, i1 %2 ret i1 %4 diff --git a/llvm/test/CodeGen/VE/selectccf32.ll b/llvm/test/CodeGen/VE/selectccf32.ll index 
748a3e9275571..2832be6bc12a6 100644
--- a/llvm/test/CodeGen/VE/selectccf32.ll
+++ b/llvm/test/CodeGen/VE/selectccf32.ll
@@ -23,8 +23,6 @@ define float @selectccat(float, float, float, float) {
 define float @selectccoeq(float, float, float, float) {
 ; CHECK-LABEL: selectccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.eq %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -37,8 +35,6 @@ define float @selectccoeq(float, float, float, float) {
 define float @selectccone(float, float, float, float) {
 ; CHECK-LABEL: selectccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ne %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -51,8 +47,6 @@ define float @selectccone(float, float, float, float) {
 define float @selectccogt(float, float, float, float) {
 ; CHECK-LABEL: selectccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -65,8 +59,6 @@ define float @selectccogt(float, float, float, float) {
 define float @selectccoge(float, float, float, float) {
 ; CHECK-LABEL: selectccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ge %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -79,8 +71,6 @@ define float @selectccoge(float, float, float, float) {
 define float @selectccolt(float, float, float, float) {
 ; CHECK-LABEL: selectccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.lt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -93,8 +83,6 @@ define float @selectccolt(float, float, float, float) {
 define float @selectccole(float, float, float, float) {
 ; CHECK-LABEL: selectccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.le %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -107,8 +95,6 @@ define float @selectccole(float, float, float, float) {
 define float @selectccord(float, float, float, float) {
 ; CHECK-LABEL: selectccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.num %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -121,8 +107,6 @@ define float @selectccord(float, float, float, float) {
 define float @selectccuno(float, float, float, float) {
 ; CHECK-LABEL: selectccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.nan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -135,8 +119,6 @@ define float @selectccuno(float, float, float, float) {
 define float @selectccueq(float, float, float, float) {
 ; CHECK-LABEL: selectccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.eqnan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -149,8 +131,6 @@ define float @selectccueq(float, float, float, float) {
 define float @selectccune(float, float, float, float) {
 ; CHECK-LABEL: selectccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.nenan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -163,8 +143,6 @@ define float @selectccune(float, float, float, float) {
 define float @selectccugt(float, float, float, float) {
 ; CHECK-LABEL: selectccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.gtnan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -177,8 +155,6 @@ define float @selectccugt(float, float, float, float) {
 define float @selectccuge(float, float, float, float) {
 ; CHECK-LABEL: selectccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.genan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -191,8 +167,6 @@ define float @selectccuge(float, float, float, float) {
 define float @selectccult(float, float, float, float) {
 ; CHECK-LABEL: selectccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ltnan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -205,8 +179,6 @@ define float @selectccult(float, float, float, float) {
 define float @selectccule(float, float, float, float) {
 ; CHECK-LABEL: selectccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.lenan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
diff --git a/llvm/test/CodeGen/VE/selectccf32c.ll b/llvm/test/CodeGen/VE/selectccf32c.ll
index 78a9aaf96729c..54a9da4c8e460 100644
--- a/llvm/test/CodeGen/VE/selectccf32c.ll
+++ b/llvm/test/CodeGen/VE/selectccf32c.ll
@@ -3,12 +3,10 @@ define float @selectccsgti8(i8, i8, float, float) {
 ; CHECK-LABEL: selectccsgti8:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
-; CHECK-NEXT: sla.w.sx %s1, %s1, 24
-; CHECK-NEXT: sra.w.sx %s1, %s1, 24
-; CHECK-NEXT: sla.w.sx %s0, %s0, 24
-; CHECK-NEXT: sra.w.sx %s0, %s0, 24
+; CHECK-NEXT: sll %s1, %s1, 56
+; CHECK-NEXT: sra.l %s1, %s1, 56
+; CHECK-NEXT: sll %s0, %s0, 56
+; CHECK-NEXT: sra.l %s0, %s0, 56
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -21,12 +19,10 @@ define float @selectccsgti8(i8, i8, float, float) {
 define float @selectccsgti16(i16, i16, float, float) {
 ; CHECK-LABEL: selectccsgti16:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
-; CHECK-NEXT: sla.w.sx %s1, %s1, 16
-; CHECK-NEXT: sra.w.sx %s1, %s1, 16
-; CHECK-NEXT: sla.w.sx %s0, %s0, 16
-; CHECK-NEXT: sra.w.sx %s0, %s0, 16
+; CHECK-NEXT: sll %s1, %s1, 48
+; CHECK-NEXT: sra.l %s1, %s1, 48
+; CHECK-NEXT: sll %s0, %s0, 48
+; CHECK-NEXT: sra.l %s0, %s0, 48
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -39,8 +35,8 @@ define float @selectccsgti16(i16, i16, float, float) {
 define float @selectccsgti32(i32, i32, float, float) {
 ; CHECK-LABEL: selectccsgti32:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -53,8 +49,6 @@ define float @selectccsgti64(i64, i64, float, float) {
 ; CHECK-LABEL: selectccsgti64:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: cmps.l %s0, %s0, %s1
 ; CHECK-NEXT: cmov.l.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -67,8 +61,6 @@ define float @selectccsgti128(i128, i128, float, float) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf5 killed $sf5 def $sx5
-; CHECK-NEXT: # kill: def $sf4 killed $sf4 def $sx4
 ; CHECK-NEXT: or %s6, 0, (0)1
 ; CHECK-NEXT: cmps.l %s1, %s1, %s3
 ; CHECK-NEXT: or %s3, 0, %s6
@@ -89,8 +81,6 @@ define float @selectccogtf32(float, float, float, float) {
 ; CHECK-LABEL: selectccogtf32:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -103,8 +93,6 @@ define float @selectccogtf64(double, double, float, float) {
 ; CHECK-LABEL: selectccogtf64:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.d %s0, %s0, %s1
 ; CHECK-NEXT: cmov.d.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
diff --git a/llvm/test/CodeGen/VE/selectccf32i.ll b/llvm/test/CodeGen/VE/selectccf32i.ll
index e8285c818c54d..5e2698b4feb17 100644
--- a/llvm/test/CodeGen/VE/selectccf32i.ll
+++ b/llvm/test/CodeGen/VE/selectccf32i.ll
@@ -23,8 +23,6 @@ define float @selectccat(float, float, float, float) {
 define float @selectccoeq(float, float, float, float) {
 ; CHECK-LABEL: selectccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.eq %s3, %s2, %s0
@@ -38,8 +36,6 @@ define float @selectccone(float, float, float, float) {
 ; CHECK-LABEL: selectccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ne %s3, %s2, %s0
@@ -53,8 +49,6 @@ define float @selectccogt(float, float, float, float) {
 ; CHECK-LABEL: selectccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.gt %s3, %s2, %s0
@@ -68,8 +62,6 @@ define float @selectccogt(float, float, float, float) {
 define float @selectccoge(float, float, float, float) {
 ; CHECK-LABEL: selectccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ge %s3, %s2, %s0
@@ -83,8 +75,6 @@ define float @selectccoge(float, float, float, float) {
 define float @selectccolt(float, float, float, float) {
 ; CHECK-LABEL: selectccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.lt %s3, %s2, %s0
@@ -98,8 +88,6 @@ define float @selectccolt(float, float, float, float) {
 define float @selectccole(float, float, float, float) {
 ; CHECK-LABEL: selectccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.le %s3, %s2, %s0
@@ -113,8 +101,6 @@ define float @selectccole(float, float, float, float) {
 define float @selectccord(float, float, float, float) {
 ; CHECK-LABEL: selectccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s0
 ; CHECK-NEXT: cmov.s.num %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -127,8 +113,6 @@ define float @selectccord(float, float, float, float) {
 define float @selectccuno(float, float, float, float) {
 ; CHECK-LABEL: selectccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s0
 ; CHECK-NEXT: cmov.s.nan %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -141,8 +125,6 @@ define float @selectccuno(float, float, float, float) {
 define float @selectccueq(float, float, float, float) {
 ; CHECK-LABEL: selectccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.eqnan %s3, %s2, %s0
@@ -156,8 +138,6 @@ define float @selectccueq(float, float, float, float) {
 define float @selectccune(float, float, float, float) {
 ; CHECK-LABEL: selectccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.nenan %s3, %s2, %s0
@@ -171,8 +151,6 @@ define float @selectccune(float, float, float, float) {
 define float @selectccugt(float, float, float, float) {
 ; CHECK-LABEL: selectccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.gtnan %s3, %s2, %s0
@@ -186,8 +164,6 @@ define float @selectccugt(float, float, float, float) {
 define float @selectccuge(float, float, float, float) {
 ; CHECK-LABEL: selectccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.genan %s3, %s2, %s0
@@ -201,8 +177,6 @@ define float @selectccuge(float, float, float, float) {
 define float @selectccult(float, float, float, float) {
 ; CHECK-LABEL: selectccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.ltnan %s3, %s2, %s0
@@ -216,8 +190,6 @@ define float @selectccult(float, float, float, float) {
 define float @selectccule(float, float, float, float) {
 ; CHECK-LABEL: selectccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sf3 killed $sf3 def $sx3
-; CHECK-NEXT: # kill: def $sf2 killed $sf2 def $sx2
 ; CHECK-NEXT: lea.sl %s1, 0
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
 ; CHECK-NEXT: cmov.s.lenan %s3, %s2, %s0
diff --git a/llvm/test/CodeGen/VE/selectccf64c.ll b/llvm/test/CodeGen/VE/selectccf64c.ll
index 4481d1d6197f5..24b61ece8d545 100644
--- a/llvm/test/CodeGen/VE/selectccf64c.ll
+++ b/llvm/test/CodeGen/VE/selectccf64c.ll
@@ -3,10 +3,10 @@ define double @selectccsgti8(i8, i8, double, double) {
 ; CHECK-LABEL: selectccsgti8:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: sla.w.sx %s1, %s1, 24
-; CHECK-NEXT: sra.w.sx %s1, %s1, 24
-; CHECK-NEXT: sla.w.sx %s0, %s0, 24
-; CHECK-NEXT: sra.w.sx %s0, %s0, 24
+; CHECK-NEXT: sll %s1, %s1, 56
+; CHECK-NEXT: sra.l %s1, %s1, 56
+; CHECK-NEXT: sll %s0, %s0, 56
+; CHECK-NEXT: sra.l %s0, %s0, 56
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -19,10 +19,10 @@ define double @selectccsgti8(i8, i8, double, double) {
 define double @selectccsgti16(i16, i16, double, double) {
 ; CHECK-LABEL: selectccsgti16:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: sla.w.sx %s1, %s1, 16
-; CHECK-NEXT: sra.w.sx %s1, %s1, 16
-; CHECK-NEXT: sla.w.sx %s0, %s0, 16
-; CHECK-NEXT: sra.w.sx %s0, %s0, 16
+; CHECK-NEXT: sll %s1, %s1, 48
+; CHECK-NEXT: sra.l %s1, %s1, 48
+; CHECK-NEXT: sll %s0, %s0, 48
+; CHECK-NEXT: sra.l %s0, %s0, 48
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -35,6 +35,8 @@ define double @selectccsgti16(i16, i16, double, double) {
 define double @selectccsgti32(i32, i32, double, double) {
 ; CHECK-LABEL: selectccsgti32:
 ; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
diff --git a/llvm/test/CodeGen/VE/selectcci32.ll b/llvm/test/CodeGen/VE/selectcci32.ll
index eea115c8b936b..af1861487b892 100644
--- a/llvm/test/CodeGen/VE/selectcci32.ll
+++ b/llvm/test/CodeGen/VE/selectcci32.ll
@@ -3,11 +3,12 @@ define i32 @selectcceq(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectcceq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.eq %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.eq %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp eq i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -17,11 +18,12 @@ define i32 @selectccne(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccne:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.ne %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.ne %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ne i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -31,11 +33,12 @@ define i32 @selectccsgt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsgt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -45,11 +48,12 @@ define i32 @selectccsge(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.ge %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.ge %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sge i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -59,11 +63,12 @@ define i32 @selectccslt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccslt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp slt i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -73,11 +78,12 @@ define i32 @selectccsle(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsle:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.le %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.le %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sle i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -87,11 +93,12 @@ define i32 @selectccugt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ugt i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -101,11 +108,12 @@ define i32 @selectccuge(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.ge %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.ge %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp uge i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -115,11 +123,12 @@ define i32 @selectccult(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ult i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -129,11 +138,12 @@ define i32 @selectccule(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.le %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.le %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ule i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -143,11 +153,12 @@ define i32 @selectccugt2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccugt2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ugt i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -157,11 +168,12 @@ define i32 @selectccuge2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccuge2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.ge %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.ge %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp uge i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -171,11 +183,12 @@ define i32 @selectccult2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccult2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ult i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -185,11 +198,12 @@ define i32 @selectccule2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccule2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.le %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.le %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ule i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
diff --git a/llvm/test/CodeGen/VE/selectcci32c.ll b/llvm/test/CodeGen/VE/selectcci32c.ll
index 474e9e0dcd4dd..e4017c46c5790 100644
--- a/llvm/test/CodeGen/VE/selectcci32c.ll
+++ b/llvm/test/CodeGen/VE/selectcci32c.ll
@@ -3,15 +3,14 @@ define i32 @selectccsgti8(i8, i8, i32, i32) {
 ; CHECK-LABEL: selectccsgti8:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: sla.w.sx %s1, %s1, 24
-; CHECK-NEXT: sra.w.sx %s1, %s1, 24
-; CHECK-NEXT: sla.w.sx %s0, %s0, 24
-; CHECK-NEXT: sra.w.sx %s0, %s0, 24
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: sll %s1, %s1, 56
+; CHECK-NEXT: sra.l %s1, %s1, 56
+; CHECK-NEXT: sll %s0, %s0, 56
+; CHECK-NEXT: sra.l %s4, %s0, 56
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i8 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -21,15 +20,14 @@ define i32 @selectccsgti16(i16, i16, i32, i32) {
 ; CHECK-LABEL: selectccsgti16:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: sla.w.sx %s1, %s1, 16
-; CHECK-NEXT: sra.w.sx %s1, %s1, 16
-; CHECK-NEXT: sla.w.sx %s0, %s0, 16
-; CHECK-NEXT: sra.w.sx %s0, %s0, 16
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: sll %s1, %s1, 48
+; CHECK-NEXT: sra.l %s1, %s1, 48
+; CHECK-NEXT: sll %s0, %s0, 48
+; CHECK-NEXT: sra.l %s4, %s0, 48
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i16 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -39,11 +37,12 @@ define i32 @selectccsgti32(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsgti32:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s4, %s1
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i32 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -53,11 +52,11 @@ define i32 @selectccsgti64(i64, i64, i32, i32) {
 ; CHECK-LABEL: selectccsgti64:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
+; CHECK-NEXT: adds.w.sx %s4, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s3, (0)1
 ; CHECK-NEXT: cmps.l %s0, %s0, %s1
-; CHECK-NEXT: cmov.l.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: cmov.l.gt %s2, %s4, %s0
+; CHECK-NEXT: or %s0, 0, %s2
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i64 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -67,19 +66,19 @@ define i32 @selectccsgti128(i128, i128, i32, i32) {
 ; CHECK-LABEL: selectccsgti128:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw5 killed $sw5 def $sx5
-; CHECK-NEXT: # kill: def $sw4 killed $sw4 def $sx4
-; CHECK-NEXT: or %s6, 0, (0)1
+; CHECK-NEXT: adds.w.sx %s6, %s4, (0)1
+; CHECK-NEXT: adds.w.sx %s4, %s5, (0)1
+; CHECK-NEXT: or %s5, 0, (0)1
 ; CHECK-NEXT: cmps.l %s1, %s1, %s3
-; CHECK-NEXT: or %s3, 0, %s6
+; CHECK-NEXT: or %s3, 0, %s5
 ; CHECK-NEXT: cmov.l.gt %s3, (63)0, %s1
 ; CHECK-NEXT: cmpu.l %s0, %s0, %s2
-; CHECK-NEXT: cmov.l.gt %s6, (63)0, %s0
-; CHECK-NEXT: cmov.l.eq %s3, %s6, %s1
+; CHECK-NEXT: cmov.l.gt %s5, (63)0, %s0
+; CHECK-NEXT: cmov.l.eq %s3, %s5, %s1
 ; CHECK-NEXT: or %s0, 0, (0)1
 ; CHECK-NEXT: cmps.w.sx %s0, %s3, %s0
-; CHECK-NEXT: cmov.w.ne %s5, %s4, %s0
-; CHECK-NEXT: or %s0, 0, %s5
+; CHECK-NEXT: cmov.w.ne %s4, %s6, %s0
+; CHECK-NEXT: or %s0, 0, %s4
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i128 %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -89,11 +88,11 @@ define i32 @selectccogtf32(float, float, i32, i32) {
 ; CHECK-LABEL: selectccogtf32:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
+; CHECK-NEXT: adds.w.sx %s4, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s3, (0)1
 ; CHECK-NEXT: fcmp.s %s0, %s0, %s1
-; CHECK-NEXT: cmov.s.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: cmov.s.gt %s2, %s4, %s0
+; CHECK-NEXT: or %s0, 0, %s2
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = fcmp ogt float %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
@@ -103,11 +102,11 @@ define i32 @selectccogtf64(double, double, i32, i32) {
 ; CHECK-LABEL: selectccogtf64:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
+; CHECK-NEXT: adds.w.sx %s4, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s3, (0)1
 ; CHECK-NEXT: fcmp.d %s0, %s0, %s1
-; CHECK-NEXT: cmov.d.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: cmov.d.gt %s2, %s4, %s0
+; CHECK-NEXT: or %s0, 0, %s2
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = fcmp ogt double %0, %1
 %6 = select i1 %5, i32 %2, i32 %3
diff --git a/llvm/test/CodeGen/VE/selectcci32i.ll b/llvm/test/CodeGen/VE/selectcci32i.ll
index 0e17f83ad1781..a4cccd0ebf935 100644
--- a/llvm/test/CodeGen/VE/selectcci32i.ll
+++ b/llvm/test/CodeGen/VE/selectcci32i.ll
@@ -3,12 +3,12 @@ define i32 @selectcceq(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectcceq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.eq %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.eq %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp eq i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -18,12 +18,12 @@ define i32 @selectccne(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccne:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.ne %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.ne %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ne i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -33,12 +33,12 @@ define i32 @selectccsgt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsgt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sgt i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -48,12 +48,12 @@ define i32 @selectccsge(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 11, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 11, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sge i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -63,12 +63,12 @@ define i32 @selectccslt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccslt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp slt i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -78,12 +78,12 @@ define i32 @selectccsle(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccsle:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 13, (0)1
-; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 13, (0)1
+; CHECK-NEXT: cmps.w.sx %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp sle i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -93,12 +93,12 @@ define i32 @selectccugt(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ugt i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -108,12 +108,12 @@ define i32 @selectccuge(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 11, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 11, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp uge i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -123,12 +123,12 @@ define i32 @selectccult(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ult i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -138,12 +138,12 @@ define i32 @selectccule(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 13, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 13, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ule i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -153,12 +153,12 @@ define i32 @selectccugt2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccugt2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ugt i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -168,12 +168,12 @@ define i32 @selectccuge2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccuge2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 11, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 11, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.gt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp uge i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -183,12 +183,12 @@ define i32 @selectccult2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccult2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 12, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 12, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ult i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
@@ -198,12 +198,12 @@ define i32 @selectccule2(i32, i32, i32, i32) {
 ; CHECK-LABEL: selectccule2:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: # kill: def $sw3 killed $sw3 def $sx3
-; CHECK-NEXT: # kill: def $sw2 killed $sw2 def $sx2
-; CHECK-NEXT: or %s1, 13, (0)1
-; CHECK-NEXT: cmpu.w %s0, %s0, %s1
-; CHECK-NEXT: cmov.w.lt %s3, %s2, %s0
-; CHECK-NEXT: or %s0, 0, %s3
+; CHECK-NEXT: adds.w.sx %s1, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s2, %s2, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s3, (0)1
+; CHECK-NEXT: or %s3, 13, (0)1
+; CHECK-NEXT: cmpu.w %s1, %s1, %s3
+; CHECK-NEXT: cmov.w.lt %s0, %s2, %s1
 ; CHECK-NEXT: or %s11, 0, %s9
 %5 = icmp ule i32 %0, 12
 %6 = select i1 %5, i32 %2, i32 %3
diff --git a/llvm/test/CodeGen/VE/selectcci64c.ll b/llvm/test/CodeGen/VE/selectcci64c.ll
index 7bb7d7fee1c31..276f23d9a5ffd 100644
--- a/llvm/test/CodeGen/VE/selectcci64c.ll
+++ b/llvm/test/CodeGen/VE/selectcci64c.ll
@@ -3,10 +3,10 @@ define i64 @selectccsgti8(i8, i8, i64, i64) {
 ; CHECK-LABEL: selectccsgti8:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: sla.w.sx %s1, %s1, 24
-; CHECK-NEXT: sra.w.sx %s1, %s1, 24
-; CHECK-NEXT: sla.w.sx %s0, %s0, 24
-; CHECK-NEXT: sra.w.sx %s0, %s0, 24
+; CHECK-NEXT: sll %s1, %s1, 56
+; CHECK-NEXT: sra.l %s1, %s1, 56
+; CHECK-NEXT: sll %s0, %s0, 56
+; CHECK-NEXT: sra.l %s0, %s0, 56
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -19,10 +19,10 @@ define i64 @selectccsgti8(i8, i8, i64, i64) {
 define i64 @selectccsgti16(i16, i16, i64, i64) {
 ; CHECK-LABEL: selectccsgti16:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: sla.w.sx %s1, %s1, 16
-; CHECK-NEXT: sra.w.sx %s1, %s1, 16
-; CHECK-NEXT: sla.w.sx %s0, %s0, 16
-; CHECK-NEXT: sra.w.sx %s0, %s0, 16
+; CHECK-NEXT: sll %s1, %s1, 48
+; CHECK-NEXT: sra.l %s1, %s1, 48
+; CHECK-NEXT: sll %s0, %s0, 48
+; CHECK-NEXT: sra.l %s0, %s0, 48
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
@@ -35,6 +35,8 @@ define i64 @selectccsgti16(i16, i16, i64, i64) {
 define i64 @selectccsgti32(i32, i32, i64, i64) {
 ; CHECK-LABEL: selectccsgti32:
 ; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1
 ; CHECK-NEXT: cmov.w.gt %s3, %s2, %s0
 ; CHECK-NEXT: or %s0, 0, %s3
diff --git a/llvm/test/CodeGen/VE/setccf32.ll b/llvm/test/CodeGen/VE/setccf32.ll
index 6ced8ce53b9c4..f2e9062fcf624 100644
--- a/llvm/test/CodeGen/VE/setccf32.ll
+++ b/llvm/test/CodeGen/VE/setccf32.ll
@@ -21,10 +21,10 @@ define zeroext i1 @setccat(float, float) {
 define zeroext i1 @setccoeq(float, float) {
 ; CHECK-LABEL: setccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.eq %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.eq %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oeq float %0, %1
 ret i1 %3
@@ -33,10 +33,10 @@ define zeroext i1 @setccone(float, float) {
 ; CHECK-LABEL: setccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ne %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ne %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp one float %0, %1
 ret i1 %3
@@ -45,10 +45,10 @@ define zeroext i1 @setccogt(float, float) {
 ; CHECK-LABEL: setccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.gt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.gt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ogt float %0, %1
 ret i1 %3
@@ -57,10 +57,10 @@ define zeroext i1 @setccoge(float, float) {
 ; CHECK-LABEL: setccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ge %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ge %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oge float %0, %1
 ret i1 %3
@@ -69,10 +69,10 @@ define zeroext i1 @setccolt(float, float) {
 ; CHECK-LABEL: setccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.lt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.lt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp olt float %0, %1
 ret i1 %3
@@ -81,10 +81,10 @@ define zeroext i1 @setccole(float, float) {
 ; CHECK-LABEL: setccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.le %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.le %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ole float %0, %1
 ret i1 %3
@@ -93,10 +93,10 @@ define zeroext i1 @setccord(float, float) {
 ; CHECK-LABEL: setccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.num %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.num %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ord float %0, %1
 ret i1 %3
@@ -105,10 +105,10 @@ define zeroext i1 @setccuno(float, float) {
 ; CHECK-LABEL: setccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.nan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.nan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uno float %0, %1
 ret i1 %3
@@ -117,10 +117,10 @@ define zeroext i1 @setccueq(float, float) {
 ; CHECK-LABEL: setccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.eqnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.eqnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ueq float %0, %1
 ret i1 %3
@@ -129,10 +129,10 @@ define zeroext i1 @setccune(float, float) {
 ; CHECK-LABEL: setccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.nenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.nenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp une float %0, %1
 ret i1 %3
@@ -141,10 +141,10 @@ define zeroext i1 @setccugt(float, float) {
 ; CHECK-LABEL: setccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.gtnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.gtnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ugt float %0, %1
 ret i1 %3
@@ -153,10 +153,10 @@ define zeroext i1 @setccuge(float, float) {
 ; CHECK-LABEL: setccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.genan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.genan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uge float %0, %1
 ret i1 %3
@@ -165,10 +165,10 @@ define zeroext i1 @setccult(float, float) {
 ; CHECK-LABEL: setccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ltnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ltnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ult float %0, %1
 ret i1 %3
@@ -177,10 +177,10 @@ define zeroext i1 @setccule(float, float) {
 ; CHECK-LABEL: setccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.lenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.lenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ule float %0, %1
 ret i1 %3
diff --git a/llvm/test/CodeGen/VE/setccf32i.ll b/llvm/test/CodeGen/VE/setccf32i.ll
index 8f79219359f3c..3f90a103fec53 100644
--- a/llvm/test/CodeGen/VE/setccf32i.ll
+++ b/llvm/test/CodeGen/VE/setccf32i.ll
@@ -22,10 +22,10 @@ define zeroext i1 @setccoeq(float, float) {
 ; CHECK-LABEL: setccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.eq %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.eq %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oeq float %0, 0.0
 ret i1 %3
@@ -35,10 +35,10 @@ define zeroext i1 @setccone(float, float) {
 ; CHECK-LABEL: setccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ne %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ne %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp one float %0, 0.0
 ret i1 %3
@@ -48,10 +48,10 @@ define zeroext i1 @setccogt(float, float) {
 ; CHECK-LABEL: setccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.gt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.gt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ogt float %0, 0.0
 ret i1 %3
@@ -61,10 +61,10 @@ define zeroext i1 @setccoge(float, float) {
 ; CHECK-LABEL: setccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ge %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ge %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oge float %0, 0.0
 ret i1 %3
@@ -74,10 +74,10 @@ define zeroext i1 @setccolt(float, float) {
 ; CHECK-LABEL: setccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.lt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.lt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp olt float %0, 0.0
 ret i1 %3
@@ -87,10 +87,10 @@ define zeroext i1 @setccole(float, float) {
 ; CHECK-LABEL: setccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.le %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.le %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ole float %0, 0.0
 ret i1 %3
@@ -99,10 +99,10 @@ define zeroext i1 @setccole(float, float) {
 define zeroext i1 @setccord(float, float) {
 ; CHECK-LABEL: setccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s0
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.num %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s0
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.num %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ord float %0, 0.0
 ret i1 %3
@@ -111,10 +111,10 @@ define zeroext i1 @setccord(float, float) {
 define zeroext i1 @setccuno(float, float) {
 ; CHECK-LABEL: setccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.s %s1, %s0, %s0
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.nan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s0
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.nan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uno float %0, 0.0
 ret i1 %3
@@ -124,10 +124,10 @@ define zeroext i1 @setccueq(float, float) {
 ; CHECK-LABEL: setccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.eqnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.eqnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ueq float %0, 0.0
 ret i1 %3
@@ -137,10 +137,10 @@ define zeroext i1 @setccune(float, float) {
 ; CHECK-LABEL: setccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.nenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.nenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp une float %0, 0.0
 ret i1 %3
@@ -150,10 +150,10 @@ define zeroext i1 @setccugt(float, float) {
 ; CHECK-LABEL: setccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.gtnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.gtnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ugt float %0, 0.0
 ret i1 %3
@@ -163,10 +163,10 @@ define zeroext i1 @setccuge(float, float) {
 ; CHECK-LABEL: setccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.genan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.genan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uge float %0, 0.0
 ret i1 %3
@@ -176,10 +176,10 @@ define zeroext i1 @setccult(float, float) {
 ; CHECK-LABEL: setccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.ltnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.ltnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ult float %0, 0.0
 ret i1 %3
@@ -189,10 +189,10 @@ define zeroext i1 @setccule(float, float) {
 ; CHECK-LABEL: setccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.s %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.s.lenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.s %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.s.lenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ule float %0, 0.0
 ret i1 %3
diff --git a/llvm/test/CodeGen/VE/setccf64.ll b/llvm/test/CodeGen/VE/setccf64.ll
index dca40e8231fa9..98c0e6c56bf41 100644
--- a/llvm/test/CodeGen/VE/setccf64.ll
+++ b/llvm/test/CodeGen/VE/setccf64.ll
@@ -21,10 +21,10 @@ define zeroext i1 @setccat(double, double) {
 define zeroext i1 @setccoeq(double, double) {
 ; CHECK-LABEL: setccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.eq %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.eq %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oeq double %0, %1
 ret i1 %3
@@ -33,10 +33,10 @@ define zeroext i1 @setccone(double, double) {
 ; CHECK-LABEL: setccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ne %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.ne %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp one double %0, %1
 ret i1 %3
@@ -45,10 +45,10 @@ define zeroext i1 @setccogt(double, double) {
 ; CHECK-LABEL: setccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.gt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.gt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ogt double %0, %1
 ret i1 %3
@@ -57,10 +57,10 @@ define zeroext i1 @setccoge(double, double) {
 ; CHECK-LABEL: setccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ge %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.ge %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oge double %0, %1
 ret i1 %3
@@ -69,10 +69,10 @@ define zeroext i1 @setccolt(double, double) {
 ; CHECK-LABEL: setccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.lt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.lt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp olt double %0, %1
 ret i1 %3
@@ -81,10 +81,10 @@ define zeroext i1 @setccole(double, double) {
 ; CHECK-LABEL: setccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.le %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.le %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ole double %0, %1
 ret i1 %3
@@ -93,10 +93,10 @@ define zeroext i1 @setccord(double, double) {
 ; CHECK-LABEL: setccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.num %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.num %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ord double %0, %1
 ret i1 %3
@@ -105,10 +105,10 @@ define zeroext i1 @setccuno(double, double) {
 ; CHECK-LABEL: setccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.nan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.nan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uno double %0, %1
 ret i1 %3
@@ -117,10 +117,10 @@ define zeroext i1 @setccueq(double, double) {
 ; CHECK-LABEL: setccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.eqnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.eqnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ueq double %0, %1
 ret i1 %3
@@ -129,10 +129,10 @@ define zeroext i1 @setccune(double, double) {
 ; CHECK-LABEL: setccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.nenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.nenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp une double %0, %1
 ret i1 %3
@@ -141,10 +141,10 @@ define zeroext i1 @setccugt(double, double) {
 ; CHECK-LABEL: setccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.gtnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.gtnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ugt double %0, %1
 ret i1 %3
@@ -153,10 +153,10 @@ define zeroext i1 @setccuge(double, double) {
 ; CHECK-LABEL: setccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.genan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.genan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uge double %0, %1
 ret i1 %3
@@ -165,10 +165,10 @@ define zeroext i1 @setccult(double, double) {
 ; CHECK-LABEL: setccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ltnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.ltnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ult double %0, %1
 ret i1 %3
@@ -177,10 +177,10 @@ define zeroext i1 @setccule(double, double) {
 ; CHECK-LABEL: setccule:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.lenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.lenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ule double %0, %1
 ret i1 %3
diff --git a/llvm/test/CodeGen/VE/setccf64i.ll b/llvm/test/CodeGen/VE/setccf64i.ll
index 59af1b4103f6b..f3a8d2f35f6f7 100644
--- a/llvm/test/CodeGen/VE/setccf64i.ll
+++ b/llvm/test/CodeGen/VE/setccf64i.ll
@@ -22,10 +22,10 @@ define zeroext i1 @setccoeq(double, double) {
 ; CHECK-LABEL: setccoeq:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.eq %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.eq %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oeq double %0, 0.0
 ret i1 %3
@@ -35,10 +35,10 @@ define zeroext i1 @setccone(double, double) {
 ; CHECK-LABEL: setccone:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ne %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.ne %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp one double %0, 0.0
 ret i1 %3
@@ -48,10 +48,10 @@ define zeroext i1 @setccogt(double, double) {
 ; CHECK-LABEL: setccogt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.gt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.gt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ogt double %0, 0.0
 ret i1 %3
@@ -61,10 +61,10 @@ define zeroext i1 @setccoge(double, double) {
 ; CHECK-LABEL: setccoge:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ge %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.ge %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp oge double %0, 0.0
 ret i1 %3
@@ -74,10 +74,10 @@ define zeroext i1 @setccolt(double, double) {
 ; CHECK-LABEL: setccolt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.lt %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.lt %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp olt double %0, 0.0
 ret i1 %3
@@ -87,10 +87,10 @@ define zeroext i1 @setccole(double, double) {
 ; CHECK-LABEL: setccole:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.le %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.le %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ole double %0, 0.0
 ret i1 %3
@@ -99,10 +99,10 @@ define zeroext i1 @setccole(double, double) {
 define zeroext i1 @setccord(double, double) {
 ; CHECK-LABEL: setccord:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s0
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.num %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s0
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.num %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ord double %0, 0.0
 ret i1 %3
@@ -111,10 +111,10 @@ define zeroext i1 @setccuno(double, double) {
 ; CHECK-LABEL: setccuno:
 ; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: fcmp.d %s1, %s0, %s0
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.nan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s0
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.nan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uno double %0, 0.0
 ret i1 %3
@@ -124,10 +124,10 @@ define zeroext i1 @setccueq(double, double) {
 ; CHECK-LABEL: setccueq:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.eqnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.eqnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ueq double %0, 0.0
 ret i1 %3
@@ -137,10 +137,10 @@ define zeroext i1 @setccune(double, double) {
 ; CHECK-LABEL: setccune:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.nenan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.nenan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp une double %0, 0.0
 ret i1 %3
@@ -150,10 +150,10 @@ define zeroext i1 @setccugt(double, double) {
 ; CHECK-LABEL: setccugt:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.gtnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.gtnan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp ugt double %0, 0.0
 ret i1 %3
@@ -163,10 +163,10 @@ define zeroext i1 @setccuge(double, double) {
 ; CHECK-LABEL: setccuge:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.genan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0
+; CHECK-NEXT: fcmp.d %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: cmov.d.genan %s1, (63)0, %s0
+; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1
 ; CHECK-NEXT: or %s11, 0, %s9
 %3 = fcmp uge double %0, 0.0
 ret i1 %3
@@ -176,10 +176,10 @@ define zeroext i1 @setccult(double, double) {
 ; CHECK-LABEL: setccult:
 ; CHECK: .LBB{{[0-9]+}}_2:
 ; CHECK-NEXT: lea.sl %s1, 0
-; CHECK-NEXT: fcmp.d %s1, %s0, %s1
-; CHECK-NEXT: or %s0, 0, (0)1
-; CHECK-NEXT: cmov.d.ltnan %s0, (63)0, %s1
-; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed
$sx0 +; CHECK-NEXT: fcmp.d %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.d.ltnan %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = fcmp ult double %0, 0.0 ret i1 %3 @@ -189,10 +189,10 @@ define zeroext i1 @setccule(double, double) { ; CHECK-LABEL: setccule: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea.sl %s1, 0 -; CHECK-NEXT: fcmp.d %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.d.lenan %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: fcmp.d %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.d.lenan %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = fcmp ule double %0, 0.0 ret i1 %3 diff --git a/llvm/test/CodeGen/VE/setcci32.ll b/llvm/test/CodeGen/VE/setcci32.ll index 7e92a2c7f5ede..15bf130fec8b4 100644 --- a/llvm/test/CodeGen/VE/setcci32.ll +++ b/llvm/test/CodeGen/VE/setcci32.ll @@ -3,10 +3,12 @@ define zeroext i1 @setcceq(i32, i32) { ; CHECK-LABEL: setcceq: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp eq i32 %0, %1 ret i1 %3 @@ -15,10 +17,12 @@ define zeroext i1 @setcceq(i32, i32) { define zeroext i1 @setccne(i32, i32) { ; CHECK-LABEL: setccne: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.ne %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.ne %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ne i32 %0, %1 ret i1 %3 @@ -27,10 +31,12 @@ define zeroext i1 @setccne(i32, i32) { define zeroext i1 @setccugt(i32, i32) { ; CHECK-LABEL: setccugt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ugt i32 %0, %1 ret i1 %3 @@ -39,10 +45,12 @@ define zeroext i1 @setccugt(i32, i32) { define zeroext i1 @setccuge(i32, i32) { ; CHECK-LABEL: setccuge: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.ge %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.ge %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp uge i32 %0, %1 ret i1 %3 @@ -51,10 +59,12 @@ define zeroext i1 @setccuge(i32, i32) { define zeroext i1 @setccult(i32, i32) { ; CHECK-LABEL: setccult: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; 
CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ult i32 %0, %1 ret i1 %3 @@ -63,10 +73,12 @@ define zeroext i1 @setccult(i32, i32) { define zeroext i1 @setccule(i32, i32) { ; CHECK-LABEL: setccule: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.le %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.le %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ule i32 %0, %1 ret i1 %3 @@ -75,10 +87,12 @@ define zeroext i1 @setccule(i32, i32) { define zeroext i1 @setccsgt(i32, i32) { ; CHECK-LABEL: setccsgt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sgt i32 %0, %1 ret i1 %3 @@ -87,10 +101,12 @@ define zeroext i1 @setccsgt(i32, i32) { define zeroext i1 @setccsge(i32, i32) { ; CHECK-LABEL: setccsge: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.ge %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.ge %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sge i32 %0, %1 ret i1 %3 @@ -99,10 +115,12 @@ define zeroext i1 @setccsge(i32, i32) { define zeroext i1 @setccslt(i32, i32) { ; CHECK-LABEL: setccslt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp slt i32 %0, %1 ret i1 %3 @@ -111,10 +129,12 @@ define zeroext i1 @setccslt(i32, i32) { define zeroext i1 @setccsle(i32, i32) { ; CHECK-LABEL: setccsle: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.le %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.le %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sle i32 %0, %1 ret i1 %3 diff --git a/llvm/test/CodeGen/VE/setcci32i.ll 
b/llvm/test/CodeGen/VE/setcci32i.ll index c0d1fb0a67e4a..ac226190ae44f 100644 --- a/llvm/test/CodeGen/VE/setcci32i.ll +++ b/llvm/test/CodeGen/VE/setcci32i.ll @@ -3,11 +3,12 @@ define zeroext i1 @setcceq(i32, i32) { ; CHECK-LABEL: setcceq: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp eq i32 %0, 12 ret i1 %3 @@ -16,11 +17,12 @@ define zeroext i1 @setcceq(i32, i32) { define zeroext i1 @setccne(i32, i32) { ; CHECK-LABEL: setccne: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.ne %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.ne %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ne i32 %0, 12 ret i1 %3 @@ -29,11 +31,12 @@ define zeroext i1 @setccne(i32, i32) { define zeroext i1 @setccugt(i32, i32) { ; CHECK-LABEL: setccugt: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ugt i32 %0, 12 ret i1 %3 @@ -42,11 +45,12 @@ define zeroext i1 @setccugt(i32, i32) { define zeroext i1 @setccuge(i32, i32) { ; CHECK-LABEL: setccuge: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 11, (0)1 -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp uge i32 %0, 12 ret i1 %3 @@ -55,11 +59,12 @@ define zeroext i1 @setccuge(i32, i32) { define zeroext i1 @setccult(i32, i32) { ; CHECK-LABEL: setccult: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ult i32 %0, 12 ret i1 %3 @@ -68,11 +73,12 @@ define zeroext i1 @setccult(i32, i32) { define zeroext i1 @setccule(i32, i32) { ; CHECK-LABEL: setccule: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 13, (0)1 -; CHECK-NEXT: cmpu.w %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.w %s0, %s0, %s1 +; 
CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ule i32 %0, 12 ret i1 %3 @@ -81,11 +87,12 @@ define zeroext i1 @setccule(i32, i32) { define zeroext i1 @setccsgt(i32, i32) { ; CHECK-LABEL: setccsgt: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sgt i32 %0, 12 ret i1 %3 @@ -94,11 +101,12 @@ define zeroext i1 @setccsgt(i32, i32) { define zeroext i1 @setccsge(i32, i32) { ; CHECK-LABEL: setccsge: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 11, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sge i32 %0, 12 ret i1 %3 @@ -107,11 +115,12 @@ define zeroext i1 @setccsge(i32, i32) { define zeroext i1 @setccslt(i32, i32) { ; CHECK-LABEL: setccslt: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp slt i32 %0, 12 ret i1 %3 @@ -120,11 +129,12 @@ define zeroext i1 @setccslt(i32, i32) { define zeroext i1 @setccsle(i32, i32) { ; CHECK-LABEL: setccsle: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s1, 13, (0)1 -; CHECK-NEXT: cmps.w.sx %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.w.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.w.sx %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.w.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sle i32 %0, 12 ret i1 %3 diff --git a/llvm/test/CodeGen/VE/setcci64.ll b/llvm/test/CodeGen/VE/setcci64.ll index 8b86601594da9..5cae80a60f06d 100644 --- a/llvm/test/CodeGen/VE/setcci64.ll +++ b/llvm/test/CodeGen/VE/setcci64.ll @@ -3,10 +3,10 @@ define zeroext i1 @setcceq(i64, i64) { ; CHECK-LABEL: setcceq: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.eq %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp eq i64 %0, %1 ret i1 %3 @@ -15,10 +15,10 @@ define zeroext i1 @setcceq(i64, i64) { define zeroext i1 @setccne(i64, i64) { ; CHECK-LABEL: setccne: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.ne %s0, (63)0, %s1 -; 
CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.ne %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ne i64 %0, %1 ret i1 %3 @@ -27,10 +27,10 @@ define zeroext i1 @setccne(i64, i64) { define zeroext i1 @setccugt(i64, i64) { ; CHECK-LABEL: setccugt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ugt i64 %0, %1 ret i1 %3 @@ -39,10 +39,10 @@ define zeroext i1 @setccugt(i64, i64) { define zeroext i1 @setccuge(i64, i64) { ; CHECK-LABEL: setccuge: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.ge %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.ge %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp uge i64 %0, %1 ret i1 %3 @@ -51,10 +51,10 @@ define zeroext i1 @setccuge(i64, i64) { define zeroext i1 @setccult(i64, i64) { ; CHECK-LABEL: setccult: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ult i64 %0, %1 ret i1 %3 @@ -63,10 +63,10 @@ define zeroext i1 @setccult(i64, i64) { define zeroext i1 @setccule(i64, i64) { ; CHECK-LABEL: setccule: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.le %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.le %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ule i64 %0, %1 ret i1 %3 @@ -75,10 +75,10 @@ define zeroext i1 @setccule(i64, i64) { define zeroext i1 @setccsgt(i64, i64) { ; CHECK-LABEL: setccsgt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sgt i64 %0, %1 ret i1 %3 @@ -87,10 +87,10 @@ define zeroext i1 @setccsgt(i64, i64) { define zeroext i1 @setccsge(i64, i64) { ; CHECK-LABEL: setccsge: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.ge %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.ge %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sge i64 %0, %1 ret i1 %3 @@ -99,10 +99,10 @@ define zeroext i1 @setccsge(i64, i64) { define zeroext i1 @setccslt(i64, i64) { ; CHECK-LABEL: 
setccslt: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp slt i64 %0, %1 ret i1 %3 @@ -111,10 +111,10 @@ define zeroext i1 @setccslt(i64, i64) { define zeroext i1 @setccsle(i64, i64) { ; CHECK-LABEL: setccsle: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.le %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.le %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sle i64 %0, %1 ret i1 %3 diff --git a/llvm/test/CodeGen/VE/setcci64i.ll b/llvm/test/CodeGen/VE/setcci64i.ll index aecbe40b0a34d..c73db4c72276d 100644 --- a/llvm/test/CodeGen/VE/setcci64i.ll +++ b/llvm/test/CodeGen/VE/setcci64i.ll @@ -4,10 +4,10 @@ define zeroext i1 @setcceq(i64, i64) { ; CHECK-LABEL: setcceq: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.eq %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp eq i64 %0, 12 ret i1 %3 @@ -17,10 +17,10 @@ define zeroext i1 @setccne(i64, i64) { ; CHECK-LABEL: setccne: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.ne %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.ne %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ne i64 %0, 12 ret i1 %3 @@ -30,10 +30,10 @@ define zeroext i1 @setccugt(i64, i64) { ; CHECK-LABEL: setccugt: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ugt i64 %0, 12 ret i1 %3 @@ -43,10 +43,10 @@ define zeroext i1 @setccuge(i64, i64) { ; CHECK-LABEL: setccuge: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 11, (0)1 -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp uge i64 %0, 12 ret i1 %3 @@ -56,10 +56,10 @@ define zeroext i1 @setccult(i64, i64) { ; CHECK-LABEL: setccult: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l 
%s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ult i64 %0, 12 ret i1 %3 @@ -69,10 +69,10 @@ define zeroext i1 @setccule(i64, i64) { ; CHECK-LABEL: setccule: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 13, (0)1 -; CHECK-NEXT: cmpu.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmpu.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp ule i64 %0, 12 ret i1 %3 @@ -82,10 +82,10 @@ define zeroext i1 @setccsgt(i64, i64) { ; CHECK-LABEL: setccsgt: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sgt i64 %0, 12 ret i1 %3 @@ -95,10 +95,10 @@ define zeroext i1 @setccsge(i64, i64) { ; CHECK-LABEL: setccsge: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 11, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.gt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.gt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sge i64 %0, 12 ret i1 %3 @@ -108,10 +108,10 @@ define zeroext i1 @setccslt(i64, i64) { ; CHECK-LABEL: setccslt: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 12, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp slt i64 %0, 12 ret i1 %3 @@ -121,10 +121,10 @@ define zeroext i1 @setccsle(i64, i64) { ; CHECK-LABEL: setccsle: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s1, 13, (0)1 -; CHECK-NEXT: cmps.l %s1, %s0, %s1 -; CHECK-NEXT: or %s0, 0, (0)1 -; CHECK-NEXT: cmov.l.lt %s0, (63)0, %s1 -; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: cmps.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = icmp sle i64 %0, 12 ret i1 %3 diff --git a/llvm/test/CodeGen/VE/sext_zext_load.ll b/llvm/test/CodeGen/VE/sext_zext_load.ll index b9fc6bc4daf74..600a02a5b1303 100644 --- a/llvm/test/CodeGen/VE/sext_zext_load.ll +++ b/llvm/test/CodeGen/VE/sext_zext_load.ll @@ -267,8 +267,8 @@ define signext i8 @func37() { ; CHECK-LABEL: func37: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: ld1b.zx %s0, 191(, %s11) -; CHECK-NEXT: sla.w.sx %s0, %s0, 31 -; CHECK-NEXT: sra.w.sx %s0, %s0, 31 +; CHECK-NEXT: sll %s0, %s0, 63 +; CHECK-NEXT: sra.l %s0, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %a = alloca i1, align 1 %a.val = load i1, i1* %a, align 1 @@ -280,8 +280,8 @@ define signext i16 @func38() { ; CHECK-LABEL: func38: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: ld1b.zx %s0, 191(, %s11) -; CHECK-NEXT: sla.w.sx 
%s0, %s0, 31 -; CHECK-NEXT: sra.w.sx %s0, %s0, 31 +; CHECK-NEXT: sll %s0, %s0, 63 +; CHECK-NEXT: sra.l %s0, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %a = alloca i1, align 1 %a.val = load i1, i1* %a, align 1 @@ -293,8 +293,8 @@ define signext i32 @func39() { ; CHECK-LABEL: func39: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: ld1b.zx %s0, 191(, %s11) -; CHECK-NEXT: sla.w.sx %s0, %s0, 31 -; CHECK-NEXT: sra.w.sx %s0, %s0, 31 +; CHECK-NEXT: sll %s0, %s0, 63 +; CHECK-NEXT: sra.l %s0, %s0, 63 ; CHECK-NEXT: or %s11, 0, %s9 %a = alloca i1, align 1 %a.val = load i1, i1* %a, align 1 diff --git a/llvm/test/CodeGen/VE/subtraction.ll b/llvm/test/CodeGen/VE/subtraction.ll index 1bd85d4290929..43a30bfe1e1b7 100644 --- a/llvm/test/CodeGen/VE/subtraction.ll +++ b/llvm/test/CodeGen/VE/subtraction.ll @@ -3,9 +3,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { ; CHECK-LABEL: func1: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sub i8 %0, %1 ret i8 %3 @@ -14,9 +16,11 @@ define signext i8 @func1(i8 signext %0, i8 signext %1) { define signext i16 @func2(i16 signext %0, i16 signext %1) { ; CHECK-LABEL: func2: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sub i16 %0, %1 ret i16 %3 @@ -25,6 +29,8 @@ define signext i16 @func2(i16 signext %0, i16 signext %1) { define i32 @func3(i32 %0, i32 %1) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sub nsw i32 %0, %1 @@ -43,6 +49,8 @@ define i64 @func4(i64 %0, i64 %1) { define zeroext i8 @func6(i8 zeroext %0, i8 zeroext %1) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -53,6 +61,8 @@ define zeroext i8 @func6(i8 zeroext %0, i8 zeroext %1) { define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -63,6 +73,8 @@ define zeroext i16 @func7(i16 zeroext %0, i16 zeroext %1) { define i32 @func8(i32 %0, i32 %1) { ; CHECK-LABEL: func8: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s1, %s1, (0)1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: or %s11, 0, %s9 %3 = sub i32 %0, %1 @@ -81,9 +93,10 @@ define i64 @func9(i64 %0, i64 %1) { define signext i8 @func13(i8 signext %0, i8 signext %1) { ; CHECK-LABEL: func13: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 24 -; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: sll %s0, %s0, 56 +; CHECK-NEXT: sra.l %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i8 %0, -5 ret i8 %3 @@ -92,9 
+105,10 @@ define signext i8 @func13(i8 signext %0, i8 signext %1) { define signext i16 @func14(i16 signext %0, i16 signext %1) { ; CHECK-LABEL: func14: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 -; CHECK-NEXT: sla.w.sx %s0, %s0, 16 -; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: sll %s0, %s0, 48 +; CHECK-NEXT: sra.l %s0, %s0, 48 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i16 %0, -5 ret i16 %3 @@ -103,6 +117,7 @@ define signext i16 @func14(i16 signext %0, i16 signext %1) { define i32 @func15(i32 %0, i32 %1) { ; CHECK-LABEL: func15: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add nsw i32 %0, -5 @@ -121,6 +136,7 @@ define i64 @func16(i64 %0, i64 %1) { define zeroext i8 @func18(i8 zeroext %0, i8 zeroext %1) { ; CHECK-LABEL: func18: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -131,6 +147,7 @@ define zeroext i8 @func18(i8 zeroext %0, i8 zeroext %1) { define zeroext i16 @func19(i16 zeroext %0, i16 zeroext %1) { ; CHECK-LABEL: func19: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -141,6 +158,7 @@ define zeroext i16 @func19(i16 zeroext %0, i16 zeroext %1) { define i32 @func20(i32 %0, i32 %1) { ; CHECK-LABEL: func20: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s0, -5, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %3 = add i32 %0, -5 diff --git a/llvm/test/CodeGen/VE/truncstore.ll b/llvm/test/CodeGen/VE/truncstore.ll index 357cc6b117914..97a4da4cd93be 100644 --- a/llvm/test/CodeGen/VE/truncstore.ll +++ b/llvm/test/CodeGen/VE/truncstore.ll @@ -33,7 +33,6 @@ define void @func2(i8 signext %p, i32* %a) { define void @func3(i8 signext %p, i64* %a) { ; CHECK-LABEL: func3: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: st %s0, (, %s1) ; CHECK-NEXT: or %s11, 0, %s9 %p.conv = sext i8 %p to i64 @@ -54,7 +53,6 @@ define void @func5(i16 signext %p, i32* %a) { define void @func6(i16 signext %p, i64* %a) { ; CHECK-LABEL: func6: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: st %s0, (, %s1) ; CHECK-NEXT: or %s11, 0, %s9 %p.conv = sext i16 %p to i64 diff --git a/llvm/test/CodeGen/VE/va_caller.ll b/llvm/test/CodeGen/VE/va_caller.ll index 345ab80867f1f..b43ce999c589d 100644 --- a/llvm/test/CodeGen/VE/va_caller.ll +++ b/llvm/test/CodeGen/VE/va_caller.ll @@ -6,38 +6,36 @@ define i32 @caller() { ; CHECK-LABEL: caller: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill -; CHECK-NEXT: or %s7, 0, (0)1 -; CHECK-NEXT: st %s7, 280(, %s11) +; CHECK-NEXT: or %s18, 0, (0)1 +; CHECK-NEXT: st %s18, 280(, %s11) ; CHECK-NEXT: or %s0, 11, (0)1 ; CHECK-NEXT: st %s0, 272(, %s11) -; CHECK-NEXT: st %s7, 264(, %s11) +; CHECK-NEXT: st %s18, 264(, %s11) ; CHECK-NEXT: or %s0, 10, (0)1 ; CHECK-NEXT: st %s0, 256(, %s11) ; CHECK-NEXT: lea.sl %s0, 1075970048 ; CHECK-NEXT: st %s0, 248(, %s11) ; CHECK-NEXT: or %s0, 8, (0)1 ; CHECK-NEXT: st %s0, 240(, %s11) -; CHECK-NEXT: st %s7, 232(, %s11) -; CHECK-NEXT: lea %s0, 1086324736 -; CHECK-NEXT: stl %s0, 228(, %s11) +; CHECK-NEXT: st %s18, 232(, %s11) ; CHECK-NEXT: or %s5, 5, (0)1 -; CHECK-NEXT: stl %s5, 216(, %s11) +; CHECK-NEXT: st %s5, 216(, %s11) ; CHECK-NEXT: or 
%s4, 4, (0)1 -; CHECK-NEXT: stl %s4, 208(, %s11) +; CHECK-NEXT: st %s4, 208(, %s11) ; CHECK-NEXT: or %s3, 3, (0)1 -; CHECK-NEXT: stl %s3, 200(, %s11) +; CHECK-NEXT: st %s3, 200(, %s11) ; CHECK-NEXT: or %s2, 2, (0)1 -; CHECK-NEXT: stl %s2, 192(, %s11) +; CHECK-NEXT: st %s2, 192(, %s11) ; CHECK-NEXT: or %s1, 1, (0)1 -; CHECK-NEXT: stl %s1, 184(, %s11) -; CHECK-NEXT: or %s18, 0, (0)1 +; CHECK-NEXT: st %s1, 184(, %s11) +; CHECK-NEXT: st %s18, 176(, %s11) +; CHECK-NEXT: lea.sl %s6, 1086324736 ; CHECK-NEXT: lea %s0, func@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, func@hi(, %s0) -; CHECK-NEXT: lea.sl %s6, 1086324736 -; CHECK-NEXT: stl %s18, 176(, %s11) +; CHECK-NEXT: st %s6, 224(, %s11) ; CHECK-NEXT: or %s0, 0, %s18 -; CHECK-NEXT: # kill: def $sf6 killed $sf6 killed $sx6 +; CHECK-NEXT: or %s7, 0, %s18 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s0, 0, %s18 ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/VE/xor.ll b/llvm/test/CodeGen/VE/xor.ll index d1701f6ef5017..b3336bb72ff5a 100644 --- a/llvm/test/CodeGen/VE/xor.ll +++ b/llvm/test/CodeGen/VE/xor.ll @@ -21,7 +21,9 @@ define zeroext i8 @func8z(i8 zeroext %a, i8 zeroext %b) { define signext i8 @funci8s(i8 signext %a) { ; CHECK-LABEL: funci8s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, 5, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i8 %a, 5 ret i8 %res @@ -30,8 +32,10 @@ define signext i8 @funci8s(i8 signext %a) { define zeroext i8 @funci8z(i8 zeroext %a) { ; CHECK-LABEL: funci8z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: lea %s1, 251 ; CHECK-NEXT: xor %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i8 -5, %a ret i8 %res @@ -58,7 +62,9 @@ define zeroext i16 @func16z(i16 zeroext %a, i16 zeroext %b) { define signext i16 @funci16s(i16 signext %a) { ; CHECK-LABEL: funci16s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i16 %a, 65535 ret i16 %res @@ -67,7 +73,9 @@ define signext i16 @funci16s(i16 signext %a) { define zeroext i16 @funci16z(i16 zeroext %a) { ; CHECK-LABEL: funci16z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, %s0, (52)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i16 4095, %a ret i16 %res @@ -94,7 +102,9 @@ define zeroext i32 @func32z(i32 zeroext %a, i32 zeroext %b) { define signext i32 @funci32s(i32 signext %a) { ; CHECK-LABEL: funci32s: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i32 %a, 268435455 ret i32 %res @@ -103,7 +113,9 @@ define signext i32 @funci32s(i32 signext %a) { define zeroext i32 @funci32z(i32 zeroext %a) { ; CHECK-LABEL: funci32z: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, %s0, (36)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %res = xor i32 %a, 268435455 ret i32 %res @@ -112,6 +124,7 @@ define zeroext i32 @funci32z(i32 zeroext %a) { define i32 @funci32_another(i32 %0) { ; CHECK-LABEL: funci32_another: ; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: xor %s0, %s0, (33)1 ; CHECK-NEXT: or %s11, 0, %s9 %2 = xor i32 %0, -2147483648 From 
914dbf4808d46632cc7b8dda861a11f978083416 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Thu, 16 Apr 2020 09:42:39 -0500 Subject: [PATCH 078/600] PowerPC: Fix SPE extloadf32 handling. The patterns were incorrect copies from the FPU code, and are unnecessary, since there's no extended load for SPE. Just let LLVM itself do the work by marking it expand. Reviewed By: #powerpc, lkail Differential Revision: https://reviews.llvm.org/D78670 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 + llvm/lib/Target/PowerPC/PPCInstrSPE.td | 10 ---- llvm/test/CodeGen/PowerPC/spe.ll | 61 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 13dbcbe10d337..69afed8fb41a7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -339,6 +339,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FMA , MVT::f32, Legal); } + if (Subtarget.hasSPE()) + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td index 858eb0c9fe500..299b34ca82836 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td +++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td @@ -819,16 +819,6 @@ def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst), } // HasSPE -let Predicates = [HasSPE] in { -def : Pat<(f64 (extloadf32 iaddr:$src)), - (COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>; -def : Pat<(f64 (extloadf32 xaddr:$src)), - (COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>; - -def : Pat<(f64 (fpextend f32:$src)), - (COPY_TO_REGCLASS $src, SPERC)>; -} - let Predicates = [HasSPE] in { def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst), (ins crrc:$cond, spe4rc:$T, spe4rc:$F, diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index 1c4c7a3398177..1e4cecb1d136d 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1422,3 +1422,64 @@ for.body: ; preds = %for.body, %entry declare float @llvm.fma.f32(float, float, float) #1 attributes #1 = { nounwind readnone speculatable willreturn } + +%struct.a = type { float, float } + +define void @d(%struct.a* %e, %struct.a* %f) { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: .cfi_offset r29, -12 +; CHECK-NEXT: .cfi_offset r30, -8 +; CHECK-NEXT: .cfi_offset r29, -40 +; CHECK-NEXT: .cfi_offset r30, -32 +; CHECK-NEXT: lwz 4, 0(4) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; CHECK-NEXT: efdcfs 29, 4 +; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: mr 4, 29 +; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; CHECK-NEXT: efdcfs 30, 3 +; CHECK-NEXT: evmergehi 3, 29, 29 +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: bctrl +; CHECK-NEXT: evmergehi 3, 30, 30 +; CHECK-NEXT: mr 4, 30 +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: bctrl +; CHECK-NEXT: li 3, .LCPI58_0@l +; CHECK-NEXT: lis 4, .LCPI58_0@ha +; CHECK-NEXT: evlddx 3, 4, 3 +; CHECK-NEXT: 
evldd 30, 16(1) # 8-byte Folded Reload +; CHECK-NEXT: efdmul 3, 29, 3 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; CHECK-NEXT: efscfd 3, 3 +; CHECK-NEXT: stw 3, 0(3) +; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 0, 52(1) +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0 + %1 = load float, float* undef + %conv = fpext float %1 to double + %2 = load float, float* %0 + %g = fpext float %2 to double + %3 = call i32 undef(double %g) + %h = call i32 undef(double %conv) + %n = sitofp i32 %3 to double + %k = fmul double %g, %n + %l = fptrunc double %k to float + store float %l, float* undef + ret void +} From 7e9153e940e21a937ff3a0e7425eb1b24bd1bb76 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 8 Apr 2020 21:29:40 -0500 Subject: [PATCH 079/600] PowerPC: Don't lower SELECT_CC to PPCISD::FSEL on SPE SPE doesn't have a fsel instruction, so don't try to lower to it. This fixes a "Cannot select: tN: f64 = PPCISD::FSEL tX, tY, tZ" error. Reviewed By: #powerpc, lkail Differential Revision: https://reviews.llvm.org/D77773 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +-- llvm/test/CodeGen/PowerPC/spe-fastmath.ll | 31 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/spe-fastmath.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 69afed8fb41a7..85f1630d8e223 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7797,9 +7797,9 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - // Not FP? Not a fsel. + // Not FP, or using SPE? Not a fsel. 
if (!Op.getOperand(0).getValueType().isFloatingPoint() || - !Op.getOperand(2).getValueType().isFloatingPoint()) + !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE()) return Op; ISD::CondCode CC = cast(Op.getOperand(4))->get(); diff --git a/llvm/test/CodeGen/PowerPC/spe-fastmath.ll b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll new file mode 100644 index 0000000000000..d2b83f7ee1da2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/spe-fastmath.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %s + +define void @no_fsel(i32 %e) #0 { +; CHECK-LABEL: no_fsel: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 4, .LCPI0_0@l +; CHECK-NEXT: lis 5, .LCPI0_0@ha +; CHECK-NEXT: evlddx 4, 5, 4 +; CHECK-NEXT: efdcfui 3, 3 +; CHECK-NEXT: efdmul 5, 3, 3 +; CHECK-NEXT: efdcmpeq 0, 5, 4 +; CHECK-NEXT: ble 0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: evor 3, 4, 4 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: efdctsiz 3, 3 +; CHECK-NEXT: sth 3, 0(3) +; CHECK-NEXT: blr +entry: + %conv = uitofp i32 %e to double + %mul = fmul double %conv, %conv + %tobool = fcmp une double %mul, 0.000000e+00 + %cond = select i1 %tobool, double %conv, double 0.000000e+00 + %conv3 = fptosi double %cond to i16 + store i16 %conv3, i16* undef + ret void +} + +attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" } From 1bd7046e4ce0102adef6096a12a289d7f94b8c73 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Jul 2020 21:38:59 -0700 Subject: [PATCH 080/600] [X86] Use TargetLowering::getRegClassFor to simplify some code in tryVPTESTM. NFCI --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 3b333496dd748..bb04690e04d16 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4336,18 +4336,6 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, } } - auto getMaskRC = [](MVT MaskVT) { - switch (MaskVT.SimpleTy) { - default: llvm_unreachable("Unexpected VT!"); - case MVT::v2i1: return X86::VK2RegClassID; - case MVT::v4i1: return X86::VK4RegClassID; - case MVT::v8i1: return X86::VK8RegClassID; - case MVT::v16i1: return X86::VK16RegClassID; - case MVT::v32i1: return X86::VK32RegClassID; - case MVT::v64i1: return X86::VK64RegClassID; - } - }; - bool IsMasked = InMask.getNode() != nullptr; SDLoc dl(Root); @@ -4371,7 +4359,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, if (IsMasked) { // Widen the mask. - unsigned RegClass = getMaskRC(MaskVT); + unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID(); SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, MaskVT, InMask, RC), 0); @@ -4409,7 +4397,7 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, // If we widened, we need to shrink the mask VT. 
if (Widen) { - unsigned RegClass = getMaskRC(ResVT); + unsigned RegClass = TLI->getRegClassFor(ResVT)->getID(); SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, ResVT, SDValue(CNode, 0), RC); From 8c5edf50234f52e0de37df3e4e7cec92bdb70e12 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Sat, 1 Aug 2020 01:28:15 -0400 Subject: [PATCH 081/600] [SCEV] don't query getSCEV() for incomplete phis Querying getSCEV() for incomplete phis leads to a wrong cache value in `ExprToIVMap`, because incomplete phis may be simplified to the same value before getting the SCEV expression. Reviewed By: lebedev.ri, mkazantsev Differential Revision: https://reviews.llvm.org/D77560 --- llvm/include/llvm/IR/Instructions.h | 10 +++ .../Utils/ScalarEvolutionExpander.cpp | 10 +++ .../Power/incomplete-phi.ll | 66 +++++++++++++++++++ .../LoopStrengthReduce/Power/lit.local.cfg | 2 + 4 files changed, 88 insertions(+) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll create mode 100644 llvm/test/Transforms/LoopStrengthReduce/Power/lit.local.cfg diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 0afc585dfbe5a..63194fa93cbca 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -27,6 +27,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -2743,6 +2744,15 @@ class PHINode : public Instruction { /// non-undef value. bool hasConstantOrUndefValue() const; + /// If the PHI node is complete, which means all of its parent's predecessors + /// have an incoming value in this PHI, return true; otherwise return false. + bool isComplete() const { + return llvm::all_of(predecessors(getParent()), + [this](const BasicBlock *Pred) { + return getBasicBlockIndex(Pred) >= 0; + }); + } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::PHI; diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index d31bf6791075a..555da5df65e15 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1187,6 +1187,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (!SE.isSCEVable(PN.getType())) continue; + // We should not look for an incomplete PHI. Getting SCEV for an incomplete + // PHI has no meaning at all. 
+ if (!PN.isComplete()) { + DEBUG_WITH_TYPE( + DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n"); + continue; + } + const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(&PN)); if (!PhiSCEV) continue; @@ -2102,6 +2110,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " + << *OrigPhiRef << '\n'); ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { diff --git a/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll b/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll new file mode 100644 index 0000000000000..2abdc54bbad71 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll @@ -0,0 +1,66 @@ +; REQUIRES: asserts +; RUN: opt -loop-reduce -debug-only=loop-reduce -S < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" +; +; %lsr.iv2 and %lsr.iv10 are in same bb, but they are not equal since start +; value are different. +; +; %scevgep = getelementptr [0 x %0], [0 x %0]* %arg, i64 0, i64 99 +; %scevgep1 = bitcast %0* %scevgep to [0 x %0]* +; %lsr.iv2 = phi [0 x %0]* [ %1, %bb18 ], [ %scevgep1, %bb ] +; +; %lsr.iv10 = phi [0 x %0]* [ %2, %bb18 ], [ %arg, %bb ] +; +; Make sure two incomplete phis will not be marked as congruent. +; +; CHECK: One incomplete PHI is found: %[[IV:.*]] = phi [0 x %0]* +; CHECK: One incomplete PHI is found: %[[IV2:.*]] = phi [0 x %0]* +; CHECK-NOT: Eliminated congruent iv: %[[IV]] +; CHECK-NOT: Original iv: %[[IV2]] +; CHECK-NOT: Eliminated congruent iv: %[[IV2]] +; CHECK-NOT: Original iv: %[[IV]] + +%0 = type <{ float }> + +define void @foo([0 x %0]* %arg) { +bb: + %i = getelementptr inbounds [0 x %0], [0 x %0]* %arg, i64 0, i64 -1 + %i1 = bitcast %0* %i to i8* + %i2 = getelementptr i8, i8* %i1, i64 4 + br label %bb3 + +bb3: ; preds = %bb18, %bb + %i4 = phi i64 [ %i20, %bb18 ], [ 0, %bb ] + %i5 = phi i64 [ %i21, %bb18 ], [ 1, %bb ] + br i1 undef, label %bb22, label %bb9 + +bb9: ; preds = %bb9, %bb3 + %i10 = phi i64 [ 0, %bb3 ], [ %i16, %bb9 ] + %i11 = add i64 %i10, %i4 + %i12 = shl i64 %i11, 2 + %i13 = getelementptr i8, i8* %i2, i64 %i12 + %i14 = bitcast i8* %i13 to float* + %i15 = bitcast float* %i14 to <4 x float>* + store <4 x float> undef, <4 x float>* %i15, align 4 + %i16 = add i64 %i10, 32 + br i1 true, label %bb17, label %bb9 + +bb17: ; preds = %bb9 + br i1 undef, label %bb18, label %bb22 + +bb18: ; preds = %bb17 + %i19 = add i64 undef, %i4 + %i20 = add i64 %i19, %i5 + %i21 = add nuw nsw i64 %i5, 1 + br label %bb3 + +bb22: ; preds = %bb22, %bb17, %bb3 + %i23 = phi i64 [ %i26, %bb22 ], [ undef, %bb17 ], [ 100, %bb3 ] + %i24 = add nsw i64 %i23, %i4 + %i25 = getelementptr %0, %0* %i, i64 %i24, i32 0 + store float undef, float* %i25, align 4 + %i26 = add nuw nsw i64 %i23, 1 + br label %bb22 +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/Power/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/Power/lit.local.cfg new file mode 100644 index 0000000000000..091332439b186 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/Power/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True From 75f134eec1ca30a4080e38d6d364ed932c0322ab Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 31 Jul 2020 23:10:47 -0700 Subject: [PATCH 082/600] [X86] Refactor the broadcast and 
load folding in tryVPTESTM to reduce some code.

Now we try load and broadcast together for operand 1, followed by load
and broadcast for operand 0. Previously we tried load operand 1, load
operand 0, broadcast operand 1, broadcast operand 0.

Now we have a single helper that tries load and broadcast for one
operand that we can just call twice.
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 104 +++++++++++-------------
 1 file changed, 46 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index bb04690e04d16..58424892535a7 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4207,15 +4207,15 @@
 VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
 VPTESTM_CASE(v64i8, BZ##SUFFIX) \
 VPTESTM_CASE(v32i16, WZ##SUFFIX)

-  if (FoldedLoad) {
+  if (FoldedBCast) {
     switch (TestVT.SimpleTy) {
-    VPTESTM_FULL_CASES(rm)
+    VPTESTM_BROADCAST_CASES(rmb)
     }
   }

-  if (FoldedBCast) {
+  if (FoldedLoad) {
     switch (TestVT.SimpleTy) {
-    VPTESTM_BROADCAST_CASES(rmb)
+    VPTESTM_FULL_CASES(rm)
     }
   }

@@ -4274,68 +4274,57 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
     }
   }

-  // Without VLX we need to widen the load.
+  // Without VLX we need to widen the operation.
   bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();

-  // We can only fold loads if the sources are unique.
-  bool CanFoldLoads = Src0 != Src1;
+  auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
+                                SDValue &Base, SDValue &Scale, SDValue &Index,
+                                SDValue &Disp, SDValue &Segment) {
+    // If we need to widen, we can't fold the load.
+    if (!Widen)
+      if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
+        return true;

-  // Try to fold loads unless we need to widen.
-  bool FoldedLoad = false;
-  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Load;
-  if (!Widen && CanFoldLoads) {
-    Load = Src1;
-    FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2, Tmp3,
-                             Tmp4);
-    if (!FoldedLoad) {
-      // And is computative.
-      Load = Src0;
-      FoldedLoad = tryFoldLoad(Root, N0.getNode(), Load, Tmp0, Tmp1, Tmp2,
-                               Tmp3, Tmp4);
-      if (FoldedLoad)
-        std::swap(Src0, Src1);
-    }
-  }
+    // If we didn't fold a load, try to match broadcast. No widening limitation
+    // for this. But only 32 and 64 bit types are supported.
+    if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
+      return false;

-  auto findBroadcastedOp = [](SDValue Src, MVT CmpSVT, SDNode *&Parent) {
     // Look through single use bitcasts.
-    if (Src.getOpcode() == ISD::BITCAST && Src.hasOneUse()) {
-      Parent = Src.getNode();
-      Src = Src.getOperand(0);
+    if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
+      P = L.getNode();
+      L = L.getOperand(0);
     }

-    if (Src.getOpcode() == X86ISD::VBROADCAST_LOAD && Src.hasOneUse()) {
-      auto *MemIntr = cast<MemIntrinsicSDNode>(Src);
-      if (MemIntr->getMemoryVT().getSizeInBits() == CmpSVT.getSizeInBits())
-        return Src;
-    }
+    if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
+      return false;

-    return SDValue();
+    auto *MemIntr = cast<MemIntrinsicSDNode>(L);
+    if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
+      return false;
+
+    return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
   };

-  // If we didn't fold a load, try to match broadcast. No widening limitation
-  // for this. But only 32 and 64 bit types are supported.
-  bool FoldedBCast = false;
-  if (!FoldedLoad && CanFoldLoads &&
-      (CmpSVT == MVT::i32 || CmpSVT == MVT::i64)) {
-    SDNode *ParentNode = N0.getNode();
-    if ((Load = findBroadcastedOp(Src1, CmpSVT, ParentNode))) {
-      FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
-                                     Tmp1, Tmp2, Tmp3, Tmp4);
-    }
+  // We can only fold loads if the sources are unique.
+  bool CanFoldLoads = Src0 != Src1;

-    // Try the other operand.
-    if (!FoldedBCast) {
-      SDNode *ParentNode = N0.getNode();
-      if ((Load = findBroadcastedOp(Src0, CmpSVT, ParentNode))) {
-        FoldedBCast = tryFoldBroadcast(Root, ParentNode, Load, Tmp0,
-                                       Tmp1, Tmp2, Tmp3, Tmp4);
-        if (FoldedBCast)
-          std::swap(Src0, Src1);
-      }
+  bool FoldedLoad = false;
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (CanFoldLoads) {
+    FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
+                                    Tmp3, Tmp4);
+    if (!FoldedLoad) {
+      // And is commutative.
+      FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
+                                      Tmp2, Tmp3, Tmp4);
+      if (FoldedLoad)
+        std::swap(Src0, Src1);
     }
   }

+  bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
+
   bool IsMasked = InMask.getNode() != nullptr;

   SDLoc dl(Root);
@@ -4353,7 +4342,6 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
                                                      CmpVT), 0);
     Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);

-    assert(!FoldedLoad && "Shouldn't have folded the load");
     if (!FoldedBCast)
       Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);

@@ -4371,23 +4359,23 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
                                IsMasked);

   MachineSDNode *CNode;
-  if (FoldedLoad || FoldedBCast) {
+  if (FoldedLoad) {
     SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);

     if (IsMasked) {
       SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
-                        Load.getOperand(0) };
+                        Src1.getOperand(0) };
       CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
     } else {
       SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
-                        Load.getOperand(0) };
+                        Src1.getOperand(0) };
       CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
     }

     // Update the chain.
-    ReplaceUses(Load.getValue(1), SDValue(CNode, 1));
+    ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
     // Record the mem-refs
-    CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Load)->getMemOperand()});
+    CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
   } else {
     if (IsMasked)
       CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);

From 4888c9ce97d8c20d988212b10f1045e3c4022b8e Mon Sep 17 00:00:00 2001
From: Nathan James
Date: Sat, 1 Aug 2020 10:35:13 +0100
Subject: [PATCH 083/600] [clang-tidy] readability-identifier-naming checks
 configs for included files

When checking the style of a decl that isn't in the main file, the
check will now search for the configuration that the included file uses
and gather the style for its decls from there.
This can be useful to silence warnings in header files that follow a
different naming convention, without using header-filter to silence all
warnings (even those from other checks) in the header file.
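For illustration, the per-directory lookup amounts to something like
the following sketch (simplified to standard library types; the real
check uses llvm::StringMap keyed on the parent path and reads options
through ClangTidyContext, and stylesFromClangTidyFile below is a
hypothetical stand-in for parsing a directory's .clang-tidy):

  #include <filesystem>
  #include <string>
  #include <unordered_map>
  #include <vector>

  struct NamingStyle { std::string Case, Prefix, Suffix; };

  class PerDirectoryStyles {
  public:
    // Resolve the styles for the directory containing File, caching the
    // result so every header in that directory is only resolved once.
    const std::vector<NamingStyle> &getStyleForFile(const std::string &File) {
      std::string Dir = std::filesystem::path(File).parent_path().string();
      auto [It, Inserted] = Cache.try_emplace(Dir);
      if (Inserted)
        It->second = stylesFromClangTidyFile(Dir); // hypothetical helper
      return It->second;
    }

  private:
    // Would locate and parse the nearest .clang-tidy; stubbed out here.
    std::vector<NamingStyle> stylesFromClangTidyFile(const std::string &) {
      return {};
    }
    std::unordered_map<std::string, std::vector<NamingStyle>> Cache;
  };

Keying the cache on the directory rather than the file keeps repeated
lookups for headers in the same directory cheap.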
Reviewed By: aaron.ballman, gribozavr2 Differential Revision: https://reviews.llvm.org/D84814 --- .../readability/IdentifierNamingCheck.cpp | 147 ++++++++++-------- .../readability/IdentifierNamingCheck.h | 13 +- clang-tools-extra/docs/ReleaseNotes.rst | 9 +- .../checks/readability-identifier-naming.rst | 8 + .../global-style-disabled/.clang-tidy | 5 + .../global-style-disabled/header.h | 3 + .../global-style1/.clang-tidy | 5 + .../global-style1/header.h | 5 + .../global-style2/.clang-tidy | 5 + .../global-style2/header.h | 5 + ...lity-identifier-naming-multiple-styles.cpp | 64 ++++++++ 11 files changed, 201 insertions(+), 68 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index c885aac89072a..e004ce6fbd208 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -8,6 +8,7 @@ #include "IdentifierNamingCheck.h" +#include "../GlobList.h" #include "clang/AST/CXXInheritance.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" @@ -15,7 +16,8 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #define DEBUG_TYPE "clang-tidy" @@ -119,41 +121,47 @@ static StringRef const StyleNames[] = { #undef NAMING_KEYS // clang-format on +static std::vector> +getNamingStyles(const ClangTidyCheck::OptionsView &Options) { + std::vector> Styles; + Styles.reserve(StyleNames->size()); + for (auto const &StyleName : StyleNames) { + auto CaseOptional = Options.getOptional( + (StyleName + "Case").str()); + auto Prefix = Options.get((StyleName + "Prefix").str(), ""); + auto Postfix = Options.get((StyleName + "Suffix").str(), ""); + + if (CaseOptional || !Prefix.empty() || !Postfix.empty()) + Styles.emplace_back(IdentifierNamingCheck::NamingStyle{ + std::move(CaseOptional), std::move(Prefix), std::move(Postfix)}); + else + Styles.emplace_back(llvm::None); + } + return Styles; +} + IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name, ClangTidyContext *Context) - : RenamerClangTidyCheck(Name, Context), + : RenamerClangTidyCheck(Name, Context), Context(Context), CheckName(Name), + GetConfigPerFile(Options.get("GetConfigPerFile", true)), IgnoreFailedSplit(Options.get("IgnoreFailedSplit", false)), IgnoreMainLikeFunctions(Options.get("IgnoreMainLikeFunctions", false)) { - for (auto const &Name : StyleNames) { - auto CaseOptional = [&]() -> 
llvm::Optional { - auto ValueOr = Options.get((Name + "Case").str()); - if (ValueOr) - return *ValueOr; - llvm::logAllUnhandledErrors( - llvm::handleErrors(ValueOr.takeError(), - [](const MissingOptionError &) -> llvm::Error { - return llvm::Error::success(); - }), - llvm::errs(), "warning: "); - return llvm::None; - }(); - - auto prefix = Options.get((Name + "Prefix").str(), ""); - auto postfix = Options.get((Name + "Suffix").str(), ""); - - if (CaseOptional || !prefix.empty() || !postfix.empty()) { - NamingStyles.push_back(NamingStyle(CaseOptional, prefix, postfix)); - } else { - NamingStyles.push_back(llvm::None); - } - } + auto IterAndInserted = NamingStylesCache.try_emplace( + llvm::sys::path::parent_path(Context->getCurrentFile()), + getNamingStyles(Options)); + assert(IterAndInserted.second && "Couldn't insert Style"); + // Holding a reference to the data in the vector is safe as it should never + // move. + MainFileStyle = IterAndInserted.first->getValue(); } IdentifierNamingCheck::~IdentifierNamingCheck() = default; void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { RenamerClangTidyCheck::storeOptions(Opts); + ArrayRef> NamingStyles = + getStyleForFile(Context->getCurrentFile()); for (size_t i = 0; i < SK_Count; ++i) { if (NamingStyles[i]) { if (NamingStyles[i]->Case) { @@ -166,7 +174,7 @@ void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { NamingStyles[i]->Suffix); } } - + Options.store(Opts, "GetConfigPerFile", GetConfigPerFile); Options.store(Opts, "IgnoreFailedSplit", IgnoreFailedSplit); Options.store(Opts, "IgnoreMainLikeFunctions", IgnoreMainLikeFunctions); } @@ -374,8 +382,7 @@ fixupWithStyle(StringRef Name, static StyleKind findStyleKind( const NamedDecl *D, - const std::vector> - &NamingStyles, + ArrayRef> NamingStyles, bool IgnoreMainLikeFunctions) { assert(D && D->getIdentifier() && !D->getName().empty() && !D->isImplicit() && "Decl must be an explicit identifier with a name."); @@ -652,63 +659,56 @@ static StyleKind findStyleKind( return SK_Invalid; } -llvm::Optional -IdentifierNamingCheck::GetDeclFailureInfo(const NamedDecl *Decl, - const SourceManager &SM) const { - StyleKind SK = findStyleKind(Decl, NamingStyles, IgnoreMainLikeFunctions); - if (SK == SK_Invalid) +static llvm::Optional getFailureInfo( + StringRef Name, SourceLocation Location, + ArrayRef> NamingStyles, + StyleKind SK, const SourceManager &SM, bool IgnoreFailedSplit) { + if (SK == SK_Invalid || !NamingStyles[SK]) return None; - if (!NamingStyles[SK]) - return None; - - const NamingStyle &Style = *NamingStyles[SK]; - StringRef Name = Decl->getName(); + const IdentifierNamingCheck::NamingStyle &Style = *NamingStyles[SK]; if (matchesStyle(Name, Style)) return None; - std::string KindName = fixupWithCase(StyleNames[SK], CT_LowerCase); + std::string KindName = + fixupWithCase(StyleNames[SK], IdentifierNamingCheck::CT_LowerCase); std::replace(KindName.begin(), KindName.end(), '_', ' '); std::string Fixup = fixupWithStyle(Name, Style); if (StringRef(Fixup).equals(Name)) { if (!IgnoreFailedSplit) { - LLVM_DEBUG(llvm::dbgs() - << Decl->getBeginLoc().printToString(SM) - << llvm::format(": unable to split words for %s '%s'\n", - KindName.c_str(), Name.str().c_str())); + LLVM_DEBUG(Location.print(llvm::dbgs(), SM); + llvm::dbgs() + << llvm::formatv(": unable to split words for {0} '{1}'\n", + KindName, Name)); } return None; } - return FailureInfo{std::move(KindName), std::move(Fixup)}; + return RenamerClangTidyCheck::FailureInfo{std::move(KindName), + 
std::move(Fixup)}; +} + +llvm::Optional +IdentifierNamingCheck::GetDeclFailureInfo(const NamedDecl *Decl, + const SourceManager &SM) const { + SourceLocation Loc = Decl->getLocation(); + ArrayRef> NamingStyles = + getStyleForFile(SM.getFilename(Loc)); + + return getFailureInfo( + Decl->getName(), Loc, NamingStyles, + findStyleKind(Decl, NamingStyles, IgnoreMainLikeFunctions), SM, + IgnoreFailedSplit); } llvm::Optional IdentifierNamingCheck::GetMacroFailureInfo(const Token &MacroNameTok, const SourceManager &SM) const { - if (!NamingStyles[SK_MacroDefinition]) - return None; - - StringRef Name = MacroNameTok.getIdentifierInfo()->getName(); - const NamingStyle &Style = *NamingStyles[SK_MacroDefinition]; - if (matchesStyle(Name, Style)) - return None; + SourceLocation Loc = MacroNameTok.getLocation(); - std::string KindName = - fixupWithCase(StyleNames[SK_MacroDefinition], CT_LowerCase); - std::replace(KindName.begin(), KindName.end(), '_', ' '); - - std::string Fixup = fixupWithStyle(Name, Style); - if (StringRef(Fixup).equals(Name)) { - if (!IgnoreFailedSplit) { - LLVM_DEBUG(llvm::dbgs() - << MacroNameTok.getLocation().printToString(SM) - << llvm::format(": unable to split words for %s '%s'\n", - KindName.c_str(), Name.str().c_str())); - } - return None; - } - return FailureInfo{std::move(KindName), std::move(Fixup)}; + return getFailureInfo(MacroNameTok.getIdentifierInfo()->getName(), Loc, + getStyleForFile(SM.getFilename(Loc)), + SK_MacroDefinition, SM, IgnoreFailedSplit); } RenamerClangTidyCheck::DiagInfo @@ -720,6 +720,21 @@ IdentifierNamingCheck::GetDiagInfo(const NamingCheckId &ID, }}; } +ArrayRef> +IdentifierNamingCheck::getStyleForFile(StringRef FileName) const { + if (!GetConfigPerFile) + return MainFileStyle; + auto &Styles = NamingStylesCache[llvm::sys::path::parent_path(FileName)]; + if (Styles.empty()) { + ClangTidyOptions Options = Context->getOptionsForFile(FileName); + if (Options.Checks && GlobList(*Options.Checks).contains(CheckName)) + Styles = getNamingStyles({CheckName, Options.CheckOptions}); + else + Styles.resize(SK_Count, None); + } + return Styles; +} + } // namespace readability } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 0f6c77b2c9a86..ad1c582d100bc 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_IDENTIFIERNAMINGCHECK_H #include "../utils/RenamerClangTidyCheck.h" +#include "llvm/ADT/Optional.h" namespace clang { class MacroInfo; @@ -69,7 +70,17 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { DiagInfo GetDiagInfo(const NamingCheckId &ID, const NamingCheckFailure &Failure) const override; - std::vector> NamingStyles; + ArrayRef> + getStyleForFile(StringRef FileName) const; + + /// Stores the style options as a vector, indexed by the specified \ref + /// StyleKind, for a given directory. 
+ mutable llvm::StringMap>> + NamingStylesCache; + ArrayRef> MainFileStyle; + ClangTidyContext *const Context; + const std::string CheckName; + const bool GetConfigPerFile; const bool IgnoreFailedSplit; const bool IgnoreMainLikeFunctions; }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 1d447938eae0c..89f6a50888ccc 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -67,7 +67,14 @@ The improvements are... Improvements to clang-tidy -------------------------- -The improvements are... +Changes in existing checks +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Improved :doc:`readability-identifier-naming + ` check. + + Added an option `GetConfigPerFile` to support including files which use + different naming styles. Improvements to include-fixer ----------------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-identifier-naming.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-identifier-naming.rst index eefa5234fb211..9eec3c03f7d7d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability-identifier-naming.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-identifier-naming.rst @@ -51,6 +51,7 @@ The following options are describe below: - :option:`EnumCase`, :option:`EnumPrefix`, :option:`EnumSuffix` - :option:`EnumConstantCase`, :option:`EnumConstantPrefix`, :option:`EnumConstantSuffix` - :option:`FunctionCase`, :option:`FunctionPrefix`, :option:`FunctionSuffix` + - :option:`GetConfigPerFile` - :option:`GlobalConstantCase`, :option:`GlobalConstantPrefix`, :option:`GlobalConstantSuffix` - :option:`GlobalConstantPointerCase`, :option:`GlobalConstantPointerPrefix`, :option:`GlobalConstantPointerSuffix` - :option:`GlobalFunctionCase`, :option:`GlobalFunctionPrefix`, :option:`GlobalFunctionSuffix` @@ -713,6 +714,13 @@ After: char pre_my_function_string_post(); +.. option:: GetConfigPerFile + + When `true` the check will look for the configuration for where an + identifier is declared. Useful for when included header files use a + different style. + Default value is `true`. + .. 
option:: GlobalConstantCase When defined, the check will ensure global constant names conform to the diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy new file mode 100644 index 0000000000000..6a704df8b7b19 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/.clang-tidy @@ -0,0 +1,5 @@ +Checks: -readability-identifier-naming +CheckOptions: + - key: readability-identifier-naming.GlobalFunctionCase + value: lower_case + diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h new file mode 100644 index 0000000000000..e863f70f7fcb2 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style-disabled/header.h @@ -0,0 +1,3 @@ +void disabled_style_1(); +void disabledStyle2(); +void DISABLED_STYLE_3(); diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy new file mode 100644 index 0000000000000..85af9672b61d3 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/.clang-tidy @@ -0,0 +1,5 @@ +Checks: readability-identifier-naming +CheckOptions: + - key: readability-identifier-naming.GlobalFunctionCase + value: lower_case + diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h new file mode 100644 index 0000000000000..b170bed7c3f60 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style1/header.h @@ -0,0 +1,5 @@ + + +void style_first_good(); + +void styleFirstBad(); diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy new file mode 100644 index 0000000000000..b2e67ea9c87b5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/.clang-tidy @@ -0,0 +1,5 @@ +Checks: readability-identifier-naming +CheckOptions: + - key: readability-identifier-naming.GlobalFunctionCase + value: UPPER_CASE + diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h new file mode 100644 index 0000000000000..6b78ad82a1fdd --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/global-style2/header.h @@ -0,0 +1,5 @@ + + +void STYLE_SECOND_GOOD(); + +void styleSecondBad(); diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp new file mode 100644 index 0000000000000..54880d2ca3d0a --- /dev/null +++ 
b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming-multiple-styles.cpp @@ -0,0 +1,64 @@ +// Setup header directory + +// RUN: rm -rf %theaders +// RUN: mkdir %theaders +// RUN: cp -R %S/Inputs/readability-identifier-naming/. %theaders + +// C++11 isn't explicitly required, but failing to specify a standard means the +// check will run multiple times for different standards. This will cause the +// second test to fail as the header file will be changed during the first run. +// InheritParentConfig is needed to look for the clang-tidy configuration files. + +// RUN: %check_clang_tidy -check-suffixes=ENABLED,SHARED -std=c++11 %s \ +// RUN: readability-identifier-naming %t -- \ +// RUN: -config='{ InheritParentConfig: true, CheckOptions: [ \ +// RUN: {key: readability-identifier-naming.FunctionCase, value: camelBack}, \ +// RUN: {key: readability-identifier-naming.GetConfigPerFile, value: true} \ +// RUN: ]}' -header-filter='.*' -- -I%theaders + +// On DISABLED run, everything should be made 'camelBack'. + +// RUN: cp -R %S/Inputs/readability-identifier-naming/. %theaders +// RUN: %check_clang_tidy -check-suffixes=DISABLED,SHARED -std=c++11 %s \ +// RUN: readability-identifier-naming %t -- \ +// RUN: -config='{ InheritParentConfig: true, CheckOptions: [ \ +// RUN: {key: readability-identifier-naming.FunctionCase, value: camelBack}, \ +// RUN: {key: readability-identifier-naming.GetConfigPerFile, value: false} \ +// RUN: ]}' -header-filter='.*' -- -I%theaders + +#include "global-style-disabled/header.h" +#include "global-style1/header.h" +#include "global-style2/header.h" +// CHECK-MESSAGES-ENABLED-DAG: global-style1/header.h:5:6: warning: invalid case style for global function 'styleFirstBad' +// CHECK-MESSAGES-ENABLED-DAG: global-style2/header.h:5:6: warning: invalid case style for global function 'styleSecondBad' +// CHECK-MESSAGES-DISABLED-DAG: global-style1/header.h:3:6: warning: invalid case style for function 'style_first_good' +// CHECK-MESSAGES-DISABLED-DAG: global-style2/header.h:3:6: warning: invalid case style for function 'STYLE_SECOND_GOOD' +// CHECK-MESSAGES-DISABLED-DAG: global-style-disabled/header.h:1:6: warning: invalid case style for function 'disabled_style_1' +// CHECK-MESSAGES-DISABLED-DAG: global-style-disabled/header.h:3:6: warning: invalid case style for function 'DISABLED_STYLE_3' + +void goodStyle() { + style_first_good(); + STYLE_SECOND_GOOD(); + // CHECK-FIXES-DISABLED: styleFirstGood(); + // CHECK-FIXES-DISABLED-NEXT: styleSecondGood(); +} +// CHECK-MESSAGES-SHARED-DAG: :[[@LINE+1]]:6: warning: invalid case style for function 'bad_style' +void bad_style() { + styleFirstBad(); + styleSecondBad(); +} +// CHECK-FIXES-SHARED: void badStyle() { +// CHECK-FIXES-DISABLED-NEXT: styleFirstBad(); +// CHECK-FIXES-ENABLED-NEXT: style_first_bad(); +// CHECK-FIXES-DISABLED-NEXT: styleSecondBad(); +// CHECK-FIXES-ENABLED-NEXT: STYLE_SECOND_BAD(); +// CHECK-FIXES-SHARED-NEXT: } + +void expectNoStyle() { + disabled_style_1(); + disabledStyle2(); + DISABLED_STYLE_3(); + // CHECK-FIXES-DISABLED: disabledStyle1(); + // CHECK-FIXES-DISABLED-NEXT: disabledStyle2(); + // CHECK-FIXES-DISABLED-NEXT: disabledStyle3(); +} From e73f5d86f179644b8d66d4141d8d359cd6f0435b Mon Sep 17 00:00:00 2001 From: Evgeny Leviant Date: Sat, 1 Aug 2020 12:58:52 +0300 Subject: [PATCH 084/600] [MachineVerifier] Refactor calcRegsPassed. NFC Patch improves performance of verify-machineinstrs pass up to 10x. 
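The new implementation is a single sweep in reverse post order instead
of a priority-queue worklist. A toy sketch of the idea, assuming a CFG
whose blocks are already numbered in RPO (the real code walks
MachineBasicBlocks, filters the sets through regsKilled/regsLiveOut and
skips unreachable blocks):

  #include <cstddef>
  #include <set>
  #include <vector>

  using Block = std::size_t;

  // One sweep in reverse post order: when block B is visited, every
  // forward-edge predecessor P already has its Passed set computed, so
  // the union below needs no iteration to a fixed point.
  std::vector<std::set<int>> calcPassed(
      const std::vector<std::vector<Block>> &Preds,  // Preds[B]: preds of B
      const std::vector<std::set<int>> &LiveOut) {   // LiveOut[B]: live-outs
    std::vector<std::set<int>> Passed(Preds.size());
    for (Block B = 0; B < Preds.size(); ++B) {
      for (Block P : Preds[B]) {
        Passed[B].insert(LiveOut[P].begin(), LiveOut[P].end());
        Passed[B].insert(Passed[P].begin(), Passed[P].end());
      }
    }
    return Passed;
  }

Replacing the RPO-ordered worklist with this single pass is what buys
the speedup.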
Differential revision: https://reviews.llvm.org/D84105 --- llvm/lib/CodeGen/MachineVerifier.cpp | 69 +++++++--------------------- 1 file changed, 17 insertions(+), 52 deletions(-) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index e45e6963edb11..238df43da9f5c 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -2230,63 +2230,28 @@ class FilteringVRegSet { // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. void MachineVerifier::calcRegsPassed() { - // This is a forward dataflow, doing it in RPO. A standard map serves as a - // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO - // number to MBB mapping all at once. - std::map RPOWorklist; - DenseMap RPONumbers; - if (MF->empty()) { + if (MF->empty()) // ReversePostOrderTraversal doesn't handle empty functions. return; - } - std::vector VRegsPassedSets(MF->size()); - for (const MachineBasicBlock *MBB : - ReversePostOrderTraversal(MF)) { - // Careful with the evaluation order, fetch next number before allocating. - unsigned Number = RPONumbers.size(); - RPONumbers[MBB] = Number; - // Set-up the transfer functions for all blocks. - const BBInfo &MInfo = MBBInfoMap[MBB]; - VRegsPassedSets[Number].addToFilter(MInfo.regsKilled); - VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut); - } - // First push live-out regs to successors' vregsPassed. Remember the MBBs that - // have any vregsPassed. - for (const MachineBasicBlock &MBB : *MF) { - const BBInfo &MInfo = MBBInfoMap[&MBB]; - if (!MInfo.reachable) - continue; - for (const MachineBasicBlock *Succ : MBB.successors()) { - unsigned SuccNumber = RPONumbers[Succ]; - FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; - if (SuccSet.add(MInfo.regsLiveOut)) - RPOWorklist.emplace(SuccNumber, Succ); - } - } - // Iteratively push vregsPassed to successors. - while (!RPOWorklist.empty()) { - auto Next = RPOWorklist.begin(); - const MachineBasicBlock *MBB = Next->second; - RPOWorklist.erase(Next); - FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]]; - for (const MachineBasicBlock *Succ : MBB->successors()) { - if (Succ == MBB) + for (const MachineBasicBlock *MB : + ReversePostOrderTraversal(MF)) { + FilteringVRegSet VRegs; + BBInfo &Info = MBBInfoMap[MB]; + assert(Info.reachable); + + VRegs.addToFilter(Info.regsKilled); + VRegs.addToFilter(Info.regsLiveOut); + for (const MachineBasicBlock *Pred : MB->predecessors()) { + const BBInfo &PredInfo = MBBInfoMap[Pred]; + if (!PredInfo.reachable) continue; - unsigned SuccNumber = RPONumbers[Succ]; - FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber]; - if (SuccSet.add(MSet)) - RPOWorklist.emplace(SuccNumber, Succ); + + VRegs.add(PredInfo.regsLiveOut); + VRegs.add(PredInfo.vregsPassed); } - } - // Copy the results back to BBInfos. 
- for (const MachineBasicBlock &MBB : *MF) { - BBInfo &MInfo = MBBInfoMap[&MBB]; - if (!MInfo.reachable) - continue; - const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]]; - MInfo.vregsPassed.reserve(MSet.size()); - MInfo.vregsPassed.insert(MSet.begin(), MSet.end()); + Info.vregsPassed.reserve(VRegs.size()); + Info.vregsPassed.insert(VRegs.begin(), VRegs.end()); } } From 9f21947a331203ee2579db87f1d1ec22a949e20a Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 1 Aug 2020 11:04:29 +0100 Subject: [PATCH 085/600] [clang-tidy][NFC] Small refactor --- .../readability/IdentifierNamingCheck.cpp | 55 +++++++++---------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index e004ce6fbd208..e7fe25d8e2214 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -162,17 +162,16 @@ void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { RenamerClangTidyCheck::storeOptions(Opts); ArrayRef> NamingStyles = getStyleForFile(Context->getCurrentFile()); - for (size_t i = 0; i < SK_Count; ++i) { - if (NamingStyles[i]) { - if (NamingStyles[i]->Case) { - Options.store(Opts, (StyleNames[i] + "Case").str(), - *NamingStyles[i]->Case); - } - Options.store(Opts, (StyleNames[i] + "Prefix").str(), - NamingStyles[i]->Prefix); - Options.store(Opts, (StyleNames[i] + "Suffix").str(), - NamingStyles[i]->Suffix); - } + for (size_t I = 0; I < SK_Count; ++I) { + if (!NamingStyles[I]) + continue; + if (NamingStyles[I]->Case) + Options.store(Opts, (StyleNames[I] + "Case").str(), + *NamingStyles[I]->Case); + Options.store(Opts, (StyleNames[I] + "Prefix").str(), + NamingStyles[I]->Prefix); + Options.store(Opts, (StyleNames[I] + "Suffix").str(), + NamingStyles[I]->Suffix); } Options.store(Opts, "GetConfigPerFile", GetConfigPerFile); Options.store(Opts, "IgnoreFailedSplit", IgnoreFailedSplit); @@ -191,14 +190,9 @@ static bool matchesStyle(StringRef Name, llvm::Regex("^[a-z]([a-z0-9]*(_[A-Z])?)*"), }; - if (Name.startswith(Style.Prefix)) - Name = Name.drop_front(Style.Prefix.size()); - else + if (!Name.consume_front(Style.Prefix)) return false; - - if (Name.endswith(Style.Suffix)) - Name = Name.drop_back(Style.Suffix.size()); - else + if (!Name.consume_back(Style.Suffix)) return false; // Ensure the name doesn't have any extra underscores beyond those specified @@ -221,9 +215,10 @@ static std::string fixupWithCase(StringRef Name, Name.split(Substrs, "_", -1, false); SmallVector Words; + SmallVector Groups; for (auto Substr : Substrs) { while (!Substr.empty()) { - SmallVector Groups; + Groups.clear(); if (!Splitter.match(Substr, &Groups)) break; @@ -241,12 +236,12 @@ static std::string fixupWithCase(StringRef Name, } if (Words.empty()) - return std::string(Name); + return Name.str(); - std::string Fixup; + SmallString<128> Fixup; switch (Case) { case IdentifierNamingCheck::CT_AnyCase: - Fixup += Name; + return Name.str(); break; case IdentifierNamingCheck::CT_LowerCase: @@ -267,7 +262,7 @@ static std::string fixupWithCase(StringRef Name, case IdentifierNamingCheck::CT_CamelCase: for (auto const &Word : Words) { - Fixup += Word.substr(0, 1).upper(); + Fixup += toupper(Word.front()); Fixup += Word.substr(1).lower(); } break; @@ -277,7 +272,7 @@ static std::string fixupWithCase(StringRef Name, if (&Word == &Words.front()) { Fixup += Word.lower(); } else 
{ - Fixup += Word.substr(0, 1).upper(); + Fixup += toupper(Word.front()); Fixup += Word.substr(1).lower(); } } @@ -287,7 +282,7 @@ static std::string fixupWithCase(StringRef Name, for (auto const &Word : Words) { if (&Word != &Words.front()) Fixup += "_"; - Fixup += Word.substr(0, 1).upper(); + Fixup += toupper(Word.front()); Fixup += Word.substr(1).lower(); } break; @@ -296,16 +291,16 @@ static std::string fixupWithCase(StringRef Name, for (auto const &Word : Words) { if (&Word != &Words.front()) { Fixup += "_"; - Fixup += Word.substr(0, 1).upper(); + Fixup += toupper(Word.front()); } else { - Fixup += Word.substr(0, 1).lower(); + Fixup += tolower(Word.front()); } Fixup += Word.substr(1).lower(); } break; } - return Fixup; + return Fixup.str().str(); } static bool isParamInMainLikeFunction(const ParmVarDecl &ParmDecl, @@ -715,8 +710,8 @@ RenamerClangTidyCheck::DiagInfo IdentifierNamingCheck::GetDiagInfo(const NamingCheckId &ID, const NamingCheckFailure &Failure) const { return DiagInfo{"invalid case style for %0 '%1'", - [&](DiagnosticBuilder &diag) { - diag << Failure.Info.KindName << ID.second; + [&](DiagnosticBuilder &Diag) { + Diag << Failure.Info.KindName << ID.second; }}; } From 1b1901536a7db8a074e794f004f9d94b941f7068 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 1 Aug 2020 12:28:10 +0100 Subject: [PATCH 086/600] [X86][AVX] Extend v2f64 BROADCAST(LOAD) -> BROADCAST_LOAD to v2i64/v4f32/v4i32 Minor precursor fix for D66004, but helps the SSE41 tests as well as they run with -disable-peephole --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- llvm/test/CodeGen/X86/sse41.ll | 6 ++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cb1067a06239e..d628cdfc1fdf1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36144,15 +36144,16 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, } // vbroadcast(vector load X) -> vbroadcast_load - if (SrcVT == MVT::v2f64 && Src.hasOneUse() && - ISD::isNormalLoad(Src.getNode())) { + if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 || + SrcVT == MVT::v4i32) && + Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) { LoadSDNode *LN = cast(Src); // Unless the load is volatile or atomic. 
if (LN->isSimple()) { SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; + SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; SDValue BcastLd = DAG.getMemIntrinsicNode( - X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, + X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(), LN->getPointerInfo(), LN->getOriginalAlign(), LN->getMemOperand()->getFlags()); DCI.CombineTo(N.getNode(), BcastLd); diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 99cd686f50bd5..ef503760890f7 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -654,8 +654,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu ; X86-AVX512-LABEL: pinsrd_from_shufflevector_i32: ; X86-AVX512: ## %bb.0: ## %entry ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovaps (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x08] -; X86-AVX512-NEXT: vbroadcastss %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc9] +; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x08] ; X86-AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08] ; X86-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3] ; X86-AVX512-NEXT: retl ## encoding: [0xc3] @@ -678,8 +677,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu ; ; X64-AVX512-LABEL: pinsrd_from_shufflevector_i32: ; X64-AVX512: ## %bb.0: ## %entry -; X64-AVX512-NEXT: vmovaps (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0f] -; X64-AVX512-NEXT: vbroadcastss %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc9] +; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x0f] ; X64-AVX512-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08] ; X64-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3] ; X64-AVX512-NEXT: retq ## encoding: [0xc3] From 1aa52d67d1c1a8eb174d4a85391a17c5f0731116 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 1 Aug 2020 07:44:33 -0400 Subject: [PATCH 087/600] [InstSimplify] add abs test with assume; NFC --- llvm/test/Transforms/InstSimplify/call.ll | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index e8c39dbda8f98..e4fcf72327cc4 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -51,6 +51,21 @@ define <3 x i82> @select_abs(<3 x i1> %cond) { ret <3 x i82> %abs } +declare void @llvm.assume(i1) + +define i32 @assume_abs(i32 %x) { +; CHECK-LABEL: @assume_abs( +; CHECK-NEXT: [[ASSUME:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] +; + %assume = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %assume) + %abs = call i32 @llvm.abs.i32(i32 %x, i1 true) + ret i32 %abs +} + declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b) @@ -1092,7 +1107,7 @@ define i32 @call_undef_musttail() { define float @nobuiltin_fmax() { ; CHECK-LABEL: @nobuiltin_fmax( -; CHECK-NEXT: [[M:%.*]] = call float 
@fmaxf(float 0.000000e+00, float 1.000000e+00) #3 +; CHECK-NEXT: [[M:%.*]] = call float @fmaxf(float 0.000000e+00, float 1.000000e+00) #4 ; CHECK-NEXT: [[R:%.*]] = call float @llvm.fabs.f32(float [[M]]) ; CHECK-NEXT: ret float [[R]] ; From 04b99a4d18cf13c13f1d76c5698696bcaef4e4b6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 1 Aug 2020 07:46:23 -0400 Subject: [PATCH 088/600] [InstSimplify] simplify abs if operand is known non-negative abs() should be rare enough that using value tracking is not going to be a compile-time cost burden, so use it to reduce a variety of potential patterns. We do this in DAGCombiner too. Differential Revision: https://reviews.llvm.org/D85043 --- llvm/lib/Analysis/InstructionSimplify.cpp | 6 ++++++ llvm/test/Transforms/InstSimplify/call.ll | 17 ++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 38f3bbf4c6f53..d3928a502965b 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5255,6 +5255,12 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, Type *ReturnType = F->getReturnType(); unsigned BitWidth = ReturnType->getScalarSizeInBits(); switch (IID) { + case Intrinsic::abs: + // If the sign bit is clear already, then abs does not do anything. + if (isKnownNonNegative(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return Op0; + break; + case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index e4fcf72327cc4..2325dccd17a85 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -5,13 +5,12 @@ declare i32 @llvm.abs.i32(i32, i1) declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) -; TODO: If the sign bit is known zero, the abs is not needed. +; If the sign bit is known zero, the abs is not needed. 
define i32 @zext_abs(i31 %x) { ; CHECK-LABEL: @zext_abs( ; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[ZEXT]], i1 false) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: ret i32 [[ZEXT]] ; %zext = zext i31 %x to i32 %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) @@ -21,8 +20,7 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], -; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) -; CHECK-NEXT: ret <3 x i82> [[ABS]] +; CHECK-NEXT: ret <3 x i82> [[LSHR]] ; %lshr = lshr <3 x i82> %x, %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) @@ -32,8 +30,7 @@ define <3 x i82> @lshr_abs(<3 x i82> %x) { define i32 @and_abs(i32 %x) { ; CHECK-LABEL: @and_abs( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 true) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: ret i32 [[AND]] ; %and = and i32 %x, 2147483644 %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) @@ -43,8 +40,7 @@ define i32 @and_abs(i32 %x) { define <3 x i82> @select_abs(<3 x i1> %cond) { ; CHECK-LABEL: @select_abs( ; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> -; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[SEL]], i1 false) -; CHECK-NEXT: ret <3 x i82> [[ABS]] +; CHECK-NEXT: ret <3 x i82> [[SEL]] ; %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) @@ -57,8 +53,7 @@ define i32 @assume_abs(i32 %x) { ; CHECK-LABEL: @assume_abs( ; CHECK-NEXT: [[ASSUME:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: ret i32 [[X]] ; %assume = icmp sge i32 %x, 0 call void @llvm.assume(i1 %assume) From eb41f9edde1070d68fce4a4eb31118e0ec1ca36d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 1 Aug 2020 14:48:42 +0200 Subject: [PATCH 089/600] [mlir][Vector] Simplify code a bit. NFCI. 
--- mlir/lib/Dialect/Vector/VectorOps.cpp | 26 ++++++++------------ mlir/lib/Dialect/Vector/VectorTransforms.cpp | 18 +++++++------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index d16c7c3d6fdbe..c788d4ccb4a08 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -184,9 +184,9 @@ static ParseResult parseContractionOp(OpAsmParser &parser, auto lhsType = types[0].cast(); auto rhsType = types[1].cast(); auto maskElementType = parser.getBuilder().getI1Type(); - SmallVector maskTypes; - maskTypes.push_back(VectorType::get(lhsType.getShape(), maskElementType)); - maskTypes.push_back(VectorType::get(rhsType.getShape(), maskElementType)); + std::array maskTypes = { + VectorType::get(lhsType.getShape(), maskElementType), + VectorType::get(rhsType.getShape(), maskElementType)}; if (parser.resolveOperands(masksInfo, maskTypes, loc, result.operands)) return failure(); return success(); @@ -462,12 +462,10 @@ std::vector> ContractionOp::getBatchDimMap() { } SmallVector ContractionOp::getIndexingMaps() { - SmallVector res; - auto mapAttrs = indexing_maps().getValue(); - res.reserve(mapAttrs.size()); - for (auto mapAttr : mapAttrs) - res.push_back(mapAttr.cast().getValue()); - return res; + return llvm::to_vector<4>( + llvm::map_range(indexing_maps().getValue(), [](Attribute mapAttr) { + return mapAttr.cast().getValue(); + })); } Optional> ContractionOp::getShapeForUnroll() { @@ -1854,8 +1852,7 @@ LogicalResult TransferWriteOp::fold(ArrayRef, } Optional> TransferWriteOp::getShapeForUnroll() { - auto s = getVectorType().getShape(); - return SmallVector{s.begin(), s.end()}; + return llvm::to_vector<4>(getVectorType().getShape()); } //===----------------------------------------------------------------------===// @@ -2014,11 +2011,8 @@ static SmallVector extractShape(MemRefType memRefType) { auto vectorType = memRefType.getElementType().dyn_cast(); SmallVector res(memRefType.getShape().begin(), memRefType.getShape().end()); - if (vectorType) { - res.reserve(memRefType.getRank() + vectorType.getRank()); - for (auto s : vectorType.getShape()) - res.push_back(s); - } + if (vectorType) + res.append(vectorType.getShape().begin(), vectorType.getShape().end()); return res; } diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index ab93ef406024e..197b1c62274b2 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -1707,7 +1707,7 @@ void ContractionOpToOuterProductOpLowering::rewrite( auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; AffineExpr m, n, k; bindDims(rewriter.getContext(), m, n, k); - SmallVector perm{1, 0}; + static constexpr std::array perm = {1, 0}; auto iteratorTypes = op.iterator_types().getValue(); SmallVector maps = op.getIndexingMaps(); if (isParallelIterator(iteratorTypes[0]) && @@ -1911,10 +1911,10 @@ Value ContractionOpLowering::lowerParallel(vector::ContractionOp op, assert(lookup.hasValue() && "parallel index not listed in reduction"); int64_t resIndex = lookup.getValue(); // Construct new iterator types and affine map array attribute. 
-  SmallVector<AffineMap, 4> lowIndexingMaps;
-  lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
-  lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
-  lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
+  std::array<AffineMap, 3> lowIndexingMaps = {
+      adjustMap(iMap[0], iterIndex, rewriter),
+      adjustMap(iMap[1], iterIndex, rewriter),
+      adjustMap(iMap[2], iterIndex, rewriter)};
   auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
   auto lowIter =
       rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));
@@ -1962,10 +1962,10 @@ Value ContractionOpLowering::lowerReduction(vector::ContractionOp op,
                                 op.acc());
   }
   // Construct new iterator types and affine map array attribute.
-  SmallVector<AffineMap, 4> lowIndexingMaps;
-  lowIndexingMaps.push_back(adjustMap(iMap[0], iterIndex, rewriter));
-  lowIndexingMaps.push_back(adjustMap(iMap[1], iterIndex, rewriter));
-  lowIndexingMaps.push_back(adjustMap(iMap[2], iterIndex, rewriter));
+  std::array<AffineMap, 3> lowIndexingMaps = {
+      adjustMap(iMap[0], iterIndex, rewriter),
+      adjustMap(iMap[1], iterIndex, rewriter),
+      adjustMap(iMap[2], iterIndex, rewriter)};
   auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
   auto lowIter =
       rewriter.getArrayAttr(adjustIter(op.iterator_types(), iterIndex));

From fd69df62ed1091f25ba4749cc5152e9ce2fe3af4 Mon Sep 17 00:00:00 2001
From: David Green
Date: Sat, 1 Aug 2020 14:01:18 +0100
Subject: [PATCH 090/600] [ARM] Distribute post-inc for Thumb2 sign/zero
 extending loads/stores

This adds sign/zero extending scalar loads/stores to the post-inc
distribution added for MVE instructions in D77813, allowing us to
create more post-inc instructions. These are comparatively simple
compared to LDR/STR (which may be better turned into an LDRD/LDM), but
still require some additions over the MVE instructions.

Because there are i12 and i8 variants of the offset loads/stores
dealing with different signs, we may need to convert an i12 address to
an i8 negative-offset instruction. t2LDRBi12 can also be shrunk to a
tLDRi under the right conditions, so we need to be careful with
codesize too.
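The shape of the rewrite, on a toy model (a sketch only; the real pass
works on MachineInstrs, requires the base access to dominate the other
uses, and tracks a codesize estimate so minsize builds do not grow):

  #include <cstddef>
  #include <vector>

  struct Access { int Offset; bool IsPostInc = false; };

  // T2 immediate offsets: the i12 form is positive-only, the i8 form is
  // signed, so a rebased offset that goes negative forces the i8 variant.
  static bool isLegalOrConvertibleOffset(int Imm) {
    return Imm >= 0 ? Imm < (1 << 12) - 1 : -Imm < (1 << 8) - 1;
  }

  // Fold "base += K" into the first access as a post-increment, e.g.
  //   ldrb r7, [r6]; adds r6, #4   ->   ldrb r7, [r6], #4
  // and rebase every later access onto the new base value, e.g.
  //   ldrb r7, [r6, #1]            ->   ldrb r7, [r6, #-3]
  static bool distributeIncrement(std::vector<Access> &Accesses, int K) {
    for (std::size_t I = 1; I < Accesses.size(); ++I)
      if (!isLegalOrConvertibleOffset(Accesses[I].Offset - K))
        return false; // leave everything unchanged
    Accesses[0].IsPostInc = true;
    for (std::size_t I = 1; I < Accesses.size(); ++I)
      Accesses[I].Offset -= K;
    return true;
  }

The legality bounds above match isLegalAddressImm for AddrModeT2_i12
and AddrModeT2_i8 in the diff below.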
Differential Revision: https://reviews.llvm.org/D78625 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 4 + llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 169 ++++++++++++++++-- .../LowOverheadLoops/mve-float-loops.ll | 15 +- .../LowOverheadLoops/mve-tail-data-types.ll | 20 +-- .../CodeGen/Thumb2/mve-float16regloops.ll | 17 +- .../CodeGen/Thumb2/mve-postinc-distribute.ll | 12 +- .../CodeGen/Thumb2/postinc-distribute.mir | 94 ++++++++-- 7 files changed, 261 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 1a75b011ca59d..f71445cf59c35 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -829,6 +829,10 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm, return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; case ARMII::AddrModeT2_i7s4: return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; + case ARMII::AddrModeT2_i8: + return std::abs(Imm) < (((1 << 8) * 1) - 1); + case ARMII::AddrModeT2_i12: + return Imm >= 0 && Imm < (((1 << 12) * 1) - 1); default: llvm_unreachable("Unhandled Addressing mode"); } diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a84d23d3bb96a..09bb3b3c6f728 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1382,9 +1382,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; + case ARM::t2LDRBi8: + case ARM::t2LDRBi12: + return ARM::t2LDRB_POST; + case ARM::t2LDRSBi8: + case ARM::t2LDRSBi12: + return ARM::t2LDRSB_POST; + case ARM::t2LDRHi8: + case ARM::t2LDRHi12: + return ARM::t2LDRH_POST; + case ARM::t2LDRSHi8: + case ARM::t2LDRSHi12: + return ARM::t2LDRSH_POST; case ARM::t2STRi8: case ARM::t2STRi12: return ARM::t2STR_POST; + case ARM::t2STRBi8: + case ARM::t2STRBi12: + return ARM::t2STRB_POST; + case ARM::t2STRHi8: + case ARM::t2STRHi12: + return ARM::t2STRH_POST; case ARM::MVE_VLDRBS16: return ARM::MVE_VLDRBS16_post; @@ -2539,11 +2557,94 @@ static int getBaseOperandIndex(MachineInstr &MI) { case ARM::MVE_VSTRBU8: case ARM::MVE_VSTRHU16: case ARM::MVE_VSTRWU32: + case ARM::t2LDRHi8: + case ARM::t2LDRHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHi12: + case ARM::t2LDRBi8: + case ARM::t2LDRBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBi12: + case ARM::t2STRBi8: + case ARM::t2STRBi12: + case ARM::t2STRHi8: + case ARM::t2STRHi12: return 1; } return -1; } +// Given a memory access Opcode, check that the give Imm would be a valid Offset +// for this instruction (same as isLegalAddressImm), Or if the instruction +// could be easily converted to one where that was valid. For example converting +// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with +// AdjustBaseAndOffset below. +static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm, + const TargetInstrInfo *TII, + int &CodesizeEstimate) { + if (isLegalAddressImm(Opcode, Imm, TII)) + return true; + + // We can convert AddrModeT2_i12 to AddrModeT2_i8. + const MCInstrDesc &Desc = TII->get(Opcode); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + switch (AddrMode) { + case ARMII::AddrModeT2_i12: + CodesizeEstimate += 1; + return std::abs(Imm) < (((1 << 8) * 1) - 1); + } + return false; +} + +// Given an MI adjust its address BaseReg to use NewBaseReg and address offset +// by -Offset. 
This can either happen in-place or be a replacement as MI is +// converted to another instruction type. +static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg, + int Offset, const TargetInstrInfo *TII) { + unsigned BaseOp = getBaseOperandIndex(*MI); + MI->getOperand(BaseOp).setReg(NewBaseReg); + int OldOffset = MI->getOperand(BaseOp + 1).getImm(); + if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII)) + MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset); + else { + unsigned ConvOpcode; + switch (MI->getOpcode()) { + case ARM::t2LDRHi12: + ConvOpcode = ARM::t2LDRHi8; + break; + case ARM::t2LDRSHi12: + ConvOpcode = ARM::t2LDRSHi8; + break; + case ARM::t2LDRBi12: + ConvOpcode = ARM::t2LDRBi8; + break; + case ARM::t2LDRSBi12: + ConvOpcode = ARM::t2LDRSBi8; + break; + case ARM::t2STRHi12: + ConvOpcode = ARM::t2STRHi8; + break; + case ARM::t2STRBi12: + ConvOpcode = ARM::t2STRBi8; + break; + default: + llvm_unreachable("Unhandled convertable opcode"); + } + assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) && + "Illegal Address Immediate after convert!"); + + const MCInstrDesc &MCID = TII->get(ConvOpcode); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(OldOffset - Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + MI->eraseFromParent(); + } +} + static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, Register NewReg, const TargetInstrInfo *TII, @@ -2562,14 +2663,43 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, TRC = TII->getRegClass(MCID, 2, TRI, *MF); MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC); - return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) - .addReg(NewReg, RegState::Define) - .add(MI->getOperand(0)) - .add(MI->getOperand(1)) - .addImm(Offset) - .add(MI->getOperand(3)) - .add(MI->getOperand(4)) - .cloneMemRefs(*MI); + unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask); + switch (AddrMode) { + case ARMII::AddrModeT2_i7: + case ARMII::AddrModeT2_i7s2: + case ARMII::AddrModeT2_i7s4: + // Any MVE load/store + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .addReg(NewReg, RegState::Define) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + case ARMII::AddrModeT2_i8: + if (MI->mayLoad()) { + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .add(MI->getOperand(0)) + .addReg(NewReg, RegState::Define) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + } else { + return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) + .addReg(NewReg, RegState::Define) + .add(MI->getOperand(0)) + .add(MI->getOperand(1)) + .addImm(Offset) + .add(MI->getOperand(3)) + .add(MI->getOperand(4)) + .cloneMemRefs(*MI); + } + default: + llvm_unreachable("Unhandled createPostIncLoadStore"); + } } // Given a Base Register, optimise the load/store uses to attempt to create more @@ -2589,7 +2719,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { // An increment that can be folded in MachineInstr *Increment = nullptr; // Other accesses after BaseAccess that will need to be updated to use the - // postinc value + // postinc value. 
SmallPtrSet OtherAccesses; for (auto &Use : MRI->use_nodbg_instructions(Base)) { if (!Increment && getAddSubImmediate(Use) != 0) { @@ -2643,14 +2773,20 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { // other offsets after the BaseAccess. We rely on either // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess) // to keep things simple. + // This also adds a simple codesize metric, to detect if an instruction (like + // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi) + // cannot because it is converted to something else (t2LDRBi8). We start this + // at -1 for the gain from removing the increment. SmallPtrSet SuccessorAccesses; + int CodesizeEstimate = -1; for (auto *Use : OtherAccesses) { if (DT->dominates(BaseAccess, Use)) { SuccessorAccesses.insert(Use); unsigned BaseOp = getBaseOperandIndex(*Use); - if (!isLegalAddressImm( - Use->getOpcode(), - Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) { + if (!isLegalOrConvertableAddressImm(Use->getOpcode(), + Use->getOperand(BaseOp + 1).getImm() - + IncrementOffset, + TII, CodesizeEstimate)) { LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n"); return false; } @@ -2660,6 +2796,10 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { return false; } } + if (STI->hasMinSize() && CodesizeEstimate > 0) { + LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n"); + return false; + } // Replace BaseAccess with a post inc LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump()); @@ -2674,10 +2814,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) { for (auto *Use : SuccessorAccesses) { LLVM_DEBUG(dbgs() << "Changing: "; Use->dump()); - unsigned BaseOp = getBaseOperandIndex(*Use); - Use->getOperand(BaseOp).setReg(NewBaseReg); - int OldOffset = Use->getOperand(BaseOp + 1).getImm(); - Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset); + AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII); LLVM_DEBUG(dbgs() << " To : "; Use->dump()); } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index e69610f9df2a9..261222f60f17a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1798,20 +1798,20 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-NEXT: vldr.16 s2, [r2, #2] -; CHECK-NEXT: ldrsh r5, [r3, #-2] ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vmov s4, r4 -; CHECK-NEXT: ldrsh.w r4, [r3] +; CHECK-NEXT: ldrsh r4, [r3], #8 ; CHECK-NEXT: vcvt.f16.s32 s4, s4 -; CHECK-NEXT: vmov s8, r5 +; CHECK-NEXT: ldrsh r5, [r3, #-10] ; CHECK-NEXT: vmul.f16 s2, s2, s4 -; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vmov s6, r4 -; CHECK-NEXT: ldrsh r4, [r3, #-4] +; CHECK-NEXT: vldr.16 s4, [r2] ; CHECK-NEXT: vcvt.f16.s32 s6, s6 -; CHECK-NEXT: vcvt.f16.s32 s8, s8 +; CHECK-NEXT: ldrsh r4, [r3, #-12] ; CHECK-NEXT: vmul.f16 s4, s4, s6 +; CHECK-NEXT: vmov s8, r5 ; CHECK-NEXT: vldr.16 s6, [r2, #-2] +; CHECK-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-NEXT: vmov s10, r4 ; CHECK-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 @@ -1821,9 +1821,8 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n ; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: 
vcvtb.f32.f16 s8, s8 -; CHECK-NEXT: adds r3, #8 -; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: adds r2, #8 +; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 428c703dd341e..69039f9a4eaa2 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -437,17 +437,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #-8] ; CHECK-NEXT: ldrb r8, [r5, #-2] -; CHECK-NEXT: ldrb r7, [r6] +; CHECK-NEXT: ldrb r7, [r6], #4 ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #-4] ; CHECK-NEXT: ldrb r8, [r5, #-1] -; CHECK-NEXT: ldrb r7, [r6, #1] +; CHECK-NEXT: ldrb r7, [r6, #-3] ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4] -; CHECK-NEXT: ldrb.w r8, [r5] -; CHECK-NEXT: adds r5, #4 -; CHECK-NEXT: ldrb r7, [r6, #2] -; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: ldrb r8, [r5], #4 +; CHECK-NEXT: ldrb r7, [r6, #-2] ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #4] ; CHECK-NEXT: adds r4, #16 @@ -740,17 +738,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #-8] ; CHECK-NEXT: ldrb r8, [r5, #-2] -; CHECK-NEXT: ldrb r7, [r6] +; CHECK-NEXT: ldrb r7, [r6], #4 ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #-4] ; CHECK-NEXT: ldrb r8, [r5, #-1] -; CHECK-NEXT: ldrb r7, [r6, #1] +; CHECK-NEXT: ldrb r7, [r6, #-3] ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4] -; CHECK-NEXT: ldrb.w r8, [r5] -; CHECK-NEXT: adds r5, #4 -; CHECK-NEXT: ldrb r7, [r6, #2] -; CHECK-NEXT: adds r6, #4 +; CHECK-NEXT: ldrb r8, [r5], #4 +; CHECK-NEXT: ldrb r7, [r6, #-2] ; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: str r7, [r4, #4] ; CHECK-NEXT: adds r4, #16 diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index e8ab7792b6dfc..1a53561388138 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1147,31 +1147,30 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca ; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldrh r0, [r6] +; CHECK-NEXT: ldrh r0, [r6], #16 ; CHECK-NEXT: vldrw.u32 q1, [r5] ; CHECK-NEXT: adds r1, r5, #2 ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: ldrh r0, [r6, #2] +; CHECK-NEXT: ldrh r0, [r6, #-14] ; CHECK-NEXT: adds r1, r5, #6 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #4] +; CHECK-NEXT: ldrh r0, [r6, #-12] ; CHECK-NEXT: vldrw.u32 q1, [r5, #4] ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: ldrh r0, [r6, #6] +; CHECK-NEXT: ldrh r0, [r6, #-10] ; CHECK-NEXT: add.w r1, r5, #10 ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #8] +; CHECK-NEXT: ldrh r0, [r6, #-8] ; CHECK-NEXT: vldrw.u32 q1, [r5, #8] ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: ldrh r0, [r6, #10] -; CHECK-NEXT: ldrh r1, [r6, #14] +; CHECK-NEXT: ldrh r0, [r6, #-6] +; CHECK-NEXT: ldrh r1, 
[r6, #-2] ; CHECK-NEXT: vfma.f16 q0, q1, r0 -; CHECK-NEXT: ldrh r0, [r6, #12] +; CHECK-NEXT: ldrh r0, [r6, #-4] ; CHECK-NEXT: vldrw.u32 q1, [r5, #12] -; CHECK-NEXT: adds r6, #16 ; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: add.w r0, r5, #14 ; CHECK-NEXT: vldrw.u32 q1, [r0] diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll index fe9e7d197ca26..2db5bf59ecfae 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -106,14 +106,12 @@ define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocaptu ; CHECK-NEXT: wls lr, lr, .LBB1_7 ; CHECK-NEXT: .LBB1_5: @ %while.body11 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh.w r5, [r0, #2] -; CHECK-NEXT: ldrsh.w r6, [r1] -; CHECK-NEXT: ldrsh.w r9, [r0] -; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: ldrsh.w r2, [r1, #2] -; CHECK-NEXT: adds r1, #4 -; CHECK-NEXT: smlalbb r4, r11, r6, r5 +; CHECK-NEXT: ldrsh r9, [r0], #4 +; CHECK-NEXT: ldrsh r6, [r1], #4 +; CHECK-NEXT: ldrsh r5, [r0, #-2] +; CHECK-NEXT: ldrsh r2, [r1, #-2] ; CHECK-NEXT: smlalbb r12, r7, r6, r9 +; CHECK-NEXT: smlalbb r4, r11, r6, r5 ; CHECK-NEXT: muls r5, r2, r5 ; CHECK-NEXT: smlalbb r4, r11, r2, r9 ; CHECK-NEXT: subs.w r12, r12, r5 diff --git a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir index af39cced110a0..d08ac4754c703 100644 --- a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir +++ b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir @@ -12,6 +12,8 @@ define i32* @t2STRBi12(i32* %x, i32 %y) { unreachable } define i32* @storedadd(i32* %x, i32 %y) { unreachable } + define i32* @minsize2(i32* %x, i32 %y) minsize optsize { unreachable } + define i32* @minsize3(i32* %x, i32 %y) minsize optsize { unreachable } ... 
--- @@ -57,9 +59,8 @@ body: | ; CHECK-LABEL: name: t2LDRHi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r0 = COPY [[t2LDRH_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4) @@ -84,9 +85,8 @@ body: | ; CHECK-LABEL: name: t2LDRSHi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRSHi12_:%[0-9]+]]:rgpr = t2LDRSHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r0 = COPY [[t2LDRSH_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4) @@ -111,9 +111,8 @@ body: | ; CHECK-LABEL: name: t2LDRBi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r0 = COPY [[t2LDRB_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) @@ -138,9 +137,8 @@ body: | ; CHECK-LABEL: name: t2LDRSBi12 ; CHECK: liveins: $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 - ; CHECK: [[t2LDRSBi12_:%[0-9]+]]:rgpr = t2LDRSBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r0 = COPY [[t2LDRSB_POST1]] ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4) @@ -197,9 +195,8 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 - ; CHECK: t2STRHi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: $r0 = COPY %2 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = COPY $r1 @@ -227,9 +224,8 @@ body: | ; CHECK: liveins: $r0, $r1 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 - ; CHECK: t2STRBi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) - ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 
14 /* CC::al */, $noreg, $noreg - ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4) + ; CHECK: $r0 = COPY %2 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 %0:gprnopc = COPY $r0 %1:rgpr = COPY $r1 @@ -265,3 +261,65 @@ body: | tBX_RET 14, $noreg, implicit $r0 ... +--- +name: minsize2 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnopc, preferred-register: '' } + - { id: 1, class: rgpr, preferred-register: '' } + - { id: 2, class: rgpr, preferred-register: '' } + - { id: 3, class: rgpr, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $r0 + + ; CHECK-LABEL: name: minsize2 + ; CHECK: liveins: $r0 + ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 + ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: $r0 = COPY [[t2LDRB_POST1]] + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + %0:gprnopc = COPY $r0 + %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) + %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4) + %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg + $r0 = COPY %2 + tBX_RET 14, $noreg, implicit $r0 + +... +--- +name: minsize3 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnopc, preferred-register: '' } + - { id: 1, class: rgpr, preferred-register: '' } + - { id: 2, class: rgpr, preferred-register: '' } + - { id: 3, class: rgpr, preferred-register: '' } + - { id: 4, class: rgpr, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } +body: | + bb.0: + liveins: $r0 + + ; CHECK-LABEL: name: minsize3 + ; CHECK: liveins: $r0 + ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load 4) + ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg + ; CHECK: $r0 = COPY [[t2ADDri]] + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + %0:gprnopc = COPY $r0 + %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) + %3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4) + %4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load 4, align 4) + %2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg + $r0 = COPY %2 + tBX_RET 14, $noreg, implicit $r0 + +... From fa30adecc766eb627a85f746b0f6f22b0eadbda8 Mon Sep 17 00:00:00 2001 From: Luofan Chen Date: Sat, 1 Aug 2020 21:27:16 +0800 Subject: [PATCH 091/600] [Attributor][NFC] Update description for the dependency graph The word "dependency graph" is a bit misleading. When there is an edge from node A to B (A -> B), it actually means that B depends on A, and when the state of A is updated, B should also be updated. So I update the comment to make the description clearer.
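To make the edge direction concrete, here is a minimal, self-contained C++ sketch of the convention described above (illustrative only: DemoNode and propagate are invented names for this sketch, not part of the Attributor API, and the demo graph is assumed acyclic):

  #include <cstdio>
  #include <queue>
  #include <vector>

  // An edge A -> B is stored on A and means "B depends on A".
  struct DemoNode {
    const char *Name;
    std::vector<DemoNode *> Deps; // nodes to re-update when this one changes
  };

  // Propagate an update along the edges; assumes the demo graph is acyclic.
  void propagate(DemoNode *Changed) {
    std::queue<DemoNode *> Worklist;
    Worklist.push(Changed);
    while (!Worklist.empty()) {
      DemoNode *N = Worklist.front();
      Worklist.pop();
      std::printf("updating %s\n", N->Name);
      for (DemoNode *Dep : N->Deps) // follow A -> B: updating A re-queues B
        Worklist.push(Dep);
    }
  }

  int main() {
    DemoNode B{"B", {}};
    DemoNode A{"A", {&B}}; // edge A -> B, so B must be updated after A
    propagate(&A);         // prints "updating A" then "updating B"
    return 0;
  }

Storing the A -> B edge on A is what lets a fixpoint iteration re-enqueue exactly the followers of a node whose state changed.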
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D85065 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index bd6ff03a31fad..418a007319679 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -155,8 +155,8 @@ struct AADepGraphNode { using DepTy = PointerIntPair<AADepGraphNode *, 1>; protected: - /// Set of dependency graph nodes which this one depends on. - /// The bit encodes if it is optional. + /// Set of dependency graph nodes which should be updated if this one + /// is updated. The bit encodes if it is optional. TinyPtrVector<DepTy> Deps; static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } @@ -184,6 +184,11 @@ struct AADepGraphNode { friend struct AADepGraph; }; +/// The data structure for the dependency graph +/// +/// Note that in this graph if there is an edge from A to B (A -> B), +/// then it means that B depends on A, and when the state of A is +/// updated, node B should also be updated struct AADepGraph { AADepGraph() {} ~AADepGraph() {} @@ -197,7 +202,6 @@ struct AADepGraph { /// requires a single entry point, so we maintain a fake("synthetic") root /// node that depends on every node. AADepGraphNode SyntheticRoot; - AADepGraphNode *GetEntryNode() { return &SyntheticRoot; } iterator begin() { return SyntheticRoot.child_begin(); } From d620a6fe98f74d9b305a0d45d4c6804b0e46bf6c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 1 Aug 2020 08:38:28 -0400 Subject: [PATCH 092/600] [VectorCombine] add tests for non-zero gep offsets; NFC --- .../test/Transforms/VectorCombine/X86/load.ll | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index d560f671cfc03..4d8f3e5cd0ee5 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -55,8 +55,8 @@ define float @matching_fp_vector(<4 x float>* align 16 dereferenceable(16) %p) { ret float %r } -define float @matching_fp_vector_gep0(<4 x float>* align 16 dereferenceable(16) %p) { -; CHECK-LABEL: @matching_fp_vector_gep0( +define float @matching_fp_vector_gep00(<4 x float>* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @matching_fp_vector_gep00( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 ; CHECK-NEXT: ret float [[R]] @@ -66,6 +66,50 @@ define float @matching_fp_vector_gep0(<4 x float>* align 16 dereferenceable(16) ret float %r } +define float @matching_fp_vector_gep01(<4 x float>* align 16 dereferenceable(20) %p) { +; CHECK-LABEL: @matching_fp_vector_gep01( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 4 +; CHECK-NEXT: ret float [[R]] +; + %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 + %r = load float, float* %gep, align 4 + ret float %r +} + +define float @matching_fp_vector_gep01_deref(<4 x float>* align 16 dereferenceable(19) %p) { +; CHECK-LABEL: @matching_fp_vector_gep01_deref( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align
4 +; CHECK-NEXT: ret float [[R]] +; + %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 0, i64 1 + %r = load float, float* %gep, align 4 + ret float %r +} + +define float @matching_fp_vector_gep10(<4 x float>* align 16 dereferenceable(32) %p) { +; CHECK-LABEL: @matching_fp_vector_gep10( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: ret float [[R]] +; + %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 + %r = load float, float* %gep, align 16 + ret float %r +} + +define float @matching_fp_vector_gep10_deref(<4 x float>* align 16 dereferenceable(31) %p) { +; CHECK-LABEL: @matching_fp_vector_gep10_deref( +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[P:%.*]], i64 1, i64 0 +; CHECK-NEXT: [[R:%.*]] = load float, float* [[GEP]], align 16 +; CHECK-NEXT: ret float [[R]] +; + %gep = getelementptr inbounds <4 x float>, <4 x float>* %p, i64 1, i64 0 + %r = load float, float* %gep, align 16 + ret float %r +} + define float @nonmatching_int_vector(<2 x i64>* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @nonmatching_int_vector( ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64>* [[P:%.*]] to float* From a9b06a2c14f9a38ba16165f0343faaa9ae713fec Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 1 Aug 2020 17:54:23 +0100 Subject: [PATCH 093/600] [LCSSA] Use IRBuilder for PHI creation. Use IRBuilder instead of PHINode::Create. This should not impact the generated code, but IRBuilder provides a way to register callbacks for inserted instructions, which is convenient for some users. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D85037 --- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 2 +- llvm/lib/Transforms/Utils/LCSSA.cpp | 16 +++++++++++----- .../Transforms/Utils/ScalarEvolutionExpander.cpp | 9 +++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 60446bca53174..c6a8b27811ed1 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -76,7 +76,7 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, /// Returns true if any modifications are made. bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, const LoopInfo &LI, - ScalarEvolution *SE); + ScalarEvolution *SE, IRBuilderBase &Builder); /// Put loop into LCSSA form. /// diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index b1a1c564d2171..9c606251ae0f8 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PredIteratorCache.h" @@ -77,12 +78,15 @@ static bool isExitBlock(BasicBlock *BB, /// rewrite the uses. bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, const LoopInfo &LI, - ScalarEvolution *SE) { + ScalarEvolution *SE, + IRBuilderBase &Builder) { SmallVector<Use *, 16> UsesToRewrite; SmallSetVector<PHINode *, 16> PHIsToRemove; PredIteratorCache PredCache; bool Changed = false; + IRBuilderBase::InsertPointGuard InsertPtGuard(Builder); + // Cache the Loop ExitBlocks across this loop.
We expect to get a lot of // instructions within the same loops, computing the exit blocks is // expensive, and we're not mutating the loop structure. @@ -151,9 +155,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - - PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB), - I->getName() + ".lcssa", &ExitBB->front()); + Builder.SetInsertPoint(&ExitBB->front()); + PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB), + I->getName() + ".lcssa"); // Get the debug location from the original instruction. PN->setDebugLoc(I->getDebugLoc()); // Add inputs from inside the loop for this PHI. @@ -369,7 +373,9 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, Worklist.push_back(&I); } } - Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE); + + IRBuilder<> Builder(L.getHeader()->getContext()); + Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder); // If we modified the code, remove any caches about the loop from SCEV to // avoid dangling entries. diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 555da5df65e15..a8302b7ccfc1a 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1951,11 +1951,8 @@ void SCEVExpander::rememberInstruction(Value *I) { // a defining loop. Fix LCSSA from for each operand of the new instruction, // if required. for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd; - OpIdx++) { - auto *V = fixupLCSSAFormFor(Inst, OpIdx); - if (V != I) - DoInsert(V); - } + OpIdx++) + fixupLCSSAFormFor(Inst, OpIdx); } } @@ -2540,7 +2537,7 @@ Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { return OpV; ToUpdate.push_back(OpI); - formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE); + formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder); return User->getOperand(OpIdx); } From bb13c34c3aa100006461c972319abfef0af70603 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 1 Aug 2020 14:54:21 +0100 Subject: [PATCH 094/600] [X86][AVX] Ensure we only combine to PSHUFLW/PSHUFHW on supporting targets Noticed while investigating combining from concatenated shuffle vectors, we weren't checking that PSHUFLW/PSHUFHW was legal - we were depending on lowering splitting to subvectors. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d628cdfc1fdf1..b89502dc70204 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34146,7 +34146,10 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask, } // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
- if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { + if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 && + ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || + (MaskVT.is512BitVector() && Subtarget.hasBWI()))) { SmallVector<int, 4> RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4); From 8697d6cfa8947c4033dfe1d2ac708482d75a28d4 Mon Sep 17 00:00:00 2001 From: zoecarver Date: Sat, 1 Aug 2020 12:06:31 -0700 Subject: [PATCH 095/600] [libcxx] Add compatible with constraint tests for some shared_ptr constructors. Add shared_ptr tests where the element type and pointer type aren't 'convertible' but are 'compatible'. Responding to a comment from D81414. Differential Revision: https://reviews.llvm.org/D81532 --- .../pointer_deleter.pass.cpp | 9 +++++++++ .../pointer_deleter_allocator.pass.cpp | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp index bb41fa89be372..e736eb57e0569 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp @@ -28,6 +28,9 @@ struct A int A::count = 0; +struct Base { }; +struct Derived : Base { }; + int main(int, char**) { { @@ -46,5 +49,11 @@ int main(int, char**) assert(test_deleter<A>::count == 0); assert(test_deleter<A>::dealloc_count == 1); + { + // Make sure that we can construct a shared_ptr where the element type and pointer type + // aren't "convertible" but are "compatible". + static_assert(!std::is_constructible<std::shared_ptr<Derived[4]>, Base[4], test_deleter<Derived[4]> >::value, ""); + } + return 0; } diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index e17ae6f34312d..af9bfe9ab057b 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -28,6 +28,8 @@ struct A int A::count = 0; +struct Base { }; +struct Derived : Base { } int main(int, char**) { @@ -87,5 +89,13 @@ int main(int, char**) assert(test_deleter<A>::dealloc_count == 1); #endif + { + // Make sure that we can construct a shared_ptr where the element type and pointer type + // aren't "convertible" but are "compatible".
+ static_assert(!std::is_constructible<std::shared_ptr<Derived[4]>, + Base[4], test_deleter<Derived[4]>, + test_allocator<Derived[4]> >::value, ""); + } + return 0; } From 82a5c848e7f531ee636f643450072059397ac90c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 1 Aug 2020 20:34:39 +0100 Subject: [PATCH 096/600] [X86][AVX512] Fold concat(and(x,y),and(z,w)) -> and(concat(x,z),concat(y,w)) for 512-bit vectors Helps vpternlog folding on non-AVX512BW targets --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 ++ .../test/CodeGen/X86/avx512-insert-extract.ll | 246 +++++++++--------- llvm/test/CodeGen/X86/vector-bitreverse.ll | 56 ++-- llvm/test/CodeGen/X86/vector-fshl-512.ll | 106 ++++---- llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 70 ++--- llvm/test/CodeGen/X86/vector-fshr-512.ll | 134 +++++----- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 74 +++--- llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll | 54 ++-- llvm/test/CodeGen/X86/vector-rotate-512.ll | 134 ++++------ .../test/CodeGen/X86/vector-shift-lshr-512.ll | 22 +- llvm/test/CodeGen/X86/vector-shift-shl-512.ll | 20 +- 11 files changed, 431 insertions(+), 504 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b89502dc70204..c135b91620724 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48090,6 +48090,25 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getOperand(1)); } break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case X86ISD::ANDNP: + // TODO: Add 256-bit support. + if (!IsSplat && VT.is512BitVector()) { + SmallVector<SDValue, 2> LHS, RHS; + for (unsigned i = 0; i != NumOps; ++i) { + LHS.push_back(Ops[i].getOperand(0)); + RHS.push_back(Ops[i].getOperand(1)); + } + MVT SrcVT = Op0.getOperand(0).getSimpleValueType(); + SrcVT = MVT::getVectorVT(SrcVT.getScalarType(), + NumOps * SrcVT.getVectorNumElements()); + return DAG.getNode(Op0.getOpcode(), DL, VT, + DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, LHS), + DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcVT, RHS)); + } + break; case X86ISD::PACKSS: case X86ISD::PACKUS: if (!IsSplat && NumOps == 2 && VT.is256BitVector() && diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index f6ffd6419c13a..41bdaf21baa38 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -1692,16 +1692,15 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: andq $-64, %rsp ; KNL-NEXT: subq $128, %rsp ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm2 -; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; KNL-NEXT: andl $63, %esi ; KNL-NEXT: testb %dil, %dil -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm2, (%rsp) +; KNL-NEXT: vmovdqa64 %zmm0, (%rsp) ; KNL-NEXT: setne (%rsp,%rsi) ; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 @@ -1772,116 +1771,115 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) { ; KNL-NEXT: subq $192, %rsp ; KNL-NEXT: movl 744(%rbp), %eax ; KNL-NEXT: andl
$127, %eax -; KNL-NEXT: vmovd %edi, %xmm0 -; KNL-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, %r8d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, %r9d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0 ; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, 392(%rbp), 
%xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm1 +; KNL-NEXT: vmovd %edi, %xmm2 +; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, 40(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2 ; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3 ; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm2 -; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 -; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm3, %xmm3 -; 
KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm3, %xmm3 -; KNL-NEXT: vmovd {{.*#+}} xmm4 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm4, %xmm4 -; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1 +; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2 +; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 +; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm3, %xmm3 +; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 +; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; KNL-NEXT: cmpb $0, 736(%rbp) -; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) +; KNL-NEXT: vmovdqa64 %zmm1, (%rsp) ; KNL-NEXT: setne (%rsp,%rax) ; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 @@ -2079,23 +2077,21 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index ; KNL-NEXT: andq $-64, %rsp ; KNL-NEXT: subq 
$192, %rsp ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3 -; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3 -; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 +; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 +; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm4 -; KNL-NEXT: vpternlogq $15, %zmm4, %zmm4, %zmm4 -; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 +; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 +; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1 +; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 ; KNL-NEXT: andl $127, %esi ; KNL-NEXT: testb %dil, %dil -; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm4, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm3, (%rsp) +; KNL-NEXT: vmovdqa64 %zmm1, {{[0-9]+}}(%rsp) +; KNL-NEXT: vmovdqa64 %zmm0, (%rsp) ; KNL-NEXT: setne (%rsp,%rsi) ; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index 5d9cd1643aece..f99fc38c6625e 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -1314,18 +1314,18 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 +; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm5 +; AVX512F-NEXT: vpshufb %ymm5, %ymm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512F-NEXT: vpshufb %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3 -; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm3, %ymm0 +; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v64i8: @@ -1591,19 +1591,19 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind { ; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512F-NEXT: vpshufb %ymm1, 
%ymm6, %ymm1 -; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2 ; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v32i16: @@ -1887,19 +1887,19 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind { ; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1 -; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2 ; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v16i32: @@ -2191,19 +2191,19 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind { ; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] ; AVX512F-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512F-NEXT: vpshufb %ymm1, %ymm6, %ymm1 -; AVX512F-NEXT: vpor %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpshufb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm2 ; AVX512F-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb %ymm0, %ymm6, %ymm0 -; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpshufb %ymm0, %ymm4, 
%ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v8i64: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 088a590a2e07f..60406c45ba899 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -821,31 +821,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm5 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm6 -; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512F-NEXT: vpandq %zmm4, %zmm3, %zmm3 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm6 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm6, %ymm6 -; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5 -; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 -; AVX512F-NEXT: vporq %zmm1, %zmm3, %zmm1 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512F-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 +; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: @@ -854,31 +853,30 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm4, %ymm3, %ymm3 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm5 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm5, %zmm3 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm6 -; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; 
AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512VL-NEXT: vpandq %zmm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm6 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm6, %ymm6 -; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 -; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512VL-NEXT: vpand %ymm5, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 -; AVX512VL-NEXT: vporq %zmm1, %zmm3, %zmm1 -; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 -; AVX512VL-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0 +; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm4 +; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 +; AVX512VL-NEXT: vpternlogq $226, %zmm4, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: @@ -1510,40 +1508,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512F-LABEL: splatconstant_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; 
AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index 2481b8ebfe25d..6671f3ec4c0f8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -483,14 +483,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm4 -; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 +; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: @@ -529,14 +528,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm2, %ymm1, %ymm1 -; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm4 -; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 +; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $236, %zmm1, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: @@ -886,38 +884,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> 
%x) nounwind { ; AVX512F-LABEL: splatconstant_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index fa70e840081f5..9aa74f165bddb 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -805,68 +805,66 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsrlw 
%xmm4, %xmm5, %xmm6 -; AVX512F-NEXT: vpsrlw $8, %xmm6, %xmm6 -; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 -; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512F-NEXT: vpsubb %xmm3, %xmm4, %xmm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm6 -; AVX512F-NEXT: vpsllw %xmm4, %ymm6, %ymm6 -; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm5 -; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5 +; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm5 ; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 +; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512F-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm2, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm2, %ymm1, %ymm6 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4 +; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm2 +; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512F-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm6 -; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6 -; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm4 -; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpsubb %xmm3, %xmm4, %xmm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm6 -; AVX512VL-NEXT: vpsllw %xmm4, %ymm6, %ymm6 -; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm5 -; 
AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512VL-NEXT: vpand %ymm5, %ymm6, %ymm6 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm5 +; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm5 ; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 +; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm4, %zmm4 +; AVX512VL-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm2, %ymm4, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm2, %ymm1, %ymm6 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm6, %zmm4 +; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm2 +; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512VL-NEXT: vpternlogq $236, %zmm4, %zmm0, %zmm2 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 +; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm3, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512VL-NEXT: vpternlogq $202, %zmm3, %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: @@ -1494,40 +1492,28 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; AVX512F-LABEL: splatconstant_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: 
vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm2 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index 04a883171a7ce..0084702b7fd76 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -480,15 +480,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm4 -; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512F-NEXT: vpand %ymm4, %ymm2, %ymm2 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: @@ -524,15 +523,14 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512VL-NEXT: vpand %ymm4, %ymm2, %ymm2 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: 
@@ -882,38 +880,26 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { define <64 x i8> @splatconstant_funnnel_v64i8(<64 x i8> %x) nounwind { ; AVX512F-LABEL: splatconstant_funnnel_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll index 5b88eaec05969..e756f3ecc3538 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll @@ -133,38 +133,36 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind { define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512F-LABEL: test_div7_64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] -; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX512F-NEXT: vpmullw %ymm4, %ymm5, %ymm5 -; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpackuswb %ymm3, %ymm5, %ymm3 -; AVX512F-NEXT: vpsubb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31] -; AVX512F-NEXT: vpmullw %ymm4, %ymm6, %ymm6 -; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6 -; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23] -; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] +; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vpackuswb %ymm6, %ymm2, %ymm2 -; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsubb %ymm2, %ymm0, %ymm4 +; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = 
[127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4 +; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] +; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT: vpackuswb %ymm4, %ymm1, %ymm1 +; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_div7_64i8: diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index 831c03f03825a..690d9f721bb20 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -418,22 +418,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4 +; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm5 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4 ; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm6 -; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpsllw %xmm2, %xmm5, %xmm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512F-NEXT: vpandq %zmm2, %zmm4, %zmm2 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm4 -; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 +; AVX512F-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 +; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; 
AVX512VL-LABEL: splatvar_rotate_v64i8: @@ -445,22 +444,21 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4 +; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm5 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4 ; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm6 -; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 -; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vpand %ymm6, %ymm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512VL-NEXT: vpsllw %xmm2, %xmm5, %xmm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2 +; AVX512VL-NEXT: vpandq %zmm2, %zmm4, %zmm2 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm4 -; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 -; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm3 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 +; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v64i8: @@ -809,38 +807,26 @@ define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind { define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind { ; AVX512F-LABEL: splatconstant_rotate_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; 
AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v64i8: @@ -947,40 +933,28 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind { define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind { ; AVX512F-LABEL: splatconstant_rotate_mask_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vpsllw $4, %ymm1, %ymm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 -; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0 -; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0 +; AVX512F-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_mask_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, 
%ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1 -; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm0 -; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0 +; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll index 7cdeb29d35842..c311f138e789b 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll @@ -156,14 +156,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1 +; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 +; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v64i8: @@ -308,13 +308,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind { define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-LABEL: splatconstant_shift_v64i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1 -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] -; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm1 +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_shift_v64i8: diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll index 91146cfdf1d42..1bf878739bf8b 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-512.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-512.ll @@ -151,13 +151,13 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpsllw %xmm1, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; 
AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1
+; AVX512DQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatvar_shift_v64i8:
@@ -306,13 +306,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
 ; AVX512DQ-LABEL: splatconstant_shift_v64i8:
 ; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
-; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
-; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm1
+; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
 ; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512DQ-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512DQ-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_shift_v64i8:

From 05b44f7eaebfbca19999fde149c4c586fc965015 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sat, 1 Aug 2020 20:38:38 +0100
Subject: [PATCH 097/600] [LCSSA] Provide option for caller to clean up unused PHIs.

formLCSSAForInstructions is used by SCEVExpander, which tracks all
inserted instructions including LCSSA phis using asserting value
handles. This means cleanup needs to happen in the caller.

Extend formLCSSAForInstructions to take an optional pointer to a
vector. If this argument is non-nullptr, instead of directly deleting
the phis, add them to the vector, so the caller can process them.

This should address various PPC buildbot failures, including
http://lab.llvm.org:8011/builders/clang-ppc64be-linux-lnt/builds/40567
---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  11 +-
 llvm/lib/Transforms/Utils/LCSSA.cpp           |  34 +++---
 .../Utils/ScalarEvolutionExpander.cpp         |  11 +-
 .../CodeGen/PowerPC/hardware-loops-crash.ll   | 101 ++++++++++++++++++
 4 files changed, 139 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index c6a8b27811ed1..70c8c84c857bf 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -74,9 +74,14 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
 /// changes to CFG, preserved.
 ///
 /// Returns true if any modifications are made.
-bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
-                              const DominatorTree &DT, const LoopInfo &LI,
-                              ScalarEvolution *SE, IRBuilderBase &Builder);
+///
+/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
+/// nullptr, those are added to it (before removing, the caller has to check if
+/// they still do not have any uses). Otherwise the PHIs are directly removed.
+bool formLCSSAForInstructions(
+    SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
+    const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
+    SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
 
 /// Put loop into LCSSA form.
 ///
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 9c606251ae0f8..630aadadbbce6 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -78,10 +78,10 @@ static bool isExitBlock(BasicBlock *BB,
 /// rewrite the uses.
 bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
                                     const DominatorTree &DT, const LoopInfo &LI,
-                                    ScalarEvolution *SE,
-                                    IRBuilderBase &Builder) {
+                                    ScalarEvolution *SE, IRBuilderBase &Builder,
+                                    SmallVectorImpl<PHINode *> *PHIsToRemove) {
   SmallVector<Use *, 16> UsesToRewrite;
-  SmallSetVector<PHINode *, 16> PHIsToRemove;
+  SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
   PredIteratorCache PredCache;
   bool Changed = false;
 
@@ -257,22 +257,28 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
     SmallVector<PHINode *, 16> NeedDbgValues;
     for (PHINode *PN : AddedPHIs)
       if (PN->use_empty())
-        PHIsToRemove.insert(PN);
+        LocalPHIsToRemove.insert(PN);
       else
         NeedDbgValues.push_back(PN);
     insertDebugValuesForPHIs(InstBB, NeedDbgValues);
     Changed = true;
   }
-  // Remove PHI nodes that did not have any uses rewritten. We need to redo the
-  // use_empty() check here, because even if the PHI node wasn't used when added
-  // to PHIsToRemove, later added PHI nodes can be using it. This cleanup is
-  // not guaranteed to handle trees/cycles of PHI nodes that only are used by
-  // each other. Such situations has only been noticed when the input IR
-  // contains unreachable code, and leaving some extra redundant PHI nodes in
-  // such situations is considered a minor problem.
-  for (PHINode *PN : PHIsToRemove)
-    if (PN->use_empty())
-      PN->eraseFromParent();
+
+  // Remove PHI nodes that did not have any uses rewritten or add them to
+  // PHIsToRemove, so the caller can remove them after some additional cleanup.
+  // We need to redo the use_empty() check here, because even if the PHI node
+  // wasn't used when added to LocalPHIsToRemove, later added PHI nodes can be
+  // using it. This cleanup is not guaranteed to handle trees/cycles of PHI
+  // nodes that only are used by each other. Such situations have only been
+  // noticed when the input IR contains unreachable code, and leaving some extra
+  // redundant PHI nodes in such situations is considered a minor problem.
+  if (PHIsToRemove) {
+    PHIsToRemove->append(LocalPHIsToRemove.begin(), LocalPHIsToRemove.end());
+  } else {
+    for (PHINode *PN : LocalPHIsToRemove)
+      if (PN->use_empty())
+        PN->eraseFromParent();
+  }
 
   return Changed;
 }
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index a8302b7ccfc1a..aaa28feb32b86 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2537,7 +2537,16 @@ Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) {
     return OpV;
 
   ToUpdate.push_back(OpI);
-  formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder);
+  SmallVector<PHINode *, 1> PHIsToRemove;
+  formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
+  for (PHINode *PN : PHIsToRemove) {
+    if (!PN->use_empty())
+      continue;
+    InsertedValues.erase(PN);
+    InsertedPostIncValues.erase(PN);
+    PN->eraseFromParent();
+  }
+
   return User->getOperand(OpIdx);
 }
diff --git a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
new file mode 100644
index 0000000000000..24e9592a6c2be
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -hardware-loops -S -verify-loop-lcssa | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "ppc64-unknown-linux-elf"
+
+declare i1 @cond() readnone
+
+; Make sure we do not crash on the test.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25:%.*]], label [[FOR_BODY]]
+; CHECK: while.cond25:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ], [ 0, [[FOR_INC]] ]
+; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[FOR_INC]] ]
+; CHECK-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[INDVARS_IV349]], 0
+; CHECK-NEXT: br i1 [[CMP26_NOT]], label [[WHILE_END187:%.*]], label [[LAND_RHS]]
+; CHECK: land.rhs:
+; CHECK-NEXT: [[INDVARS_IV_NEXT350]] = add nsw i64 [[INDVARS_IV349]], -1
+; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond()
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
+; CHECK-NEXT: br i1 [[C_1]], label [[WHILE_COND25]], label [[WHILE_END:%.*]]
+; CHECK: while.end:
+; CHECK-NEXT: [[INDVAR_LCSSA1:%.*]] = phi i64 [ [[INDVAR]], [[LAND_RHS]] ]
+; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C_2]], label [[WHILE_END187]], label [[WHILE_COND35_PREHEADER:%.*]]
+; CHECK: while.cond35.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR_LCSSA1]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 51
+; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 [[TMP1]])
+; CHECK-NEXT: br label [[WHILE_COND35:%.*]]
+; CHECK: while.cond35:
+; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
+; CHECK-NEXT: br i1 [[TMP2]], label [[LAND_RHS37:%.*]], label [[IF_END51:%.*]]
+; CHECK: land.rhs37:
+; CHECK-NEXT: br label [[WHILE_COND35]]
+; CHECK: if.end51:
+; CHECK-NEXT: br label [[WHILE_COND_BACKEDGE:%.*]]
+; CHECK: while.cond.backedge:
+; CHECK-NEXT: br label [[WHILE_COND]]
+; CHECK: while.end187:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %while.cond
+
+while.cond: ; preds = %while.cond.backedge, %entry
+  br label %for.body
+
+for.body: ; preds = %for.inc, %while.cond
+  br label %for.inc
+
+for.inc: ; preds = %for.body
+  %c.0 = call i1 @cond()
+  br i1 %c.0, label %while.cond25, label %for.body
+
+while.cond25: ; preds = %land.rhs, %for.inc
+  %indvars.iv349 = phi i64 [ %indvars.iv.next350, %land.rhs ], [ 50, %for.inc ]
+  %cmp26.not = icmp eq i64 %indvars.iv349, 0
+  br i1 %cmp26.not, label %while.end187, label %land.rhs
+
+land.rhs: ; preds = %while.cond25
+  %indvars.iv.next350 = add nsw i64 %indvars.iv349, -1
+  %c.1 = call i1 @cond()
+  br i1 %c.1, label %while.cond25, label %while.end
+
+while.end: ; preds = %land.rhs
+  %c.2 = call i1 @cond()
+  br i1 %c.2, label %while.end187, label %while.cond35.preheader
+
+while.cond35.preheader: ; preds = %while.end
+  %0 = and i64 %indvars.iv349, 4294967295
+  br label %while.cond35
+
+while.cond35: ; preds = %land.rhs37, %while.cond35.preheader
+  %indvars.iv351 = phi i64 [ %0, %while.cond35.preheader ], [ %indvars.iv.next352, %land.rhs37 ]
+  %cmp36 = icmp sgt i64 %indvars.iv351, 0
+  br i1 %cmp36, label %land.rhs37, label %if.end51
+
+land.rhs37: ; preds = %while.cond35
+  %indvars.iv.next352 = add nsw i64 %indvars.iv351, -1
+  br label %while.cond35
+
+if.end51: ; preds = %while.cond35
+  br label %while.cond.backedge
+
+while.cond.backedge: ; preds = %if.end51
+  br label %while.cond
+
+while.end187: ; preds = %while.end, %while.cond25
+  ret void
+}

From 95ddb9ff673001b2745c871c5751d165c2a27546 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sat, 1 Aug 2020 20:58:05 +0100
Subject: [PATCH 098/600] [PPC] Adjust run line for hardware-loops-crash.ll

Looks like %s was accidentally dropped.
---
 llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
index 24e9592a6c2be..23e5b44fa87d1 100644
--- a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -S -verify-loop-lcssa | FileCheck %s
+; RUN: opt -hardware-loops -S -verify-loop-lcssa %s | FileCheck %s
 
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "ppc64-unknown-linux-elf"

From 4a19e6156ed5b6e87d708e6de29b675be69c574f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 1 Aug 2020 09:59:09 -0700
Subject: [PATCH 099/600] [InstCombine] Fold abs(-x) -> abs(x)

Negating the input doesn't matter. I left a FIXME to copy the nsw flag
if it's present on the neg but not on the abs.
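A minimal IR sketch of the fold, for illustration only (the function
name below is made up; the i1 argument of llvm.abs is the
int-min-is-poison flag):

  declare i32 @llvm.abs.i32(i32, i1)

  define i32 @abs_of_neg_sketch(i32 %x) {
    ; nsw on the negation already guarantees %x is not INT_MIN
    %neg = sub nsw i32 0, %x
    %r = call i32 @llvm.abs.i32(i32 %neg, i1 false)
    ; after the fold this is simply:
    ;   %r = call i32 @llvm.abs.i32(i32 %x, i1 false)
    ret i32 %r
  }

Copying nsw, as the FIXME suggests, would mean emitting the folded call
with i1 true in this case, which is safe because nsw on the neg already
rules out INT_MIN.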
Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D85055
---
 .../InstCombine/InstCombineCalls.cpp          | 10 ++++++++++
 .../Transforms/InstCombine/abs-intrinsic.ll   | 20 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ed93c33c1a597..4eb3e2e4434fd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -769,6 +769,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (Value *V = lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
       return replaceInstUsesWith(CI, V);
     return nullptr;
+  case Intrinsic::abs: {
+    Value *IIOperand = II->getArgOperand(0);
+    // abs(-x) -> abs(x)
+    // TODO: Copy nsw if it was present on the neg?
+    Value *X;
+    if (match(IIOperand, m_Neg(m_Value(X))))
+      return replaceOperand(*II, 0, X);
+
+    break;
+  }
   case Intrinsic::bswap: {
     Value *IIOperand = II->getArgOperand(0);
     Value *X = nullptr;
diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
index ed845cc842677..9e64aea9dabe5 100644
--- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
@@ -165,3 +165,23 @@ define <4 x i1> @abs_known_not_int_min_vec(<4 x i32> %x) {
   %c2 = icmp sge <4 x i32> %abs, zeroinitializer
   ret <4 x i1> %c2
 }
+
+define i32 @abs_of_neg(i32 %x) {
+; CHECK-LABEL: @abs_of_neg(
+; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false)
+; CHECK-NEXT: ret i32 [[B]]
+;
+  %a = sub i32 0, %x
+  %b = call i32 @llvm.abs.i32(i32 %a, i1 false)
+  ret i32 %b
+}
+
+define <4 x i32> @abs_of_neg_vec(<4 x i32> %x) {
+; CHECK-LABEL: @abs_of_neg_vec(
+; CHECK-NEXT: [[B:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
+  %a = sub nsw <4 x i32> zeroinitializer, %x
+  %b = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false)
+  ret <4 x i32> %b
+}

From 85b5315dbe9d52766ab326e702d638fcf58579b5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 1 Aug 2020 10:01:58 -0700
Subject: [PATCH 100/600] [InstSimplify] Fold abs(abs(x)) -> abs(x)

It's always safe to pick the earlier abs regardless of the nsw flag.
We'll just lose it if it is on the outer abs but not the inner abs.

Differential Revision: https://reviews.llvm.org/D85053
---
 llvm/lib/Analysis/InstructionSimplify.cpp |  5 +++
 llvm/test/Transforms/InstSimplify/call.ll | 40 +++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d3928a502965b..b1438b416d56b 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5256,6 +5256,11 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
   unsigned BitWidth = ReturnType->getScalarSizeInBits();
   switch (IID) {
   case Intrinsic::abs:
+    // abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here.
+    // It is always ok to pick the earlier abs. We'll just lose nsw if it's only
+    // on the outer abs.
+    if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value())))
+      return Op0;
     // If the sign bit is clear already, then abs does not do anything.
     if (isKnownNonNegative(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
       return Op0;
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index 2325dccd17a85..344cefd5b2c3a 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -5,6 +5,46 @@
 declare i32 @llvm.abs.i32(i32, i1)
 declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1)
 
+define i32 @test_abs_abs_0(i32 %x) {
+; CHECK-LABEL: @test_abs_abs_0(
+; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false)
+; CHECK-NEXT: ret i32 [[A]]
+;
+  %a = call i32 @llvm.abs.i32(i32 %x, i1 false)
+  %b = call i32 @llvm.abs.i32(i32 %a, i1 false)
+  ret i32 %b
+}
+
+define i32 @test_abs_abs_1(i32 %x) {
+; CHECK-LABEL: @test_abs_abs_1(
+; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT: ret i32 [[A]]
+;
+  %a = call i32 @llvm.abs.i32(i32 %x, i1 true)
+  %b = call i32 @llvm.abs.i32(i32 %a, i1 false)
+  ret i32 %b
+}
+
+define i32 @test_abs_abs_2(i32 %x) {
+; CHECK-LABEL: @test_abs_abs_2(
+; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 false)
+; CHECK-NEXT: ret i32 [[A]]
+;
+  %a = call i32 @llvm.abs.i32(i32 %x, i1 false)
+  %b = call i32 @llvm.abs.i32(i32 %a, i1 true)
+  ret i32 %b
+}
+
+define i32 @test_abs_abs_3(i32 %x) {
+; CHECK-LABEL: @test_abs_abs_3(
+; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT: ret i32 [[A]]
+;
+  %a = call i32 @llvm.abs.i32(i32 %x, i1 true)
+  %b = call i32 @llvm.abs.i32(i32 %a, i1 true)
+  ret i32 %b
+}
+
 ; If the sign bit is known zero, the abs is not needed.
 
 define i32 @zext_abs(i31 %x) {

From e297d928dcde31ac92eff72532095f4f657f2ebd Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 1 Aug 2020 13:25:18 -0700
Subject: [PATCH 101/600] [X86] Add assembler support for {disp8} and {disp32} to control the size of displacement used for memory operands.

These prefixes should override the default behavior and force a larger
displacement size. I don't believe gas issues any warning if you use
{disp8} when a 32-bit displacement is already required. And this patch
doesn't either.

This completes the {disp8} and {disp32} support from PR46650.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D84793
---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 12 +++
 .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h |  2 +
 .../X86/MCTargetDesc/X86MCCodeEmitter.cpp     | 59 ++++++++-----
 llvm/test/MC/X86/x86-64.s                     | 88 +++++++++++++++++++
 4 files changed, 137 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6b4f2e33f67de..49c01d7b9ef0b 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3572,6 +3572,12 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (ForcedVEXEncoding == VEXEncoding_VEX3)
     Prefixes |= X86::IP_USE_VEX3;
 
+  // Set encoded flags for {disp8} and {disp32}.
+  if (ForcedDispEncoding == DispEncoding_Disp8)
+    Prefixes |= X86::IP_USE_DISP8;
+  else if (ForcedDispEncoding == DispEncoding_Disp32)
+    Prefixes |= X86::IP_USE_DISP32;
+
   if (Prefixes)
     Inst.setFlags(Prefixes);
 
@@ -3806,6 +3812,12 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (ForcedVEXEncoding == VEXEncoding_VEX3)
     Prefixes |= X86::IP_USE_VEX3;
 
+  // Set encoded flags for {disp8} and {disp32}.
+  if (ForcedDispEncoding == DispEncoding_Disp8)
+    Prefixes |= X86::IP_USE_DISP8;
+  else if (ForcedDispEncoding == DispEncoding_Disp32)
+    Prefixes |= X86::IP_USE_DISP32;
+
   if (Prefixes)
     Inst.setFlags(Prefixes);
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 79f07d3c7792a..b6e8d4813d4ca 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -62,6 +62,8 @@ namespace X86 {
     IP_HAS_LOCK = 16,
     IP_HAS_NOTRACK = 32,
     IP_USE_VEX3 = 64,
+    IP_USE_DISP8 = 128,
+    IP_USE_DISP32 = 256,
   };
 
   enum OperandType : unsigned {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index abdc0f156b9f9..0de94cda2d739 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -505,12 +505,18 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
     return;
   }
 
-  // Determine whether a SIB byte is needed.
-  // If no BaseReg, issue a RIP relative instruction only if the MCE can
-  // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
-  // 2-7) and absolute references.
+  // Check for presence of {disp8} or {disp32} pseudo prefixes.
+  bool UseDisp8 = MI.getFlags() & X86::IP_USE_DISP8;
+  bool UseDisp32 = MI.getFlags() & X86::IP_USE_DISP32;
+
+  // We only allow no displacement if no pseudo prefix is present.
+  bool AllowNoDisp = !UseDisp8 && !UseDisp32;
+  // Disp8 is allowed unless the {disp32} prefix is present.
+  bool AllowDisp8 = !UseDisp32;
 
-  if ( // The SIB byte must be used if there is an index register.
+  // Determine whether a SIB byte is needed.
+  if (// The SIB byte must be used if there is an index register or the
+      // encoding requires a SIB byte.
      !ForceSIB && IndexReg.getReg() == 0 &&
       // The SIB byte must be used if the base is ESP/RSP/R12, all of which
       // encode to an R/M value of 4, which indicates that a SIB byte is
@@ -526,12 +532,12 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
       return;
     }
 
-    // If the base is not EBP/ESP and there is no displacement, use simple
-    // indirect register encoding, this handles addresses like [EAX]. The
-    // encoding for [EBP] with no displacement means [disp32] so we handle it
-    // by emitting a displacement of 0 below.
+    // If the base is not EBP/ESP/R12/R13 and there is no displacement, use
+    // simple indirect register encoding, this handles addresses like [EAX].
+    // The encoding for [EBP] or [R13] with no displacement means [disp32] so we
+    // handle it by emitting a displacement of 0 later.
     if (BaseRegNo != N86::EBP) {
-      if (Disp.isImm() && Disp.getImm() == 0) {
+      if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp) {
         emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
         return;
       }
@@ -550,7 +556,10 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
     }
 
     // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
-    if (Disp.isImm()) {
+    // Including a compressed disp8 for EVEX instructions that support it.
+    // This also handles the 0 displacement for [EBP] or [R13]. We can't use
+    // disp8 if the {disp32} pseudo prefix is present.
+ if (Disp.isImm() && AllowDisp8) { int ImmOffset = 0; if (isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) { emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS); @@ -560,7 +569,9 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, } } - // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + // Otherwise, emit the most general non-SIB encoding: [REG+disp32]. + // Displacement may be 0 for [EBP] or [R13] case if {disp32} pseudo prefix + // prevented using disp8 above. emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS); unsigned Opcode = MI.getOpcode(); unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax @@ -580,21 +591,26 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, if (BaseReg == 0) { // If there is no base register, we emit the special case SIB byte with // MOD=0, BASE=5, to JUST get the index, scale, and displacement. + BaseRegNo = 5; emitByte(modRMByte(0, RegOpcodeField, 4), OS); ForceDisp32 = true; - } else if (Disp.isImm() && Disp.getImm() == 0 && - // Base reg can't be anything that ends up with '5' as the base - // reg, it is the magic [*] nomenclature that indicates no base. + } else if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp && + // Base reg can't be EBP/RBP/R13 as that would end up with '5' as + // the base field, but that is the magic [*] nomenclature that + // indicates no base when mod=0. For these cases we'll emit a 0 + // displacement instead. BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte emitByte(modRMByte(0, RegOpcodeField, 4), OS); - } else if (Disp.isImm() && + } else if (Disp.isImm() && AllowDisp8 && isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) { - // Emit the disp8 encoding. + // Displacement fits in a byte or matches an EVEX compressed disp8, use + // disp8 encoding. This also handles EBP/R13 base with 0 displacement unless + // {disp32} pseudo prefix was used. emitByte(modRMByte(1, RegOpcodeField, 4), OS); - ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + ForceDisp8 = true; } else { - // Emit the normal disp32 encoding. + // Otherwise, emit the normal disp32 encoding. emitByte(modRMByte(2, RegOpcodeField, 4), OS); ForceDisp32 = true; } @@ -605,11 +621,6 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned IndexRegNo = IndexReg.getReg() ? getX86RegNum(IndexReg) : 4; - // Handle the SIB byte for the case where there is no base, see Intel - // Manual 2A, table 2-7. The displacement has already been output. - if (BaseReg == 0) - BaseRegNo = 5; - emitSIBByte(SS, IndexRegNo, BaseRegNo, OS); // Do we need to output a displacement? 
diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s index 38d5c6173f3b4..c61cae69c3ffe 100644 --- a/llvm/test/MC/X86/x86-64.s +++ b/llvm/test/MC/X86/x86-64.s @@ -1904,3 +1904,91 @@ ud1 %rdx, %rdi // CHECK: ud1q (%rbx), %rcx // CHECK: encoding: [0x48,0x0f,0xb9,0x0b] ud2b (%rbx), %rcx + +// Requires no displacement by default +// CHECK: movl $1, (%rax) +// CHECK: encoding: [0xc7,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rax) +// CHECK: encoding: [0xc7,0x40,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rax) +// CHECK: encoding: [0xc7,0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, (%rax) +{disp8} movl $1, (%rax) +{disp32} movl $1, (%rax) + +// Requires disp8 by default +// CHECK: movl $1, (%rbp) +// CHECK: encoding: [0xc7,0x45,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rbp) +// CHECK: encoding: [0xc7,0x45,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rbp) +// CHECK: encoding: [0xc7,0x85,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, (%rbp) +{disp8} movl $1, (%rbp) +{disp32} movl $1, (%rbp) + +// Requires disp8 by default +// CHECK: movl $1, (%r13) +// CHECK: encoding: [0x41,0xc7,0x45,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%r13) +// CHECK: encoding: [0x41,0xc7,0x45,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%r13) +// CHECK: encoding: [0x41,0xc7,0x85,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, (%r13) +{disp8} movl $1, (%r13) +{disp32} movl $1, (%r13) + +// Requires disp8 by default +// CHECK: movl $1, 8(%rax) +// CHECK: encoding: [0xc7,0x40,0x08,0x01,0x00,0x00,0x00] +// CHECK: movl $1, 8(%rax) +// CHECK: encoding: [0xc7,0x40,0x08,0x01,0x00,0x00,0x00] +// CHECK: movl $1, 8(%rax) +// CHECK: encoding: [0xc7,0x80,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, 8(%rax) +{disp8} movl $1, 8(%rax) +{disp32} movl $1, 8(%rax) + +// Requires no displacement by default +// CHECK: movl $1, (%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x04,0x98,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x44,0x98,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x84,0x98,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, (%rax,%rbx,4) +{disp8} movl $1, (%rax,%rbx,4) +{disp32} movl $1, (%rax,%rbx,4) + +// Requires disp8 by default. +// CHECK: movl $1, 8(%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x44,0x98,0x08,0x01,0x00,0x00,0x00] +// CHECK: movl $1, 8(%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x44,0x98,0x08,0x01,0x00,0x00,0x00] +// CHECK: movl $1, 8(%rax,%rbx,4) +// CHECK: encoding: [0xc7,0x84,0x98,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, 8(%rax,%rbx,4) +{disp8} movl $1, 8(%rax,%rbx,4) +{disp32} movl $1, 8(%rax,%rbx,4) + +// Requires disp8 by default. +// CHECK: movl $1, (%rbp,%rbx,4) +// CHECK: encoding: [0xc7,0x44,0x9d,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rbp,%rbx,4) +// CHECK: encoding: [0xc7,0x44,0x9d,0x00,0x01,0x00,0x00,0x00] +// CHECK: movl $1, (%rbp,%rbx,4) +// CHECK: encoding: [0xc7,0x84,0x9d,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +movl $1, (%rbp,%rbx,4) +{disp8} movl $1, (%rbp,%rbx,4) +{disp32} movl $1, (%rbp,%rbx,4) + +// Requires disp8 by default. 
+// CHECK: movl $1, (%r13,%rbx,4)
+// CHECK: encoding: [0x41,0xc7,0x44,0x9d,0x00,0x01,0x00,0x00,0x00]
+// CHECK: movl $1, (%r13,%rbx,4)
+// CHECK: encoding: [0x41,0xc7,0x44,0x9d,0x00,0x01,0x00,0x00,0x00]
+// CHECK: movl $1, (%r13,%rbx,4)
+// CHECK: encoding: [0x41,0xc7,0x84,0x9d,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+movl $1, (%r13,%rbx,4)
+{disp8} movl $1, (%r13,%rbx,4)
+{disp32} movl $1, (%r13,%rbx,4)

From 25af353b0e74907d5d50c8616b885bd1f73a68b3 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Thu, 30 Jul 2020 19:14:02 +0200
Subject: [PATCH 102/600] [NewPM][LVI] Abandon LVI after CVP

As mentioned on D70376, LVI can currently cause performance issues when
running under NewPM. The problem is that, unlike the legacy pass manager,
NewPM will not immediately discard the LVI analysis if the following pass
does not need it. This is a problem, because LVI has a high memory
requirement, and mass invalidation of LVI values is very inefficient.
LVI should only be alive during passes that actively interact with it.

This patch addresses the issue by explicitly abandoning LVI after CVP,
which gets us back to the LegacyPM behavior.

Differential Revision: https://reviews.llvm.org/D84959
---
 .../test/CodeGen/thinlto-distributed-newpm.ll |  2 +-
 .../Scalar/CorrelatedValuePropagation.cpp     | 18 +++++++++++++-----
 llvm/test/Other/new-pm-defaults.ll            |  3 +++
 llvm/test/Other/new-pm-thinlto-defaults.ll    |  3 +++
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |  3 +++
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |  3 +++
 .../new-pm-thinlto-prelink-pgo-defaults.ll    | 19 +++++++++++++++++++
 ...w-pm-thinlto-prelink-samplepgo-defaults.ll |  3 +++
 8 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll
index caf294df8eb85..9f9a8bec4ef5d 100644
--- a/clang/test/CodeGen/thinlto-distributed-newpm.ll
+++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll
@@ -97,6 +97,7 @@
 ; CHECK-O: Running pass: JumpThreadingPass on main
 ; CHECK-O: Running analysis: LazyValueAnalysis on main
 ; CHECK-O: Running pass: CorrelatedValuePropagationPass on main
+; CHECK-O: Invalidating analysis: LazyValueAnalysis on main
 ; CHECK-O: Running pass: SimplifyCFGPass on main
 ; CHECK-O3: Running pass: AggressiveInstCombinePass on main
 ; CHECK-O: Running pass: InstCombinePass on main
@@ -144,7 +145,6 @@
 ; CHECK-O: Invalidating analysis: BasicAA on main
 ; CHECK-O: Invalidating analysis: AAManager on main
 ; CHECK-O: Invalidating analysis: MemorySSAAnalysis on main
-; CHECK-O: Invalidating analysis: LazyValueAnalysis on main
 ; CHECK-O: Invalidating analysis: LoopAnalysis on main
 ; CHECK-O: Invalidating analysis: PhiValuesAnalysis on main
 ; CHECK-O: Invalidating analysis: MemoryDependenceAnalysis on main
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 48968166c605f..397d62a5d21d3 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -973,11 +973,19 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
   bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F));

   PreservedAnalyses PA;
-  if (!Changed)
-    return PreservedAnalyses::all();
-  PA.preserve<GlobalsAA>();
-  PA.preserve<DominatorTreeAnalysis>();
-  PA.preserve<LazyValueAnalysis>();
+  if (!Changed) {
+    PA = PreservedAnalyses::all();
+  } else {
+    PA.preserve<GlobalsAA>();
+    PA.preserve<DominatorTreeAnalysis>();
+    PA.preserve<LazyValueAnalysis>();
+  }
+
+  // Keeping LVI alive is expensive, both because it uses
a lot of memory, and
+  // because invalidating values in LVI is expensive. While CVP does preserve
+  // LVI, we know that passes after JumpThreading+CVP will not need the result
+  // of this analysis, so we forcefully discard it early.
+  PA.abandon<LazyValueAnalysis>();
   return PA;
 }
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 8f75e3ce0bf36..59c24acb17f04 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -147,6 +147,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
 ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O3-NEXT: AggressiveInstCombinePass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
@@ -200,7 +201,9 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
 ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index a39656cd26f66..0b9b52a57e2a5 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -112,6 +112,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
 ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
@@ -173,7 +174,9 @@
 ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass
 ; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run
 ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 333f4c0d413ed..7efc5357253e8 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -86,6 +86,7 @@
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
 ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
@@ -146,7 +147,9 @@
 ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
+; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass
+; CHECK-O23SZ-NEXT:
Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 360f7ee07037f..9c5e36c5886ac 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -94,6 +94,7 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -154,7 +155,9 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f94b0992431fd..45bb71a6d304e 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -127,6 +127,7 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -196,7 +197,9 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass @@ -213,10 +216,26 @@ ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. 
+; CHECK-O23SZ-NEXT: Clearing all analysis results for: +; CHECK-O23SZ-NEXT: Invalidating analysis: DominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: MemorySSAAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: LoopAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: BranchProbabilityAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: BlockFrequencyAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: ScalarEvolutionAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: InnerAnalysisManagerProxy +; CHECK-O23SZ-NEXT: Invalidating analysis: PhiValuesAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: MemoryDependenceAnalysis +; CHECK-O23SZ-NEXT: Invalidating analysis: DemandedBitsAnalysis +; CHECK-O3-NEXT: Invalidating analysis: DominanceFrontierAnalysis +; CHECK-O3-NEXT: Invalidating analysis: RegionInfoAnalysis +; CHECK-O23SZ-NEXT: Clearing all analysis results for: foo ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis on bar ; CHECK-EXT: Running pass: {{.*}}::Bye ; CHECK-O-NEXT: Finished {{.*}}Module pass manager run. +; CHECK-O23SZ-NEXT: Clearing all analysis results for: foo ; CHECK-O-NEXT: Running pass: NameAnonGlobalPass ; CHECK-O-NEXT: Running pass: PrintModulePass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index b062afdfb49cd..d97fe18524db8 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -93,6 +93,7 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -152,7 +153,9 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Starting {{.*}}Function pass manager run ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass From 8dd4e3ceb804a58bcf25e6856fc6fde5e1995a66 Mon Sep 17 00:00:00 2001 From: Andrei Lebedev Date: Sat, 1 Aug 2020 15:54:11 -0700 Subject: [PATCH 103/600] Updated the -I option description. --- clang/docs/ClangCommandLineReference.rst | 4 ++-- clang/include/clang/Driver/Options.td | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 699a0be720368..8eb010eae2659 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -1014,9 +1014,9 @@ Include path management Flags controlling how ``#include``\s are resolved to files. -.. option:: -I, --include-directory , --include-directory= +.. option:: -I, --include-directory , --include-directory= -Add directory to the list of include files search paths. 
If there are multiple -I options, these directories are searched in the order they are given before the standard system directories are searched. If the same directory is in the SYSTEM include search paths, for example if also specified with -isystem, the -I option will be ignored

+Add directory to include search path. If there are multiple -I options, these directories are searched in the order they are given before the standard system directories are searched. If the same directory is in the SYSTEM include search paths, for example if also specified with -isystem, the -I option will be ignored

 .. option:: -I-, --include-barrier

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 889035a0815e0..16051934c1e0b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -395,7 +395,12 @@ def I_ : Flag<["-"], "I-">, Group<I_Group>,
                "remove current directory from include path">;
 def I : JoinedOrSeparate<["-"], "I">, Group<I_Group>,
     Flags<[CC1Option,CC1AsOption]>, MetaVarName<"<dir>">,
-    HelpText<"Add directory to include search path">;
+    HelpText<"Add directory to include search path. If there are multiple -I "
+             "options, these directories are searched in the order they are "
+             "given before the standard system directories are searched. "
+             "If the same directory is in the SYSTEM include search paths, for "
+             "example if also specified with -isystem, the -I option will be "
+             "ignored">;
 def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group,
     MetaVarName<"<dir>">, HelpText<"Add directory to library search path">;
 def MD : Flag<["-"], "MD">, Group<M_Group>,
@@ -1244,7 +1249,7 @@ def finline_functions : Flag<["-"], "finline-functions">, Group<f_clang_Group>,
 def finline_hint_functions: Flag<["-"], "finline-hint-functions">, Group<f_clang_Group>,
   Flags<[CC1Option]>, HelpText<"Inline functions which are (explicitly or implicitly) marked inline">;
 def finline : Flag<["-"], "finline">, Group<clang_ignored_f_Group>;
-def fglobal_isel : Flag<["-"], "fglobal-isel">, Group,
+def fglobal_isel : Flag<["-"], "fglobal-isel">, Group,
   HelpText<"Enables the global instruction selector">;
 def fexperimental_isel : Flag<["-"], "fexperimental-isel">, Group<f_clang_Group>,
   Alias<fglobal_isel>;

From dc3388b0209d17f7ee2f4dc3e4f072dc397dd75d Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov
Date: Fri, 31 Jul 2020 17:18:06 -0700
Subject: [PATCH 104/600] [msan] Respect no_huge_pages_for_shadow.

Disable huge pages in the MSan shadow region when
no_huge_pages_for_shadow == true (default).
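For illustration only: no_huge_pages_for_shadow is a regular MSan runtime
flag, so, assuming the standard MSAN_OPTIONS flag syntax and a hypothetical
binary name, huge pages for the shadow could be re-enabled at run time with

  MSAN_OPTIONS=no_huge_pages_for_shadow=0 ./app

since the default (true) keeps them disabled after this change.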
Differential Revision: https://reviews.llvm.org/D85061
---
 compiler-rt/lib/msan/msan_linux.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/msan/msan_linux.cpp b/compiler-rt/lib/msan/msan_linux.cpp
index d61e9dee30654..bfdae0b920c9b 100644
--- a/compiler-rt/lib/msan/msan_linux.cpp
+++ b/compiler-rt/lib/msan/msan_linux.cpp
@@ -142,7 +142,7 @@ bool InitShadow(bool init_origins) {
     if (map) {
       if (!CheckMemoryRangeAvailability(start, size))
         return false;
-      if (!MmapFixedNoReserve(start, size, kMemoryLayout[i].name))
+      if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
         return false;
       if (common_flags()->use_madv_dontdump)
         DontDumpShadowMemory(start, size);

From e281376e996e37fb6411363510e917b5b2c53c89 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Sat, 1 Aug 2020 18:19:14 -0700
Subject: [PATCH 105/600] [ELF] --wrap: set isUsedInRegularObj of __wrap_ only
 if it is defined

Fixes PR46169
---
 lld/ELF/Driver.cpp                  | 5 +++--
 lld/test/ELF/wrap-dynamic-undef.s   | 3 +--
 lld/test/ELF/wrap-shlib-undefined.s | 5 ++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index cdb7355968377..3e60ffdb1dc1d 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1782,7 +1782,7 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
       continue;

     Symbol *real = addUndefined(saver.save("__real_" + name));
-    Symbol *wrap = addUndefined(saver.save("__wrap_" + name));
+    Symbol *wrap = addUnusedUndefined(saver.save("__wrap_" + name));
     v.push_back({sym, real, wrap});

     // We want to tell LTO not to inline symbols to be overwritten
@@ -1792,7 +1792,8 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {

     // Tell LTO not to eliminate these symbols.
     sym->isUsedInRegularObj = true;
-    wrap->isUsedInRegularObj = true;
+    if (wrap->isDefined())
+      wrap->isUsedInRegularObj = true;
   }
   return v;
 }
diff --git a/lld/test/ELF/wrap-dynamic-undef.s b/lld/test/ELF/wrap-dynamic-undef.s
index 2abb826411b39..af2871cfe6eae 100644
--- a/lld/test/ELF/wrap-dynamic-undef.s
+++ b/lld/test/ELF/wrap-dynamic-undef.s
@@ -8,10 +8,9 @@
 # Test that the dynamic relocation uses foo. We used to produce a
 # relocation with __real_foo.

-# CHECK: Symbol table '.dynsym' contains 3 entries:
+# CHECK: Symbol table '.dynsym' contains 2 entries:
 # CHECK: NOTYPE LOCAL DEFAULT UND
 # CHECK-NEXT: NOTYPE GLOBAL DEFAULT UND foo
-# CHECK-NEXT: NOTYPE GLOBAL DEFAULT UND __wrap_foo

 .global _start
 _start:
diff --git a/lld/test/ELF/wrap-shlib-undefined.s b/lld/test/ELF/wrap-shlib-undefined.s
index 8bbda963f0702..b0451b2865abe 100644
--- a/lld/test/ELF/wrap-shlib-undefined.s
+++ b/lld/test/ELF/wrap-shlib-undefined.s
@@ -12,12 +12,11 @@
 # RUN: ld.lld %t.o %t.so --wrap=foo -o %t
 # RUN: llvm-readelf --dyn-syms %t | FileCheck %s

-## FIXME GNU ld does not export __wrap_foo
 ## The reference __real_foo from %t.so causes foo to be exported.
+## __wrap_foo is not used, thus not exported.
-# CHECK: Symbol table '.dynsym' contains 4 entries:
+# CHECK: Symbol table '.dynsym' contains 3 entries:
 # CHECK: NOTYPE LOCAL DEFAULT UND
 # CHECK-NEXT: NOTYPE GLOBAL DEFAULT UND bar
-# CHECK-NEXT: NOTYPE GLOBAL DEFAULT UND __wrap_foo
 # CHECK-NEXT: NOTYPE GLOBAL DEFAULT 6 foo

 .globl _start, foo

From e12a028ed3ed79fba1e1aa5212c7e22d83949192 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Fri, 31 Jul 2020 21:32:27 -0700
Subject: [PATCH 106/600] [llvm-jitlink] Support promotion of ODR weak symbols
 in -harness mode.
This prevents weak symbols from being immediately dead-stripped when not
directly referenced from the test harness, enabling use of weak symbols from
the code under test.
---
 .../X86/Inputs/MachO_test_harness_test.s      |  7 +++
 llvm/tools/llvm-jitlink/llvm-jitlink.cpp      | 43 ++++++++++++++++---
 llvm/tools/llvm-jitlink/llvm-jitlink.h        |  1 +
 3 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s
index e0764a93fa435..3ca616a4a7294 100644
--- a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s
+++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s
@@ -17,9 +17,16 @@ _public_func_to_interpose:
 _private_func_to_interpose:
         retq

+        .globl _used_weak
+        .weak_definition _used_weak
+        .p2align 4, 0x90
+_used_weak:
+        retq
+
         .globl _public_func_to_test
         .p2align 4, 0x90
 _public_func_to_test:
+        callq _used_weak
         jmp _public_func_to_interpose

         .p2align 4, 0x90
diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
index 6828944ced238..22d29c12e1923 100644
--- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
+++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
@@ -188,10 +188,28 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) {
     if (!Sym->hasName())
       continue;

-    if (S.HarnessExternals.count(Sym->getName())) {
+    if (Sym->getLinkage() == Linkage::Weak) {
+      if (!S.CanonicalWeakDefs.count(Sym->getName()) ||
+          S.CanonicalWeakDefs[Sym->getName()] != G.getName()) {
+        LLVM_DEBUG({
+          dbgs() << "  Externalizing weak symbol " << Sym->getName() << "\n";
+        });
+        DefinitionsToRemove.push_back(Sym);
+      } else {
+        LLVM_DEBUG({
+          dbgs() << "  Making weak symbol " << Sym->getName() << " strong\n";
+        });
+        if (S.HarnessExternals.count(Sym->getName()))
+          Sym->setScope(Scope::Default);
+        else
+          Sym->setScope(Scope::Hidden);
+        Sym->setLinkage(Linkage::Strong);
+      }
+    } else if (S.HarnessExternals.count(Sym->getName())) {
       LLVM_DEBUG(dbgs() << "  Promoting " << Sym->getName() << "\n");
       Sym->setScope(Scope::Default);
       Sym->setLive(true);
+      continue;
     } else if (S.HarnessDefinitions.count(Sym->getName())) {
       LLVM_DEBUG(dbgs() << "  Externalizing " << Sym->getName() << "\n");
       DefinitionsToRemove.push_back(Sym);
@@ -504,10 +522,6 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD,
       if (!Name)
         return Name.takeError();

-      // Skip symbols that aren't in the HarnessExternals set.
-      if (!S.HarnessExternals.count(*Name))
-        continue;
-
       // Skip symbols that have type SF_File.
       if (auto SymType = Sym.getType()) {
         if (*SymType == object::SymbolRef::ST_File)
           continue;
       } else
         return SymType.takeError();

-      auto InternedName = S.ES.intern(*Name);
       auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
       if (!SymFlags)
         return SymFlags.takeError();

-      *SymFlags |= JITSymbolFlags::Exported;
+      if (SymFlags->isWeak()) {
+        // If this is a weak symbol that's not defined in the harness then we
+        // need to either mark it as strong (if this is the first definition
+        // that we've seen) or discard it.
+ if (S.HarnessDefinitions.count(*Name) || S.CanonicalWeakDefs.count(*Name)) + continue; + S.CanonicalWeakDefs[*Name] = O->getBufferIdentifier(); + *SymFlags &= ~JITSymbolFlags::Weak; + if (!S.HarnessExternals.count(*Name)) + *SymFlags &= ~JITSymbolFlags::Exported; + } else if (S.HarnessExternals.count(*Name)) { + *SymFlags |= JITSymbolFlags::Exported; + } else { + // Skip symbols that aren't in the HarnessExternals set. + continue; + } + auto InternedName = S.ES.intern(*Name); SymbolFlags[InternedName] = std::move(*SymFlags); } diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.h b/llvm/tools/llvm-jitlink/llvm-jitlink.h index c16aed9f2b504..227d3a9225c83 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.h +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.h @@ -86,6 +86,7 @@ struct Session { StringSet<> HarnessFiles; StringSet<> HarnessExternals; StringSet<> HarnessDefinitions; + DenseMap CanonicalWeakDefs; private: Session(Triple TT, Error &Err); From 0f5b70769d15d8cc728dcba353a33fb459450381 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 1 Aug 2020 17:44:34 -0700 Subject: [PATCH 107/600] [llvm-jitlink] Add -phony-externals option to suppress unresolved externals. The -phony-externals option adds a generator which explicitly defines any otherwise unresolved externals as null. This transforms link-time unresolved-symbol errors into potential runtime null pointer accesses (if an unresolved external is actually accessed during execution). This option can be useful in -harness mode to avoid having to mock a large number of symbols that are not reachable at runtime (e.g. unused methods referenced by a class vtable). --- .../JITLink/JITLinkGeneric.cpp | 6 ---- .../X86/Inputs/MachO_test_harness_test.s | 1 + .../JITLink/X86/MachO_test_harness_harness.s | 4 ++- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 30 +++++++++++++++++-- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index e0901186347f5..7c60d51677188 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -337,12 +337,6 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { dbgs() << " " << Sym->getName() << ": " << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); - assert(llvm::all_of(G->external_symbols(), - [](Symbol *Sym) { - return Sym->getAddress() != 0 || - Sym->getLinkage() == Linkage::Weak; - }) && - "All strong external symbols should have been resolved by now"); } void JITLinkerBase::copyBlockContentToWorkingMemory( diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s index 3ca616a4a7294..337f467d09677 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_test_harness_test.s @@ -27,6 +27,7 @@ _used_weak: .p2align 4, 0x90 _public_func_to_test: callq _used_weak + callq _used_unresolved_external jmp _public_func_to_interpose .p2align 4, 0x90 diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_test_harness_harness.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_test_harness_harness.s index 7fdddf2a64ca7..ee510387b35f2 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_test_harness_harness.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_test_harness_harness.s @@ -3,7 +3,9 @@ # RUN: -o %t/file_to_test.o 
%S/Inputs/MachO_test_harness_test.s # RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj \ # RUN: -o %t/test_harness.o %s -# RUN: llvm-jitlink -noexec -check %s %t/file_to_test.o \ +# RUN: not llvm-jitlink -noexec -check %s %t/file_to_test.o \ +# RUN: -harness %t/test_harness.o +# RUN: llvm-jitlink -noexec -phony-externals -check %s %t/file_to_test.o \ # RUN: -harness %t/test_harness.o # # Check that we diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 22d29c12e1923..798087d8cae7b 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -132,6 +132,11 @@ static cl::opt ShowRelocatedSectionContents( cl::desc("show section contents after fixups have been applied"), cl::init(false)); +static cl::opt PhonyExternals( + "phony-externals", + cl::desc("resolve all otherwise unresolved externals to null"), + cl::init(false)); + ExitOnError ExitOnErr; namespace llvm { @@ -179,9 +184,9 @@ static Error applyHarnessPromotions(Session &S, LinkGraph &G) { LLVM_DEBUG(dbgs() << "Appling promotions to graph " << G.getName() << "\n"); - // If it isn't then promote any symbols referenced by the harness to default - // scope, remove all symbols that clash with harness definitions, and demote - // all others. + // If this graph is part of the test then promote any symbols referenced by + // the harness to default scope, remove all symbols that clash with harness + // definitions, demote all other definitions. std::vector DefinitionsToRemove; for (auto *Sym : G.defined_symbols()) { @@ -560,6 +565,18 @@ Error LLVMJITLinkObjectLinkingLayer::add(JITDylib &JD, return JD.define(std::move(MU)); } +class PhonyExternalsGenerator : public JITDylib::DefinitionGenerator { +public: + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) override { + SymbolMap PhonySymbols; + for (auto &KV : LookupSet) + PhonySymbols[KV.first] = JITEvaluatedSymbol(0, JITSymbolFlags::Exported); + return JD.define(absoluteSymbols(std::move(PhonySymbols))); + } +}; + Expected> Session::Create(Triple TT) { Error Err = Error::success(); std::unique_ptr S(new Session(std::move(TT), Err)); @@ -813,6 +830,10 @@ Error loadDylibs() { return Error::success(); } +void addPhonyExternalsGenerator(Session &S) { + S.MainJD->addGenerator(std::make_unique()); +} + Error loadObjects(Session &S) { std::map IdxToJLD; @@ -1039,6 +1060,9 @@ int main(int argc, char *argv[]) { ExitOnErr(loadProcessSymbols(*S)); ExitOnErr(loadDylibs()); + if (PhonyExternals) + addPhonyExternalsGenerator(*S); + { TimeRegion TR(Timers ? 
&Timers->LoadObjectsTimer : nullptr); ExitOnErr(loadObjects(*S)); From 60434989e5cd718e0f84c7601f648aecd1e8e1eb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 1 Aug 2020 21:49:38 -0700 Subject: [PATCH 108/600] Use llvm::is_contained where appropriate (NFC) Use llvm::is_contained where appropriate (NFC) Reviewed By: kazu Differential Revision: https://reviews.llvm.org/D85083 --- llvm/include/llvm/IR/PassManager.h | 5 +- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 3 +- llvm/lib/Analysis/MemorySSAUpdater.cpp | 3 +- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 3 +- llvm/lib/Support/FileCheck.cpp | 5 +- llvm/lib/Support/Unix/Signals.inc | 3 +- .../AArch64/AsmParser/AArch64AsmParser.cpp | 8 +--- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 +- .../Target/PowerPC/PPCLowerMASSVEntries.cpp | 4 +- .../lib/Transforms/Scalar/LoopInterchange.cpp | 3 +- llvm/tools/dsymutil/dsymutil.cpp | 5 +- llvm/unittests/Support/TargetParserTest.cpp | 46 ++++++++----------- llvm/utils/TableGen/CodeGenSchedule.h | 4 +- 13 files changed, 39 insertions(+), 57 deletions(-) diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index c410818c470ca..f16696d7c2e38 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -38,6 +38,7 @@ #define LLVM_IR_PASSMANAGER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" @@ -1137,9 +1138,7 @@ class OuterAnalysisManagerProxy // analyses that all trigger invalidation on the same outer analysis, // this entire system should be changed to some other deterministic // data structure such as a `SetVector` of a pair of pointers. - auto InvalidatedIt = std::find(InvalidatedIDList.begin(), - InvalidatedIDList.end(), InvalidatedID); - if (InvalidatedIt == InvalidatedIDList.end()) + if (!llvm::is_contained(InvalidatedIDList, InvalidatedID)) InvalidatedIDList.push_back(InvalidatedID); } diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 7e34f3b6c869b..7b24fe9d56c25 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -692,8 +692,7 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, // we can constant-evaluate the compare to see if it makes the branch be // taken or not. 
Constant *CmpLHSConst = dyn_cast(V); - if (!CmpLHSConst || - std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB)) + if (!CmpLHSConst || !llvm::is_contained(successors(BB), B)) continue; // First collapse InstChain for (Instruction *I : llvm::reverse(InstChain)) { diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 21cbdcd67147a..81582413a6042 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -319,8 +319,7 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { bool DefBeforeSameBlock = false; if (DefBefore->getBlock() == MD->getBlock() && !(isa(DefBefore) && - std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) != - InsertedPHIs.end())) + llvm::is_contained(InsertedPHIs, DefBefore))) DefBeforeSameBlock = true; // There is a def before us, which means we can replace any store/phi uses diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 5d52e7fb80920..b388e43447835 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -494,8 +494,7 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, static void addLocIfNotPresent(SmallVectorImpl &Locs, const DILocation *Loc) { - auto B = Locs.begin(), E = Locs.end(); - if (std::find(B, E, Loc) == E) + if (!llvm::is_contained(Locs, Loc)) Locs.push_back(Loc); } diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index 29a5a1345a58a..137eea0a65a64 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/FileCheck.h" #include "FileCheckImpl.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CheckedArithmetic.h" @@ -1578,9 +1579,7 @@ FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) { StringRef Rest = Buffer.drop_front(Prefix.size() + 1); // Check for comment. - if (Req.CommentPrefixes.end() != std::find(Req.CommentPrefixes.begin(), - Req.CommentPrefixes.end(), - Prefix)) { + if (llvm::is_contained(Req.CommentPrefixes, Prefix)) { if (NextChar == ':') return {Check::CheckComment, Rest}; // Ignore a comment prefix if it has a suffix like "-NOT". 
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index f68374d29f023..ce1fccf0b4271 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -382,8 +382,7 @@ static RETSIGTYPE SignalHandler(int Sig) { OneShotPipeSignalFunction.exchange(nullptr)) return OldOneShotPipeFunction(); - if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig) - != std::end(IntSigs)) { + if (llvm::is_contained(IntSigs, Sig)) { if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr)) return OldInterruptFunction(); diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 0ac09c4f96f04..beae2b059bec5 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -5089,12 +5089,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { static void ExpandCryptoAEK(AArch64::ArchKind ArchKind, SmallVector &RequestedExtensions) { - const bool NoCrypto = - (std::find(RequestedExtensions.begin(), RequestedExtensions.end(), - "nocrypto") != std::end(RequestedExtensions)); - const bool Crypto = - (std::find(RequestedExtensions.begin(), RequestedExtensions.end(), - "crypto") != std::end(RequestedExtensions)); + const bool NoCrypto = llvm::is_contained(RequestedExtensions, "nocrypto"); + const bool Crypto = llvm::is_contained(RequestedExtensions, "crypto"); if (!NoCrypto && Crypto) { switch (ArchKind) { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index f45cc06e0a0a3..916d515af5dac 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -19,6 +19,7 @@ #include "NVPTXTargetObjectFile.h" #include "NVPTXUtilities.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Analysis.h" @@ -2438,8 +2439,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) { if (!STy || STy->isLiteral()) return false; - return std::find(std::begin(specialTypes), std::end(specialTypes), - STy->getName()) != std::end(specialTypes); + return llvm::is_contained(specialTypes, STy->getName()); } SDValue NVPTXTargetLowering::LowerFormalArguments( diff --git a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp index 2b0e604e0ccde..a61e1f83705e6 100644 --- a/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp +++ b/llvm/lib/Target/PowerPC/PPCLowerMASSVEntries.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Instructions.h" @@ -64,8 +65,7 @@ class PPCLowerMASSVEntries : public ModulePass { /// Checks if the specified function name represents an entry in the MASSV /// library. 
bool PPCLowerMASSVEntries::isMASSVFunc(StringRef Name) { - auto Iter = std::find(std::begin(MASSVFuncs), std::end(MASSVFuncs), Name); - return Iter != std::end(MASSVFuncs); + return llvm::is_contained(MASSVFuncs, Name); } // FIXME: diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 7787c0bccd4ce..3cfe1b595571e 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1521,8 +1521,7 @@ bool LoopInterchangeTransform::adjustLoopBranches() { InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); // The outer loop header might or might not branch to the outer latch. // We are guaranteed to branch to the inner loop preheader. - if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI), - OuterLoopLatch) != succ_end(OuterLoopHeaderBI)) + if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates, /*MustUpdateOnce=*/false); updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 3a32acbec06f9..a7d5e7afca039 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -17,6 +17,7 @@ #include "LinkUtils.h" #include "MachOUtils.h" #include "Reproducer.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -157,9 +158,7 @@ static Error verifyOptions(const DsymutilOptions &Options) { errc::invalid_argument); } - if (Options.LinkOpts.Update && - std::find(Options.InputFiles.begin(), Options.InputFiles.end(), "-") != - Options.InputFiles.end()) { + if (Options.LinkOpts.Update && llvm::is_contained(Options.InputFiles, "-")) { // FIXME: We cannot use stdin for an update because stdin will be // consumed by the BinaryHolder during the debugmap parsing, and // then we will want to consume it again in DwarfLinker. 
If we diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 9f923e1358dde..f9392751de4e4 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -610,16 +610,13 @@ TEST(TargetParserTest, ARMExtensionFeatures) { // test +extension Features.clear(); ARM::getExtensionFeatures(E.first, Features); - auto Found = - std::find(std::begin(Features), std::end(Features), E.second.at(0)); - EXPECT_TRUE(Found != std::end(Features)); + EXPECT_TRUE(llvm::is_contained(Features, E.second.at(0))); EXPECT_TRUE(Extensions.size() == Features.size()); // test -extension Features.clear(); ARM::getExtensionFeatures(~E.first, Features); - Found = std::find(std::begin(Features), std::end(Features), E.second.at(1)); - EXPECT_TRUE(Found != std::end(Features)); + EXPECT_TRUE(llvm::is_contained(Features, E.second.at(1))); EXPECT_TRUE(Extensions.size() == Features.size()); } } @@ -1227,27 +1224,24 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::getExtensionFeatures(ExtVal, Features); EXPECT_TRUE(Extensions.size() == Features.size()); - auto B = std::begin(Features); - auto E = std::end(Features); - - EXPECT_TRUE(std::find(B, E, "+crc") != E); - EXPECT_TRUE(std::find(B, E, "+crypto") != E); - EXPECT_TRUE(std::find(B, E, "+fp-armv8") != E); - EXPECT_TRUE(std::find(B, E, "+neon") != E); - EXPECT_TRUE(std::find(B, E, "+fullfp16") != E); - EXPECT_TRUE(std::find(B, E, "+spe") != E); - EXPECT_TRUE(std::find(B, E, "+ras") != E); - EXPECT_TRUE(std::find(B, E, "+lse") != E); - EXPECT_TRUE(std::find(B, E, "+rdm") != E); - EXPECT_TRUE(std::find(B, E, "+dotprod") != E); - EXPECT_TRUE(std::find(B, E, "+rcpc") != E); - EXPECT_TRUE(std::find(B, E, "+fp16fml") != E); - EXPECT_TRUE(std::find(B, E, "+sve") != E); - EXPECT_TRUE(std::find(B, E, "+sve2") != E); - EXPECT_TRUE(std::find(B, E, "+sve2-aes") != E); - EXPECT_TRUE(std::find(B, E, "+sve2-sm4") != E); - EXPECT_TRUE(std::find(B, E, "+sve2-sha3") != E); - EXPECT_TRUE(std::find(B, E, "+sve2-bitperm") != E); + EXPECT_TRUE(llvm::is_contained(Features, "+crc")); + EXPECT_TRUE(llvm::is_contained(Features, "+crypto")); + EXPECT_TRUE(llvm::is_contained(Features, "+fp-armv8")); + EXPECT_TRUE(llvm::is_contained(Features, "+neon")); + EXPECT_TRUE(llvm::is_contained(Features, "+fullfp16")); + EXPECT_TRUE(llvm::is_contained(Features, "+spe")); + EXPECT_TRUE(llvm::is_contained(Features, "+ras")); + EXPECT_TRUE(llvm::is_contained(Features, "+lse")); + EXPECT_TRUE(llvm::is_contained(Features, "+rdm")); + EXPECT_TRUE(llvm::is_contained(Features, "+dotprod")); + EXPECT_TRUE(llvm::is_contained(Features, "+rcpc")); + EXPECT_TRUE(llvm::is_contained(Features, "+fp16fml")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); } TEST(TargetParserTest, AArch64ArchFeatures) { diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h index c487d142d46c4..b60a1b4fa242c 100644 --- a/llvm/utils/TableGen/CodeGenSchedule.h +++ b/llvm/utils/TableGen/CodeGenSchedule.h @@ -16,6 +16,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/TableGen/Record.h" @@ -358,8 +359,7 @@ class OpcodeGroup { OpcodeGroup(OpcodeGroup &&Other) = default; void addOpcode(const Record *Opcode) { - assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() && - "Opcode already in set!"); + assert(!llvm::is_contained(Opcodes, Opcode) && "Opcode already in set!"); Opcodes.push_back(Opcode); } From 20797989ea190f2ef22d13c5a7a0535fe9afa58b Mon Sep 17 00:00:00 2001 From: AK <1894981+hiraditya@users.noreply.github.com> Date: Sat, 1 Aug 2020 19:15:05 -0700 Subject: [PATCH 109/600] Outline non returning functions unless a longjmp __assert_fail, abort, exit etc. are cold. TODO: outline throw Authored by: rjf (Ruijie Fang) Reviewed by: hiraditya,tejohnson,fhahn Differential Revision: https://reviews.llvm.org/D69257 --- llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 18 ++- .../HotColdSplit/longjmp-nosplit.ll | 97 +++++++++++++ .../Transforms/HotColdSplit/longjmp-split.ll | 132 +++++++++++++++++ .../Transforms/HotColdSplit/sjlj-nosplit.ll | 103 +++++++++++++ .../Transforms/HotColdSplit/sjlj-split.ll | 136 ++++++++++++++++++ .../HotColdSplit/split-assert-fail.ll | 47 ++++++ 6 files changed, 528 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll create mode 100644 llvm/test/Transforms/HotColdSplit/longjmp-split.ll create mode 100644 llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll create mode 100644 llvm/test/Transforms/HotColdSplit/sjlj-split.ll create mode 100644 llvm/test/Transforms/HotColdSplit/split-assert-fail.ll diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index d0bd0166534a7..cdfd9879c030a 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -101,7 +101,8 @@ bool blockEndsInUnreachable(const BasicBlock &BB) { return !(isa(I) || isa(I)); } -bool unlikelyExecuted(BasicBlock &BB) { +bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { // Exception handling blocks are unlikely executed. if (BB.isEHPad() || isa(BB.getTerminator())) return true; @@ -114,12 +115,19 @@ bool unlikelyExecuted(BasicBlock &BB) { return true; // The block is cold if it has an unreachable terminator, unless it's - // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp). + // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp); + // in the case of a longjmp, if the block is cold according to + // profile information, we mark it as unlikely to be executed as well. 
 if (blockEndsInUnreachable(BB)) {
   if (auto *CI =
           dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
-    if (CI->hasFnAttr(Attribute::NoReturn))
-      return false;
+    if (CI->hasFnAttr(Attribute::NoReturn)) {
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
+        return (II->getIntrinsicID() != Intrinsic::eh_sjlj_longjmp) ||
+               (BFI && PSI->isColdBlock(&BB, BFI));
+      return !CI->getCalledFunction()->getName().contains("longjmp") ||
+             (BFI && PSI->isColdBlock(&BB, BFI));
+    }
   return true;
 }
@@ -575,7 +583,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
       continue;

     bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
-                (EnableStaticAnalyis && unlikelyExecuted(*BB));
+                (EnableStaticAnalyis && unlikelyExecuted(*BB, PSI, BFI));
     if (!Cold)
       continue;

diff --git a/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll b/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll
new file mode 100644
index 0000000000000..d207e8dae3850
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/longjmp-nosplit.ll
@@ -0,0 +1,97 @@
+; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
+%struct.__sigset_t = type { [16 x i64] }
+
+@c = dso_local global i32 1, align 4
+@buf = dso_local global [20 x i8*] zeroinitializer, align 16
+
+; CHECK-LABEL: @f
+; CHECK-NOT: f.cold.1
+define dso_local void @f() #0 {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %k = alloca i32, align 4
+  %0 = load i32, i32* @c, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.else:                                          ; preds = %entry
+  %1 = load i32, i32* @c, align 4
+  %inc = add i32 %1, 1
+  store i32 %inc, i32* @c, align 4
+  %2 = load i32, i32* @c, align 4
+  %inc1 = add i32 %2, 1
+  store i32 %inc1, i32* @c, align 4
+  %3 = load i32, i32* @c, align 4
+  %inc2 = add i32 %3, 1
+  store i32 %inc2, i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
+  %inc3 = add i32 %4, 1
+  store i32 %inc3, i32* @c, align 4
+  %5 = load i32, i32* @c, align 4
+  %dec = add i32 %5, -1
+  store i32 %dec, i32* @c, align 4
+  %6 = load i32, i32* @c, align 4
+  %dec4 = add i32 %6, -1
+  store i32 %dec4, i32* @c, align 4
+  %7 = load i32, i32* @c, align 4
+  %inc5 = add i32 %7, 1
+  store i32 %inc5, i32* @c, align 4
+  %8 = load i32, i32* @c, align 4
+  %inc6 = add i32 %8, 1
+  store i32 %inc6, i32* @c, align 4
+  %9 = load i32, i32* @c, align 4
+  %add = add i32 %9, 1
+  store i32 %add, i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
+  %sub = sub i32 %10, 1
+  store i32 %sub, i32* %j, align 4
+  %11 = load i32, i32* %i, align 4
+  %add7 = add i32 %11, 2
+  store i32 %add7, i32* %k, align 4
+  call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3
+  unreachable
+}
+
+declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1
+
+; CHECK-LABEL: @main
+; CHECK-NOT: main.cold.1
+define dso_local i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 0, i32* %i, align 4
+  %call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*)) #4
+  %tobool = icmp ne i32 %call, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %entry
+  call void @f()
+  store
i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32, i32* %retval, align 4 + ret i32 %0 +} + +declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2 + +attributes #0 = { nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind returns_twice } +attributes #3 = { noreturn nounwind } +attributes #4 = { nounwind returns_twice } diff --git a/llvm/test/Transforms/HotColdSplit/longjmp-split.ll b/llvm/test/Transforms/HotColdSplit/longjmp-split.ll new file mode 100644 index 0000000000000..905f146a79e18 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/longjmp-split.ll @@ -0,0 +1,132 @@ +; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t } +%struct.__sigset_t = type { [16 x i64] } + +@c = dso_local global i32 1, align 4 +@buf = dso_local global [20 x i8*] zeroinitializer, align 16 + +; CHECK-LABEL: @f +; CHECK: f.cold.1 +define dso_local void @f() #0 !prof !31 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %0 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else, !prof !32 + +if.then: ; preds = %entry + ret void + +if.else: ; preds = %entry + %1 = load i32, i32* @c, align 4 + %inc = add i32 %1, 1 + store i32 %inc, i32* @c, align 4 + %2 = load i32, i32* @c, align 4 + %inc1 = add i32 %2, 1 + store i32 %inc1, i32* @c, align 4 + %3 = load i32, i32* @c, align 4 + %inc2 = add i32 %3, 1 + store i32 %inc2, i32* @c, align 4 + %4 = load i32, i32* @c, align 4 + %inc3 = add i32 %4, 1 + store i32 %inc3, i32* @c, align 4 + %5 = load i32, i32* @c, align 4 + %dec = add i32 %5, -1 + store i32 %dec, i32* @c, align 4 + %6 = load i32, i32* @c, align 4 + %dec4 = add i32 %6, -1 + store i32 %dec4, i32* @c, align 4 + %7 = load i32, i32* @c, align 4 + %inc5 = add i32 %7, 1 + store i32 %inc5, i32* @c, align 4 + %8 = load i32, i32* @c, align 4 + %inc6 = add i32 %8, 1 + store i32 %inc6, i32* @c, align 4 + %9 = load i32, i32* @c, align 4 + %add = add i32 %9, 1 + store i32 %add, i32* %i, align 4 + %10 = load i32, i32* %i, align 4 + %sub = sub i32 %10, 1 + store i32 %sub, i32* %j, align 4 + %11 = load i32, i32* %i, align 4 + %add7 = add i32 %11, 2 + store i32 %add7, i32* %k, align 4 + call void @longjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*), i32 1) #3 + unreachable +} + +declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #1 + +define dso_local i32 @main() #0 !prof !31 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %call = call i32 @_setjmp(%struct.__jmp_buf_tag* bitcast ([20 x i8*]* @buf to %struct.__jmp_buf_tag*)) #4 + %tobool = icmp ne i32 %call, 0 + br i1 %tobool, label %if.then, label %if.end, !prof !33 + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + call void @f() + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %0 = load i32, i32* %retval, align 4 + ret i32 %0 +} + +declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #2 + +attributes #0 = { inlinehint nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind returns_twice } 
+attributes #3 = { noreturn nounwind } +attributes #4 = { nounwind returns_twice } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 2} +!5 = !{!"MaxCount", i64 1} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1} +!8 = !{!"NumCounts", i64 4} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} +!14 = !{i32 10000, i64 0, i32 0} +!15 = !{i32 100000, i64 0, i32 0} +!16 = !{i32 200000, i64 0, i32 0} +!17 = !{i32 300000, i64 0, i32 0} +!18 = !{i32 400000, i64 0, i32 0} +!19 = !{i32 500000, i64 1, i32 2} +!20 = !{i32 600000, i64 1, i32 2} +!21 = !{i32 700000, i64 1, i32 2} +!22 = !{i32 800000, i64 1, i32 2} +!23 = !{i32 900000, i64 1, i32 2} +!24 = !{i32 950000, i64 1, i32 2} +!25 = !{i32 990000, i64 1, i32 2} +!26 = !{i32 999000, i64 1, i32 2} +!27 = !{i32 999900, i64 1, i32 2} +!28 = !{i32 999990, i64 1, i32 2} +!29 = !{i32 999999, i64 1, i32 2} +!31 = !{!"function_entry_count", i64 1} +!32 = !{!"branch_weights", i32 1, i32 0} +!33 = !{!"branch_weights", i32 0, i32 1} diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll new file mode 100644 index 0000000000000..f02a1b376ecb2 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll @@ -0,0 +1,103 @@ +; RUN: opt -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = dso_local global i32 1, align 4 +@buf = dso_local global [20 x i8*] zeroinitializer, align 16 + +; CHECK-LABEL: @f +; CHECK-NOT: f.cold.1 +define dso_local void @f() #0 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %0 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + ret void + +if.else: ; preds = %entry + %1 = load i32, i32* @c, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @c, align 4 + %2 = load i32, i32* @c, align 4 + %inc1 = add nsw i32 %2, 1 + store i32 %inc1, i32* @c, align 4 + %3 = load i32, i32* @c, align 4 + %inc2 = add nsw i32 %3, 1 + store i32 %inc2, i32* @c, align 4 + %4 = load i32, i32* @c, align 4 + %inc3 = add nsw i32 %4, 1 + store i32 %inc3, i32* @c, align 4 + %5 = load i32, i32* @c, align 4 + %dec = add nsw i32 %5, -1 + store i32 %dec, i32* @c, align 4 + %6 = load i32, i32* @c, align 4 + %dec4 = add nsw i32 %6, -1 + store i32 %dec4, i32* @c, align 4 + %7 = load i32, i32* @c, align 4 + %inc5 = add nsw i32 %7, 1 + store i32 %inc5, i32* @c, align 4 + %8 = load i32, i32* @c, align 4 + %inc6 = add nsw i32 %8, 1 + store i32 %inc6, i32* @c, align 4 + %9 = load i32, i32* @c, align 4 + %add = add nsw i32 %9, 1 + store i32 %add, i32* %i, align 4 + %10 = load i32, i32* %i, align 4 + %sub = sub nsw i32 %10, 1 + store i32 %sub, i32* %j, align 4 + %11 = load i32, i32* %i, align 4 + %add7 = add nsw i32 %11, 2 + store i32 %add7, i32* %k, align 4 + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + unreachable +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +; CHECK-LABEL: @main +; CHECK-NOT: main.cold.1 +define dso_local i32 
@main() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %0 = call i8* @llvm.frameaddress.p0i8(i32 0) + store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16 + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + call void @f() + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32, i32* %retval, align 4 + ret i32 %3 +} + +declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + + diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-split.ll b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll new file mode 100644 index 0000000000000..3a12677ecf519 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll @@ -0,0 +1,136 @@ +; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = dso_local global i32 1, align 4 +@buf = dso_local global [20 x i8*] zeroinitializer, align 16 + +; CHECK-LABEL: @f +; CHECK: f.cold.1 +define dso_local void @f() #0 !prof !31 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %0 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else, !prof !32 + +if.then: ; preds = %entry + ret void + +if.else: ; preds = %entry + %1 = load i32, i32* @c, align 4 + %inc = add i32 %1, 1 + store i32 %inc, i32* @c, align 4 + %2 = load i32, i32* @c, align 4 + %inc1 = add i32 %2, 1 + store i32 %inc1, i32* @c, align 4 + %3 = load i32, i32* @c, align 4 + %inc2 = add i32 %3, 1 + store i32 %inc2, i32* @c, align 4 + %4 = load i32, i32* @c, align 4 + %inc3 = add i32 %4, 1 + store i32 %inc3, i32* @c, align 4 + %5 = load i32, i32* @c, align 4 + %dec = add i32 %5, -1 + store i32 %dec, i32* @c, align 4 + %6 = load i32, i32* @c, align 4 + %dec4 = add i32 %6, -1 + store i32 %dec4, i32* @c, align 4 + %7 = load i32, i32* @c, align 4 + %inc5 = add i32 %7, 1 + store i32 %inc5, i32* @c, align 4 + %8 = load i32, i32* @c, align 4 + %inc6 = add i32 %8, 1 + store i32 %inc6, i32* @c, align 4 + %9 = load i32, i32* @c, align 4 + %add = add i32 %9, 1 + store i32 %add, i32* %i, align 4 + %10 = load i32, i32* %i, align 4 + %sub = sub i32 %10, 1 + store i32 %sub, i32* %j, align 4 + %11 = load i32, i32* %i, align 4 + %add7 = add i32 %11, 2 + store i32 %add7, i32* %k, align 4 + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + unreachable +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define dso_local i32 @main() #0 !prof !31 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %0 = call i8* @llvm.frameaddress.p0i8(i32 0) + store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* 
@buf, i64 0, i64 0), align 16 + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.end, !prof !33 + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + call void @f() + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32, i32* %retval, align 4 + ret i32 %3 +} + +declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { inlinehint nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 2} +!5 = !{!"MaxCount", i64 1} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1} +!8 = !{!"NumCounts", i64 4} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} +!14 = !{i32 10000, i64 0, i32 0} +!15 = !{i32 100000, i64 0, i32 0} +!16 = !{i32 200000, i64 0, i32 0} +!17 = !{i32 300000, i64 0, i32 0} +!18 = !{i32 400000, i64 0, i32 0} +!19 = !{i32 500000, i64 1, i32 2} +!20 = !{i32 600000, i64 1, i32 2} +!21 = !{i32 700000, i64 1, i32 2} +!22 = !{i32 800000, i64 1, i32 2} +!23 = !{i32 900000, i64 1, i32 2} +!24 = !{i32 950000, i64 1, i32 2} +!25 = !{i32 990000, i64 1, i32 2} +!26 = !{i32 999000, i64 1, i32 2} +!27 = !{i32 999900, i64 1, i32 2} +!28 = !{i32 999990, i64 1, i32 2} +!29 = !{i32 999999, i64 1, i32 2} +!31 = !{!"function_entry_count", i64 1} +!32 = !{!"branch_weights", i32 1, i32 0} +!33 = !{!"branch_weights", i32 0, i32 1} + diff --git a/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll b/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll new file mode 100644 index 0000000000000..ea5f4b9114917 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll @@ -0,0 +1,47 @@ +; REQUIRES: asserts +; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s +; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [2 x i8] c"0\00", align 1 +@.str.1 = private unnamed_addr constant [14 x i8] c"assert-fail.c\00", align 1 +@__PRETTY_FUNCTION__.main = private unnamed_addr constant [15 x i8] c"int main(void)\00", align 1 + +; CHECK: @f +; CHECK-LABEL: codeRepl: +; CHECK } +; CHECK: define {{.*}}@f.cold.1() +; CHECK-LABEL: newFuncRoot: +; CHECK: br label %if.then + +; Function Attrs: nounwind willreturn +define i32 @f() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %0 = load i32, i32* %i, align 4 + %cmp = icmp eq i32 %0, 2 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void @__assert_fail(i8* getelementptr inbounds ([2 x 
i8], [2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 10, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__PRETTY_FUNCTION__.main, i64 0, i64 0)) #1
+  unreachable
+
+if.end:                                           ; preds = %entry
+  %1 = load i32, i32* %i, align 4
+  %add = add nsw i32 %1, 1
+  store i32 %add, i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
+  ret i32 %2
+}
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) #1
+
+attributes #0 = { nounwind willreturn }
+attributes #1 = { noreturn nounwind }
+
From aa1f905890fbbfedf396530f1e14409875ece13c Mon Sep 17 00:00:00 2001
From: AK <1894981+hiraditya@users.noreply.github.com>
Date: Sat, 1 Aug 2020 22:34:44 -0700
Subject: [PATCH 110/600] [HotColdSplit] Add test case for unlikely attribute
 in outlined function

Authored by: rjf (Ruijie Fang)
Reviewed by: hiraditya,tejohnson,fhahn
Differential Revision: https://reviews.llvm.org/D69384
---
 .../test/Transforms/HotColdSplit/coldentrycount.ll | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
index d63acc188f544..7b196bf4c1048 100644
--- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
+++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
@@ -1,13 +1,14 @@
 ; Test to ensure that split cold function gets 0 entry count profile
 ; metadata when compiling with pgo.

-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -codegenprepare -S < %s | FileCheck %s

 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.14.0"

-; CHECK-LABEL: @fun
+; CHECK: define {{.*}} @fun{{.*}} ![[HOTPROF:[0-9]+]] {{.*}}section_prefix ![[LIKELY:[0-9]+]]
 ; CHECK: call void @fun.cold.1
+
 define void @fun() !prof !14 {
 entry:
   br i1 undef, label %if.then, label %if.else
@@ -22,8 +23,12 @@ if.else:

 declare void @sink() cold

-; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]]
+; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]]
+
+; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100}
+; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !".hot"}
 ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0}
+; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !".unlikely"}

 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
@@ -41,3 +46,6 @@ declare void @sink() cold
 !12 = !{i32 999000, i64 100, i32 1}
 !13 = !{i32 999999, i64 1, i32 2}
 !14 = !{!"function_entry_count", i64 100}
+!15 = !{!"function_section_prefix", !".hot"}
+!16 = !{!"function_entry_count", i64 0}
+!17 = !{!"function_section_prefix", !".unlikely"}

From f89d59a0853f90c54dea60500831d02901623223 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 1 Aug 2020 21:57:24 -0700
Subject: [PATCH 111/600] [X86] Add parity test cases for PR46954.

This adds test cases where the parity idiom of (and (ctpop X), 1)
has a truncate in the middle.
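For reference, the two shapes look roughly like this in IR (a minimal
sketch; value names are illustrative, only the ctpop/trunc/and structure
matters):

  ; base parity idiom, already recognized:
  %pop = call i32 @llvm.ctpop.i32(i32 %x)
  %parity = and i32 %pop, 1

  ; truncated variant exercised by these tests:
  %pop64 = call i64 @llvm.ctpop.i64(i64 %y)
  %lo = trunc i64 %pop64 to i32
  %parity32 = and i32 %lo, 1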
--- llvm/test/CodeGen/X86/parity.ll | 147 ++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 9ad04db8c280f..404d7a68a6e74 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -92,5 +92,152 @@ define i64 @parity_64(i64 %x) { ret i64 %2 } +define i32 @parity_64_trunc(i64 %x) { +; X86-NOPOPCNT-LABEL: parity_64_trunc: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %edx +; X86-NOPOPCNT-NEXT: shrl %edx +; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOPOPCNT-NEXT: subl %edx, %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %edx +; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: shrl $2, %ecx +; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: addl %edx, %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %edx +; X86-NOPOPCNT-NEXT: shrl $4, %edx +; X86-NOPOPCNT-NEXT: addl %ecx, %edx +; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F +; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 +; X86-NOPOPCNT-NEXT: shrl $24, %ecx +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl %edx +; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOPOPCNT-NEXT: subl %edx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: shrl $2, %eax +; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: addl %edx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl $4, %edx +; X86-NOPOPCNT-NEXT: addl %eax, %edx +; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F +; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 +; X86-NOPOPCNT-NEXT: shrl $24, %eax +; X86-NOPOPCNT-NEXT: addl %ecx, %eax +; X86-NOPOPCNT-NEXT: andl $1, %eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_64_trunc: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movq %rdi, %rax +; X64-NOPOPCNT-NEXT: shrq %rax +; X64-NOPOPCNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NOPOPCNT-NEXT: andq %rax, %rcx +; X64-NOPOPCNT-NEXT: subq %rcx, %rdi +; X64-NOPOPCNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NOPOPCNT-NEXT: movq %rdi, %rcx +; X64-NOPOPCNT-NEXT: andq %rax, %rcx +; X64-NOPOPCNT-NEXT: shrq $2, %rdi +; X64-NOPOPCNT-NEXT: andq %rax, %rdi +; X64-NOPOPCNT-NEXT: addq %rcx, %rdi +; X64-NOPOPCNT-NEXT: movq %rdi, %rax +; X64-NOPOPCNT-NEXT: shrq $4, %rax +; X64-NOPOPCNT-NEXT: addq %rdi, %rax +; X64-NOPOPCNT-NEXT: movabsq $76296276040158991, %rcx # imm = 0x10F0F0F0F0F0F0F +; X64-NOPOPCNT-NEXT: andq %rax, %rcx +; X64-NOPOPCNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 +; X64-NOPOPCNT-NEXT: imulq %rcx, %rax +; X64-NOPOPCNT-NEXT: shrq $56, %rax +; X64-NOPOPCNT-NEXT: andl $1, %eax +; X64-NOPOPCNT-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_64_trunc: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: addl %ecx, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_64_trunc: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: andl $1, %eax +; 
X64-POPCNT-NEXT: # kill: def $eax killed $eax killed $rax +; X64-POPCNT-NEXT: retq + %1 = tail call i64 @llvm.ctpop.i64(i64 %x) + %2 = trunc i64 %1 to i32 + %3 = and i32 %2, 1 + ret i32 %3 +} + +define i8 @parity_32_trunc(i32 %x) { +; X86-NOPOPCNT-LABEL: parity_32_trunc: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl %ecx +; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NOPOPCNT-NEXT: subl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: shrl $2, %eax +; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: addl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl $4, %ecx +; X86-NOPOPCNT-NEXT: addl %eax, %ecx +; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F +; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 +; X86-NOPOPCNT-NEXT: shrl $24, %eax +; X86-NOPOPCNT-NEXT: andb $1, %al +; X86-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_32_trunc: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: shrl %eax +; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-NOPOPCNT-NEXT: subl %eax, %edi +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NOPOPCNT-NEXT: shrl $2, %edi +; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X64-NOPOPCNT-NEXT: addl %eax, %edi +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: shrl $4, %eax +; X64-NOPOPCNT-NEXT: addl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F +; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X64-NOPOPCNT-NEXT: shrl $24, %eax +; X64-NOPOPCNT-NEXT: andb $1, %al +; X64-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_32_trunc: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: andb $1, %al +; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_32_trunc: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: andb $1, %al +; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax +; X64-POPCNT-NEXT: retq + %1 = tail call i32 @llvm.ctpop.i32(i32 %x) + %2 = trunc i32 %1 to i8 + %3 = and i8 %2, 1 + ret i8 %3 +} + declare i32 @llvm.ctpop.i32(i32 %x) declare i64 @llvm.ctpop.i64(i64 %x) From 56166a3a5229722e442e7dfd861b6d83ebf6c8b5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 1 Aug 2020 22:05:10 -0700 Subject: [PATCH 112/600] [X86] Improve parity idiom recognition to handle (and (truncate (ctpop X)), 1). Fixes part of PR46954 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 34 ++++--- llvm/test/CodeGen/X86/parity.ll | 118 ++++++------------------ 2 files changed, 49 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c135b91620724..752b65fc03310 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42744,26 +42744,30 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, // Turn it into series of XORs and a setnp. 
static SDValue combineParity(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - EVT VT = N->getValueType(0); - - // We only support 64-bit and 32-bit. 64-bit requires special handling - // unless the 64-bit popcnt instruction is legal. - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) + // RHS needs to be 1. + if (!isOneConstant(N1)) return SDValue(); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + // Popcnt may be truncated. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + N0 = N0.getOperand(0); // LHS needs to be a single use CTPOP. if (N0.getOpcode() != ISD::CTPOP || !N0.hasOneUse()) return SDValue(); - // RHS needs to be 1. - if (!isOneConstant(N1)) + EVT VT = N0.getValueType(); + + // We only support 64-bit and 32-bit. 64-bit requires special handling + // unless the 64-bit popcnt instruction is legal. + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isTypeLegal(VT) && TLI.isOperationLegal(ISD::CTPOP, VT)) return SDValue(); SDLoc DL(N); @@ -42782,7 +42786,7 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG, SDValue Parity = DAG.getNode(ISD::AND, DL, MVT::i32, DAG.getNode(ISD::CTPOP, DL, MVT::i32, X), DAG.getConstant(1, DL, MVT::i32)); - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Parity); + return DAG.getZExtOrTrunc(Parity, DL, N->getValueType(0)); } assert(VT == MVT::i32 && "Unexpected VT!"); @@ -42803,8 +42807,8 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG, // Copy the inverse of the parity flag into a register with setcc. SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); - // Zero extend to original type. - return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp); + // Extend or truncate to the original type. 
+ return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); } diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 404d7a68a6e74..8637058e06807 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -96,71 +96,35 @@ define i32 @parity_64_trunc(i64 %x) { ; X86-NOPOPCNT-LABEL: parity_64_trunc: ; X86-NOPOPCNT: # %bb.0: ; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOPOPCNT-NEXT: movl %ecx, %edx -; X86-NOPOPCNT-NEXT: shrl %edx -; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555 -; X86-NOPOPCNT-NEXT: subl %edx, %ecx -; X86-NOPOPCNT-NEXT: movl %ecx, %edx -; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: shrl $2, %ecx -; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: addl %edx, %ecx -; X86-NOPOPCNT-NEXT: movl %ecx, %edx -; X86-NOPOPCNT-NEXT: shrl $4, %edx -; X86-NOPOPCNT-NEXT: addl %ecx, %edx -; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F -; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101 -; X86-NOPOPCNT-NEXT: shrl $24, %ecx -; X86-NOPOPCNT-NEXT: movl %eax, %edx -; X86-NOPOPCNT-NEXT: shrl %edx -; X86-NOPOPCNT-NEXT: andl $1431655765, %edx # imm = 0x55555555 -; X86-NOPOPCNT-NEXT: subl %edx, %eax -; X86-NOPOPCNT-NEXT: movl %eax, %edx -; X86-NOPOPCNT-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: shrl $2, %eax -; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: addl %edx, %eax -; X86-NOPOPCNT-NEXT: movl %eax, %edx -; X86-NOPOPCNT-NEXT: shrl $4, %edx -; X86-NOPOPCNT-NEXT: addl %eax, %edx -; X86-NOPOPCNT-NEXT: andl $17764111, %edx # imm = 0x10F0F0F -; X86-NOPOPCNT-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 -; X86-NOPOPCNT-NEXT: shrl $24, %eax -; X86-NOPOPCNT-NEXT: addl %ecx, %eax -; X86-NOPOPCNT-NEXT: andl $1, %eax +; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl $16, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_64_trunc: ; X64-NOPOPCNT: # %bb.0: ; X64-NOPOPCNT-NEXT: movq %rdi, %rax -; X64-NOPOPCNT-NEXT: shrq %rax -; X64-NOPOPCNT-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; X64-NOPOPCNT-NEXT: andq %rax, %rcx -; X64-NOPOPCNT-NEXT: subq %rcx, %rdi -; X64-NOPOPCNT-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; X64-NOPOPCNT-NEXT: movq %rdi, %rcx -; X64-NOPOPCNT-NEXT: andq %rax, %rcx -; X64-NOPOPCNT-NEXT: shrq $2, %rdi -; X64-NOPOPCNT-NEXT: andq %rax, %rdi -; X64-NOPOPCNT-NEXT: addq %rcx, %rdi -; X64-NOPOPCNT-NEXT: movq %rdi, %rax -; X64-NOPOPCNT-NEXT: shrq $4, %rax -; X64-NOPOPCNT-NEXT: addq %rdi, %rax -; X64-NOPOPCNT-NEXT: movabsq $76296276040158991, %rcx # imm = 0x10F0F0F0F0F0F0F -; X64-NOPOPCNT-NEXT: andq %rax, %rcx -; X64-NOPOPCNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 -; X64-NOPOPCNT-NEXT: imulq %rcx, %rax -; X64-NOPOPCNT-NEXT: shrq $56, %rax -; X64-NOPOPCNT-NEXT: andl $1, %eax -; X64-NOPOPCNT-NEXT: # kill: def $eax killed $eax killed $rax +; X64-NOPOPCNT-NEXT: shrq $32, %rax +; X64-NOPOPCNT-NEXT: xorl %edi, %eax +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $16, %ecx +; X64-NOPOPCNT-NEXT: xorl %eax, %ecx +; X64-NOPOPCNT-NEXT: movl %ecx, %edx +; X64-NOPOPCNT-NEXT: shrl $8, %edx +; 
X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb %cl, %dl +; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; ; X86-POPCNT-LABEL: parity_64_trunc: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx -; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: addl %ecx, %eax +; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax ; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: retl ; @@ -181,43 +145,21 @@ define i8 @parity_32_trunc(i32 %x) { ; X86-NOPOPCNT: # %bb.0: ; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOPOPCNT-NEXT: movl %eax, %ecx -; X86-NOPOPCNT-NEXT: shrl %ecx -; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X86-NOPOPCNT-NEXT: subl %ecx, %eax -; X86-NOPOPCNT-NEXT: movl %eax, %ecx -; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: shrl $2, %eax -; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-NOPOPCNT-NEXT: addl %ecx, %eax -; X86-NOPOPCNT-NEXT: movl %eax, %ecx -; X86-NOPOPCNT-NEXT: shrl $4, %ecx -; X86-NOPOPCNT-NEXT: addl %eax, %ecx -; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F -; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 -; X86-NOPOPCNT-NEXT: shrl $24, %eax -; X86-NOPOPCNT-NEXT: andb $1, %al -; X86-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax +; X86-NOPOPCNT-NEXT: shrl $16, %ecx +; X86-NOPOPCNT-NEXT: xorl %eax, %ecx +; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: setnp %al ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_32_trunc: ; X64-NOPOPCNT: # %bb.0: ; X64-NOPOPCNT-NEXT: movl %edi, %eax -; X64-NOPOPCNT-NEXT: shrl %eax -; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X64-NOPOPCNT-NEXT: subl %eax, %edi -; X64-NOPOPCNT-NEXT: movl %edi, %eax -; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X64-NOPOPCNT-NEXT: shrl $2, %edi -; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X64-NOPOPCNT-NEXT: addl %eax, %edi -; X64-NOPOPCNT-NEXT: movl %edi, %eax -; X64-NOPOPCNT-NEXT: shrl $4, %eax -; X64-NOPOPCNT-NEXT: addl %edi, %eax -; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F -; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 -; X64-NOPOPCNT-NEXT: shrl $24, %eax -; X64-NOPOPCNT-NEXT: andb $1, %al -; X64-NOPOPCNT-NEXT: # kill: def $al killed $al killed $eax +; X64-NOPOPCNT-NEXT: shrl $16, %eax +; X64-NOPOPCNT-NEXT: xorl %edi, %eax +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $8, %ecx +; X64-NOPOPCNT-NEXT: xorb %al, %cl +; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; ; X86-POPCNT-LABEL: parity_32_trunc: From 21c165de2a1bcca9dceb452f637d9e8959fba113 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Fri, 31 Jul 2020 00:11:40 +0200 Subject: [PATCH 113/600] [CMake] Pass bugreport URL to standalone clang build BUG_REPORT_URL is currently used both in LLVM and in Clang but declared only in the latter. This means that it's missing in standalone clang builds and the driver ends up outputting: PLEASE submit a bug report to and include [...] (note the missing URL) To fix this, include LLVM_PACKAGE_BUGREPORT in LLVMConfig.cmake (similarly to how we pass PACKAGE_VERSION) and use it to fill BUG_REPORT_URL when building clang standalone. 
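In a standalone clang build this plays out roughly as follows (a minimal
sketch; the find_package call stands in for however the standalone build
locates an installed LLVM):

  find_package(LLVM REQUIRED CONFIG)   # imports LLVMConfig.cmake
  # LLVMConfig.cmake now exports LLVM_PACKAGE_BUGREPORT, so the clang
  # build can seed BUG_REPORT_URL from it instead of leaving it empty:
  set(BUG_REPORT_URL "${LLVM_PACKAGE_BUGREPORT}" CACHE STRING
      "Default URL where bug reports are to be submitted.")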
Differential Revision: https://reviews.llvm.org/D84987 --- clang/CMakeLists.txt | 2 ++ llvm/cmake/modules/LLVMConfig.cmake.in | 1 + 2 files changed, 3 insertions(+) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 0f08538495fca..c487e506cae11 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -121,6 +121,8 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) include(LLVMDistributionSupport) set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") + set(BUG_REPORT_URL "${LLVM_PACKAGE_BUGREPORT}" CACHE STRING + "Default URL where bug reports are to be submitted.") if (NOT DEFINED LLVM_INCLUDE_TESTS) set(LLVM_INCLUDE_TESTS ON) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 17cc5eacc57b7..a5c370bbc25e4 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -7,6 +7,7 @@ set(LLVM_VERSION_MINOR @LLVM_VERSION_MINOR@) set(LLVM_VERSION_PATCH @LLVM_VERSION_PATCH@) set(LLVM_VERSION_SUFFIX @LLVM_VERSION_SUFFIX@) set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) +set(LLVM_PACKAGE_BUGREPORT @PACKAGE_BUGREPORT@) set(LLVM_BUILD_TYPE @CMAKE_BUILD_TYPE@) From ffb47352005f2006851a3dcc2083329f434b8730 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 31 Jul 2020 22:10:39 +0100 Subject: [PATCH 114/600] [SCEV] Precommit tests with signed counting down loop. From PR46939. --- .../pr46939-trip-count-count-down.ll | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll diff --git a/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll b/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll new file mode 100644 index 0000000000000..f1001ecec0fde --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll @@ -0,0 +1,38 @@ +; RUN: opt -analyze -scalar-evolution %s 2>&1 | FileCheck %s + +declare void @iteration() + +define void @reverse_loop(i32 %n) { +; CHECK-LABEL: 'reverse_loop' +; CHECK-NEXT: Classifying expressions for: @reverse_loop +; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] +; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: (0 smin %n) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 +; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: (-1 + (0 smin %n)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: Determining loop execution counts for: @reverse_loop +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %cmp10 = icmp sgt i32 %n, -1 + br i1 %cmp10, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: + br label %for.body + +for.body: + %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] + call void @iteration() + %dec = add nsw i32 %i.011, -1 + %cmp = icmp sgt i32 %i.011, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} From d3f01b6681bf73b382521db812341e0041787cc9 Mon Sep 17 00:00:00 2001 From: Shinji Okumura Date: Sun, 2 Aug 2020 18:37:07 +0900 Subject: [PATCH 115/600] [Attributor] AAPotentialValues Interface This is a 
split patch of D80991.
This patch introduces AAPotentialValues and its interface only.
For more detail on the AAPotentialValues abstract attribute, see the
original patch.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D83283
---
 llvm/include/llvm/ADT/APInt.h                 |  26 +-
 llvm/include/llvm/Transforms/IPO/Attributor.h | 187 +++++++++
 llvm/lib/IR/LLVMContextImpl.h                 |  22 +-
 llvm/lib/Transforms/IPO/Attributor.cpp        |  13 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 162 +++++++
 llvm/test/Transforms/Attributor/potential.ll  | 394 ++++++++++++++++++
 6 files changed, 782 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/potential.ll

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index f7df648d27ed6..14643a14a2f30 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_ADT_APINT_H
 #define LLVM_ADT_APINT_H

+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
@@ -96,7 +97,7 @@ class LLVM_NODISCARD APInt {

   unsigned BitWidth; ///< The number of bits in this APInt.

-  friend struct DenseMapAPIntKeyInfo;
+  friend struct DenseMapInfo<APInt>;

   friend class APSInt;
@@ -2288,6 +2289,29 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
 void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);

+/// Provide DenseMapInfo for APInt.
+template <> struct DenseMapInfo<APInt> {
+  static inline APInt getEmptyKey() {
+    APInt V(nullptr, 0);
+    V.U.VAL = 0;
+    return V;
+  }
+
+  static inline APInt getTombstoneKey() {
+    APInt V(nullptr, 0);
+    V.U.VAL = 1;
+    return V;
+  }
+
+  static unsigned getHashValue(const APInt &Key) {
+    return static_cast<unsigned>(hash_value(Key));
+  }
+
+  static bool isEqual(const APInt &LHS, const APInt &RHS) {
+    return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
+  }
+};
+
 } // namespace llvm

 #endif
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 418a007319679..b15c8f0dd6c35 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -97,6 +97,7 @@
 #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
 #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H

+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -115,6 +116,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -3348,6 +3350,191 @@ struct AAValueConstantRange
   static const char ID;
 };

+/// A class for a set state.
+/// The assumed boolean state indicates whether the corresponding set is full
+/// set or not. If the assumed state is false, this is the worst state. The
+/// worst state (invalid state) of set of potential values is when the set
+/// contains every possible value (i.e. we cannot in any way limit the value
+/// that the target position can take). That never happens naturally, we only
+/// force it. As for the conditions under which we force it, see
+/// AAPotentialValues.
+template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
+struct PotentialValuesState : BooleanState {
+  using SetTy = DenseSet<MemberTy, KeyInfo>;
+
+  PotentialValuesState() : BooleanState(true) {}
+
+  PotentialValuesState(bool IsValid) : BooleanState(IsValid) {}
+
+  /// Return this set. We should check whether this set is valid or not by
+  /// isValidState() before calling this function.
+  const SetTy &getAssumedSet() const {
+    assert(isValidState() && "This set should not be used when it is invalid!");
+    return Set;
+  }
+
+  bool operator==(const PotentialValuesState &RHS) const {
+    if (isValidState() != RHS.isValidState())
+      return false;
+    if (!isValidState() && !RHS.isValidState())
+      return true;
+    return Set == RHS.getAssumedSet();
+  }
+
+  /// Maximum number of potential values to be tracked.
+  /// This is set by -attributor-max-potential-values command line option
+  static unsigned MaxPotentialValues;
+
+  /// Return empty set as the best state of potential values.
+  static PotentialValuesState getBestState() {
+    return PotentialValuesState(true);
+  }
+
+  static PotentialValuesState getBestState(PotentialValuesState &PVS) {
+    return getBestState();
+  }
+
+  /// Return full set as the worst state of potential values.
+  static PotentialValuesState getWorstState() {
+    return PotentialValuesState(false);
+  }
+
+  /// Union assumed set with the passed value.
+  void unionAssumed(const MemberTy &C) { insert(C); }
+
+  /// Union assumed set with assumed set of the passed state \p PVS.
+  void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }
+
+  /// "Clamp" this state with \p PVS.
+  PotentialValuesState operator^=(const PotentialValuesState &PVS) {
+    unionAssumed(PVS);
+    return *this;
+  }
+
+  PotentialValuesState operator&=(const PotentialValuesState &PVS) {
+    unionAssumed(PVS);
+    return *this;
+  }
+
+private:
+  /// Check the size of this set, and invalidate when the size is no
+  /// less than \p MaxPotentialValues threshold.
+  void checkAndInvalidate() {
+    if (Set.size() >= MaxPotentialValues)
+      indicatePessimisticFixpoint();
+  }
+
+  /// Insert an element into this set.
+  void insert(const MemberTy &C) {
+    if (!isValidState())
+      return;
+    Set.insert(C);
+    checkAndInvalidate();
+  }
+
+  /// Take union with R.
+  void unionWith(const PotentialValuesState &R) {
+    /// If this is a full set, do nothing.
+    if (!isValidState())
+      return;
+    /// If R is full set, change L to a full set.
+    if (!R.isValidState()) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+    for (const MemberTy &C : R.Set)
+      Set.insert(C);
+    checkAndInvalidate();
+  }
+
+  /// Take intersection with R.
+  void intersectWith(const PotentialValuesState &R) {
+    /// If R is a full set, do nothing.
+    if (!R.isValidState())
+      return;
+    /// If this is a full set, change this to R.
+    if (!isValidState()) {
+      *this = R;
+      return;
+    }
+    SetTy IntersectSet;
+    for (const MemberTy &C : Set) {
+      if (R.Set.count(C))
+        IntersectSet.insert(C);
+    }
+    Set = IntersectSet;
+  }
+
+  /// Container for potential values
+  SetTy Set;
+};
+
+using PotentialConstantIntValuesState = PotentialValuesState<APInt>;
+
+raw_ostream &operator<<(raw_ostream &OS,
+                        const PotentialConstantIntValuesState &R);
+
+/// An abstract interface for potential values analysis.
+///
+/// This AA collects potential values for each IR position.
+/// An assumed set of potential values is initialized with the empty set (the
+/// best state) and it will grow monotonically as we find more potential values
+/// for this position.
+/// The set might be forced to the worst state, that is, to contain every
+/// possible value for this position in 2 cases.
+/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
+///    case that this position is affected (e.g. because of an operation) by a
+///    Value that is in the worst state.
+/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
+///    operator we do not currently handle).
+///
+/// TODO: Support values other than constant integers.
+struct AAPotentialValues
+    : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
+  using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
+  AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// See AbstractAttribute::getState(...).
+  PotentialConstantIntValuesState &getState() override { return *this; }
+  const PotentialConstantIntValuesState &getState() const override {
+    return *this;
+  }
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPotentialValues &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Return assumed constant for the associated value
+  Optional<ConstantInt *>
+  getAssumedConstantInt(Attributor &A,
+                        const Instruction *CtxI = nullptr) const {
+    if (!isValidState())
+      return nullptr;
+    if (getAssumedSet().size() == 1)
+      return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
+                                                *(getAssumedSet().begin())));
+    if (getAssumedSet().size() == 0)
+      return llvm::None;
+
+    return nullptr;
+  }
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAPotentialValues"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAPotentialValues
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// Run options, used by the pass manager.
 enum AttributorRunOption {
   NONE = 0,
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index b97ac37c5fcfd..e8fdaa23761c3 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -57,27 +57,7 @@ class Type;
 class Value;
 class ValueHandleBase;

-struct DenseMapAPIntKeyInfo {
-  static inline APInt getEmptyKey() {
-    APInt V(nullptr, 0);
-    V.U.VAL = 0;
-    return V;
-  }
-
-  static inline APInt getTombstoneKey() {
-    APInt V(nullptr, 0);
-    V.U.VAL = 1;
-    return V;
-  }
-
-  static unsigned getHashValue(const APInt &Key) {
-    return static_cast<unsigned>(hash_value(Key));
-  }
-
-  static bool isEqual(const APInt &LHS, const APInt &RHS) {
-    return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
-  }
-};
+using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;

 struct DenseMapAPFloatKeyInfo {
   static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 89971cc9c2946..764d71b98e063 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2079,6 +2079,19 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
   return OS;
 }

+raw_ostream &llvm::operator<<(raw_ostream &OS,
+                              const PotentialConstantIntValuesState &S) {
+  OS << "set-state(< {";
+  if (!S.isValidState())
+    OS << "full-set";
+  else
+    for (auto &it : S.getAssumedSet())
+      OS << it << ", ";
+  OS << "} >)";
+
+  return OS;
+}
+
 void AbstractAttribute::print(raw_ostream &OS) const {
   OS << "[";
   OS << getName();
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index aa6bc94a3668e..a220bc001a4ce 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -45,6 +45,16 @@ static cl::opt<bool> ManifestInternal(
 static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
                                        cl::Hidden);

+static cl::opt<unsigned, true> MaxPotentialValues(
+    "attributor-max-potential-values", cl::Hidden,
+    cl::desc("Maximum number of potential values to be "
+             "tracked for each position."),
+    cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
+    cl::init(7));
+
+template <>
+unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
+
 STATISTIC(NumAAs, "Number of abstract attributes created");

 // Some helper macros to deal with statistics tracking.
@@ -120,6 +130,7 @@ PIPE_OPERATOR(AAMemoryLocation)
 PIPE_OPERATOR(AAValueConstantRange)
 PIPE_OPERATOR(AAPrivatizablePtr)
 PIPE_OPERATOR(AAUndefinedBehavior)
+PIPE_OPERATOR(AAPotentialValues)

 #undef PIPE_OPERATOR
 } // namespace llvm
@@ -7075,6 +7086,155 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
     STATS_DECLTRACK_CSARG_ATTR(value_range)
   }
 };
+
+/// ------------------ Potential Values Attribute -------------------------
+
+struct AAPotentialValuesImpl : AAPotentialValues {
+  using StateType = PotentialConstantIntValuesState;
+
+  AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
+      : AAPotentialValues(IRP, A) {}
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    std::string Str;
+    llvm::raw_string_ostream OS(Str);
+    OS << getState();
+    return OS.str();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    return indicatePessimisticFixpoint();
+  }
+};
+
+struct AAPotentialValuesArgument final
+    : AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+                                      PotentialConstantIntValuesState> {
+  using Base =
+      AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
+                                      PotentialConstantIntValuesState>;
+  AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
+      : Base(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(..).
+  void initialize(Attributor &A) override {
+    if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
+      indicatePessimisticFixpoint();
+    } else {
+      Base::initialize(A);
+    }
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesReturned
+    : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
+  using Base =
+      AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
+  AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
+      : Base(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesFloating : AAPotentialValuesImpl {
+  AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
+      : AAPotentialValuesImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(..).
+  void initialize(Attributor &A) override {
+    Value &V = getAssociatedValue();
+
+    if (auto *C = dyn_cast<ConstantInt>(&V)) {
+      unionAssumed(C->getValue());
+      indicateOptimisticFixpoint();
+      return;
+    }
+
+    if (isa<UndefValue>(&V)) {
+      // Collapse the undef state to 0.
+      unionAssumed(
+          APInt(/* numBits */ getAssociatedType()->getIntegerBitWidth(),
+                /* val */ 0));
+      indicateOptimisticFixpoint();
+      return;
+    }
+
+    if (isa<BinaryOperator>(&V) || isa<ICmpInst>(&V) || isa<CastInst>(&V))
+      return;
+
+    if (isa<SelectInst>(V) || isa<PHINode>(V))
+      return;
+
+    indicatePessimisticFixpoint();
+
+    LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
+                      << getAssociatedValue() << "\n");
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesFunction : AAPotentialValuesImpl {
+  AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
+      : AAPotentialValuesImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
+                     "not be called");
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FN_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
+  AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
+      : AAPotentialValuesFunction(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CS_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesCallSiteReturned
+    : AACallSiteReturnedFromReturned<AAPotentialValues,
+                                     AAPotentialValuesImpl> {
+  AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AACallSiteReturnedFromReturned<AAPotentialValues,
+                                       AAPotentialValuesImpl>(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(potential_values)
+  }
+};
+
+struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
+  AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAPotentialValuesFloating(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(potential_values)
+  }
+};
+
 } // namespace

 const char AAReturnedValues::ID = 0;
@@ -7098,6 +7258,7 @@ const char AAPrivatizablePtr::ID = 0;
 const char AAMemoryBehavior::ID = 0;
 const char AAMemoryLocation::ID = 0;
 const char AAValueConstantRange::ID = 0;
+const char AAPotentialValues::ID = 0;

 // Macro magic to create the static generator function for attributes that
 // follow the naming scheme.
@@ -7207,6 +7368,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)

 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
diff --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll
new file mode 100644
index 0000000000000..a4fc23c8b700b
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/potential.ll
@@ -0,0 +1,394 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
+;
+; Test for multiple
potential values +; +; potential-test 1 +; bool iszero(int c) { return c == 0; } +; bool potential_test1(bool c) { return iszero(c ? 1 : -1); } + +define internal i1 @iszero1(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@iszero1 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq i32 %c, 0 + ret i1 %cmp +} + +define i1 @potential_test1(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@potential_test1 +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: [[ARG:%.*]] = select i1 [[C]], i32 -1, i32 1 +; CHECK-NEXT: [[RET:%.*]] = call i1 @iszero1(i32 [[ARG]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %arg = select i1 %c, i32 -1, i32 1 + %ret = call i1 @iszero1(i32 %arg) + ret i1 %ret +} + + +; potential-test 2 +; +; potential values of argument of iszero are {1,-1} +; potential value of returned value of iszero is 0 +; +; int call_with_two_values(int x) { return iszero(x) + iszero(-x); } +; int potential_test2(int x) { return call_with_two_values(1) + call_with_two_values(-1); } + +define internal i32 @iszero2(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@iszero2 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp eq i32 %c, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret +} + +define internal i32 @call_with_two_values(i32 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@call_with_two_values +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 [[C]]) #0, !range !0 +; IS__TUNIT____-NEXT: [[MINUSC:%.*]] = sub i32 0, [[C]] +; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @iszero2(i32 [[MINUSC]]) #0, !range !0 +; IS__TUNIT____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] +; IS__TUNIT____-NEXT: ret i32 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@call_with_two_values +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 [[C]]) +; IS__CGSCC____-NEXT: [[MINUSC:%.*]] = sub i32 0, [[C]] +; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @iszero2(i32 [[MINUSC]]) +; IS__CGSCC____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] +; IS__CGSCC____-NEXT: ret i32 [[RET]] +; + %csret1 = call i32 @iszero2(i32 %c) + %minusc = sub i32 0, %c + %csret2 = call i32 @iszero2(i32 %minusc) + %ret = add i32 %csret1, %csret2 + ret i32 %ret +} + +define i32 @potential_test2(i1 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test2 +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 1) #0, !range !1 +; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @call_with_two_values(i32 -1) #1, !range !1 +; IS__TUNIT____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] +; IS__TUNIT____-NEXT: ret i32 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test2 +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 1) +; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @call_with_two_values(i32 -1) +; IS__CGSCC____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] +; IS__CGSCC____-NEXT: ret i32 [[RET]] +; + %csret1 = call i32 @call_with_two_values(i32 1) + %csret2 = call i32 @call_with_two_values(i32 -1) + %ret = add i32 %csret1, %csret2 + ret i32 %ret +} + + +; potential-test 3 +; +; potential values of returned value of f are {0,1} +; potential values of argument of g are {0,1} +; potential value of returned value of g is 1 +; 
then returned value of g can be simplified +; +; int zero_or_one(int c) { return c < 2; } +; int potential_test3() { return zero_or_one(iszero(0))+zero_or_one(iszero(1)); } + +define internal i32 @iszero3(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@iszero3 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp eq i32 %c, 0 + %ret = zext i1 %cmp to i32 + ret i32 %ret +} + +define internal i32 @less_than_two(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@less_than_two +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 2 +; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp slt i32 %c, 2 + %ret = zext i1 %cmp to i32 + ret i32 %ret +} + +define i32 @potential_test3() { +; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@potential_test3() +; NOT_TUNIT_NPM-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 0) +; NOT_TUNIT_NPM-NEXT: [[TRUE1:%.*]] = call i32 @less_than_two(i32 [[CMP1]]) +; NOT_TUNIT_NPM-NEXT: [[CMP2:%.*]] = call i32 @iszero3(i32 1) +; NOT_TUNIT_NPM-NEXT: [[TRUE2:%.*]] = call i32 @less_than_two(i32 [[CMP2]]) +; NOT_TUNIT_NPM-NEXT: [[RET:%.*]] = add i32 [[TRUE1]], [[TRUE2]] +; NOT_TUNIT_NPM-NEXT: ret i32 [[RET]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@potential_test3() +; IS__TUNIT_NPM-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 0) #0, !range !0 +; IS__TUNIT_NPM-NEXT: [[TRUE1:%.*]] = call i32 @less_than_two(i32 [[CMP1]]) #0, !range !0 +; IS__TUNIT_NPM-NEXT: [[CMP2:%.*]] = call i32 @iszero3(i32 1) #0, !range !0 +; IS__TUNIT_NPM-NEXT: [[TRUE2:%.*]] = call i32 @less_than_two(i32 [[CMP2]]) #0, !range !0 +; IS__TUNIT_NPM-NEXT: [[RET:%.*]] = add i32 [[TRUE1]], [[TRUE2]] +; IS__TUNIT_NPM-NEXT: ret i32 [[RET]] +; + %cmp1 = call i32 @iszero3(i32 0) + %true1 = call i32 @less_than_two(i32 %cmp1) + %cmp2 = call i32 @iszero3(i32 1) + %true2 = call i32 @less_than_two(i32 %cmp2) + %ret = add i32 %true1, %true2 + ret i32 %ret +} + + +; potential-test 4,5 +; +; simplified +; int potential_test4(int c) { return return1or3(c) == 2; } +; int potential_test5(int c) { return return1or3(c) == return2or4(c); } +; +; not simplified +; int potential_test6(int c) { return return1or3(c) == 3; } +; int potential_test7(int c) { return return1or3(c) == return3or4(c); } + +define i32 @potential_test4(i32 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test4 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 +; IS__TUNIT____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET]], 2 +; IS__TUNIT____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 +; IS__TUNIT____-NEXT: ret i32 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test4 +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) +; IS__CGSCC____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET]], 2 +; IS__CGSCC____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 +; IS__CGSCC____-NEXT: ret i32 [[RET]] +; + %csret = call i32 @return1or3(i32 %c) + %false = icmp eq i32 %csret, 2 + %ret = zext i1 %false to i32 + ret i32 %ret +} + +define i32 @potential_test5(i32 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test5 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 +; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @return2or4(i32 [[C]]) #0, !range !3 +; IS__TUNIT____-NEXT: 
[[FALSE:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] +; IS__TUNIT____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 +; IS__TUNIT____-NEXT: ret i32 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test5 +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) +; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @return2or4(i32 [[C]]) +; IS__CGSCC____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] +; IS__CGSCC____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 +; IS__CGSCC____-NEXT: ret i32 [[RET]] +; + %csret1 = call i32 @return1or3(i32 %c) + %csret2 = call i32 @return2or4(i32 %c) + %false = icmp eq i32 %csret1, %csret2 + %ret = zext i1 %false to i32 + ret i32 %ret +} + +define i1 @potential_test6(i32 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test6 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 +; IS__TUNIT____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 +; IS__TUNIT____-NEXT: ret i1 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test6 +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) +; IS__CGSCC____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 +; IS__CGSCC____-NEXT: ret i1 [[RET]] +; + %csret1 = call i32 @return1or3(i32 %c) + %ret = icmp eq i32 %csret1, 3 + ret i1 %ret +} + +define i1 @potential_test7(i32 %c) { +; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test7 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 +; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @return3or4(i32 [[C]]) #0, !range !4 +; IS__TUNIT____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] +; IS__TUNIT____-NEXT: ret i1 [[RET]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test7 +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) +; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @return3or4(i32 [[C]]) +; IS__CGSCC____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] +; IS__CGSCC____-NEXT: ret i1 [[RET]] +; + %csret1 = call i32 @return1or3(i32 %c) + %csret2 = call i32 @return3or4(i32 %c) + %ret = icmp eq i32 %csret1, %csret2 + ret i1 %ret +} + +define internal i32 @return1or3(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@return1or3 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 1, i32 3 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp eq i32 %c, 0 + %ret = select i1 %cmp, i32 1, i32 3 + ret i32 %ret +} + +define internal i32 @return2or4(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@return2or4 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 2, i32 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp eq i32 %c, 0 + %ret = select i1 %cmp, i32 2, i32 4 + ret i32 %ret +} + +define internal i32 @return3or4(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@return3or4 +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 3, i32 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cmp = icmp eq i32 %c, 0 + %ret = select i1 %cmp, i32 3, i32 4 + ret i32 %ret +} + +; potential-test 8 +; +; propagate argument to callsite argument + +define internal i1 @cmp_with_four(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@cmp_with_four +; 
CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 4 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq i32 %c, 4 + ret i1 %cmp +} + +define internal i1 @wrapper(i32 %c) { +; CHECK-LABEL: define {{[^@]+}}@wrapper +; CHECK-SAME: (i32 [[C:%.*]]) +; CHECK-NEXT: [[RET:%.*]] = call i1 @cmp_with_four(i32 [[C]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %ret = call i1 @cmp_with_four(i32 %c) + ret i1 %ret +} + +define i1 @potential_test8() { +; CHECK-LABEL: define {{[^@]+}}@potential_test8() +; CHECK-NEXT: [[RES1:%.*]] = call i1 @wrapper(i32 1) +; CHECK-NEXT: [[RES3:%.*]] = call i1 @wrapper(i32 3) +; CHECK-NEXT: [[RES5:%.*]] = call i1 @wrapper(i32 5) +; CHECK-NEXT: [[RES13:%.*]] = or i1 [[RES1]], [[RES3]] +; CHECK-NEXT: [[RES135:%.*]] = or i1 [[RES13]], [[RES5]] +; CHECK-NEXT: ret i1 [[RES135]] +; + %res1 = call i1 @wrapper(i32 1) + %res3 = call i1 @wrapper(i32 3) + %res5 = call i1 @wrapper(i32 5) + %res13 = or i1 %res1, %res3 + %res135 = or i1 %res13, %res5 + ret i1 %res135 +} + +define i1 @potential_test9() { +; IS________OPM-LABEL: define {{[^@]+}}@potential_test9() +; IS________OPM-NEXT: entry: +; IS________OPM-NEXT: br label [[COND:%.*]] +; IS________OPM: cond: +; IS________OPM-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_1:%.*]], [[INC:%.*]] ] +; IS________OPM-NEXT: [[C_0:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[C_1:%.*]], [[INC]] ] +; IS________OPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10 +; IS________OPM-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] +; IS________OPM: body: +; IS________OPM-NEXT: [[C_1]] = mul i32 [[C_0]], -1 +; IS________OPM-NEXT: br label [[INC]] +; IS________OPM: inc: +; IS________OPM-NEXT: [[I_1]] = add i32 [[I_0]], 1 +; IS________OPM-NEXT: br label [[COND]] +; IS________OPM: end: +; IS________OPM-NEXT: [[RET:%.*]] = icmp eq i32 [[C_0]], 0 +; IS________OPM-NEXT: ret i1 [[RET]] +; +; IS________NPM-LABEL: define {{[^@]+}}@potential_test9() +; IS________NPM-NEXT: entry: +; IS________NPM-NEXT: br label [[COND:%.*]] +; IS________NPM: cond: +; IS________NPM-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_1:%.*]], [[INC:%.*]] ] +; IS________NPM-NEXT: [[C_0:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[C_1:%.*]], [[INC]] ] +; IS________NPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10 +; IS________NPM-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] +; IS________NPM: body: +; IS________NPM-NEXT: [[C_1]] = mul i32 [[C_0]], -1 +; IS________NPM-NEXT: br label [[INC]] +; IS________NPM: inc: +; IS________NPM-NEXT: [[I_1]] = add i32 [[I_0]], 1 +; IS________NPM-NEXT: br label [[COND]] +; IS________NPM: end: +; IS________NPM-NEXT: ret i1 false +; +entry: + br label %cond +cond: + %i.0 = phi i32 [0, %entry], [%i.1, %inc] + %c.0 = phi i32 [1, %entry], [%c.1, %inc] + %cmp = icmp slt i32 %i.0, 10 + br i1 %cmp, label %body, label %end +body: + %c.1 = mul i32 %c.0, -1 + br label %inc +inc: + %i.1 = add i32 %i.0, 1 + br label %cond +end: + %ret = icmp eq i32 %c.0, 0 + ret i1 %ret +} + +; IS__TUNIT____: !0 = !{i32 0, i32 2} +; IS__TUNIT____: !1 = !{i32 0, i32 3} +; IS__TUNIT____: !2 = !{i32 1, i32 4} +; IS__TUNIT____: !3 = !{i32 2, i32 5} +; IS__TUNIT____: !4 = !{i32 3, i32 5} +; IS__TUNIT____-NOT: !5 + +; NOT_TUNIT____-NOT: !0 From 2700311cce99d2a3ef45002e32b8832b88214f7d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 14:13:13 +0100 Subject: [PATCH 116/600] [X86] combineX86ShuffleChain - pull out repeated RootVT.getSizeInBits() calls. NFCI. 
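RootVT.getSizeInBits() and RootVT.getVectorNumElements() were each queried in
several places; the change computes them once near the top of
combineX86ShuffleChain and reuses the locals. As a minimal standalone sketch of
the pattern (plain C++ with illustrative stand-in types, not the real LLVM
API):

    // hoist.cpp - repeated-getter hoisting, illustrative only
    #include <cstdio>

    struct SimpleVT {                    // stand-in for MVT
      unsigned NumElts, EltBits;
      unsigned getSizeInBits() const { return NumElts * EltBits; }
      unsigned getVectorNumElements() const { return NumElts; }
    };

    void combine(const SimpleVT &RootVT) {
      // Query once instead of re-evaluating at every use below.
      const unsigned RootSizeInBits = RootVT.getSizeInBits();
      const unsigned NumRootElts = RootVT.getVectorNumElements();
      std::printf("%u bits, %u elts, %u bits per elt\n", RootSizeInBits,
                  NumRootElts, RootSizeInBits / NumRootElts);
    }

    int main() { combine({4, 32}); }

The getters are cheap, so this is purely a readability cleanup, hence the NFCI
tag.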
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 752b65fc03310..a16c3bf7ac51f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34464,6 +34464,10 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, assert((Inputs.size() == 1 || Inputs.size() == 2) && "Unexpected number of shuffle inputs!"); + MVT RootVT = Root.getSimpleValueType(); + unsigned RootSizeInBits = RootVT.getSizeInBits(); + unsigned NumRootElts = RootVT.getVectorNumElements(); + // Find the inputs that enter the chain. Note that multiple uses are OK // here, we're not going to remove the operands we find. bool UnaryShuffle = (Inputs.size() == 1); @@ -34473,10 +34477,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, MVT VT1 = V1.getSimpleValueType(); MVT VT2 = V2.getSimpleValueType(); - MVT RootVT = Root.getSimpleValueType(); - assert(VT1.getSizeInBits() == RootVT.getSizeInBits() && - VT2.getSizeInBits() == RootVT.getSizeInBits() && - "Vector size mismatch"); + assert(VT1.getSizeInBits() == RootSizeInBits && + VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); SDLoc DL(Root); SDValue Res; @@ -34488,8 +34490,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } bool OptForSize = DAG.shouldOptForSize(); - unsigned RootSizeInBits = RootVT.getSizeInBits(); - unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || (RootVT.isFloatingPoint() && Depth >= 1) || From 3f276840b6f8b2624f0bbeb6097d8049d27d5ca0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 14:15:15 +0100 Subject: [PATCH 117/600] [X86] Use const APInt& in for-range loop to avoid unnecessary copies. NFCI. Fixes clang-tidy warning. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a16c3bf7ac51f..ec34dcaaf95b8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6686,7 +6686,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode, return false; // Insert the extracted elements into the mask. - for (APInt Elt : EltBits) + for (const APInt &Elt : EltBits) RawMask.push_back(Elt.getZExtValue()); return true; From d7e261674141ce42557c57f01869d27f0aecf6ee Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 14:23:00 +0100 Subject: [PATCH 118/600] [X86] Pass SDLoc by const reference. NFCI. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ec34dcaaf95b8..26904f8945ee3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28119,8 +28119,8 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { /// a) very likely accessed only by a single thread to minimize cache traffic, /// and b) definitely dereferenceable. Returns the new Chain result. 
static SDValue emitLockedStackOp(SelectionDAG &DAG, - const X86Subtarget &Subtarget, - SDValue Chain, SDLoc DL) { + const X86Subtarget &Subtarget, SDValue Chain, + const SDLoc &DL) { // Implementation notes: // 1) LOCK prefix creates a full read/write reordering barrier for memory // operations issued by the current processor. As such, the location @@ -38298,7 +38298,7 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, // a vector/float/double that got truncated/extended/bitcast to/from a scalar // integer. If so, replace the scalar ops with bool vector equivalents back down // the chain. -static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, SDLoc DL, +static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); From 20fbbbc583f4d7a35c380b0b88ea96cff0237856 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 14:32:05 +0100 Subject: [PATCH 119/600] [X86] Use const APInt& in for-range loop to avoid unnecessary copies. NFCI. Fixes clang-tidy warning. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 26904f8945ee3..15f283b1bc42a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42976,7 +42976,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (VT == SrcVecVT.getScalarType() && N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) && getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) && - llvm::all_of(EltBits, [](APInt M) { + llvm::all_of(EltBits, [](const APInt &M) { return M.isNullValue() || M.isAllOnesValue(); })) { unsigned NumElts = SrcVecVT.getVectorNumElements(); From 8d1b9505f24aad4015d435cc9f23f77f7ff703f8 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Sun, 2 Aug 2020 21:37:11 +0800 Subject: [PATCH 120/600] [DWARFYAML][debug_aranges] Make the 'Descriptors' field optional. --- llvm/lib/ObjectYAML/DWARFYAML.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 04dd185ad954e..37d45996786d3 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -107,7 +107,7 @@ void MappingTraits::mapping(IO &IO, IO.mapRequired("CuOffset", ARange.CuOffset); IO.mapOptional("AddressSize", ARange.AddrSize); IO.mapOptional("SegmentSelectorSize", ARange.SegSize, 0); - IO.mapRequired("Descriptors", ARange.Descriptors); + IO.mapOptional("Descriptors", ARange.Descriptors); } void MappingTraits::mapping( From a0addbb4ec8c7bf791139699d46b08413c46eed7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 2 Aug 2020 15:45:24 +0200 Subject: [PATCH 121/600] [InstSimplify] Reduce code duplication in icmp of binop folds (NFC) For folds where we check for the binop on both the LHS and RHS, extract a function that expects it on the LHS and call it with swapped order. 
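The refactoring leans on the identity that "icmp pred A, B" is equivalent to
"icmp swapped(pred) B, A", so the RHS variants of each fold do not need their
own copy of the case analysis. A small self-contained sketch of the shape of
the change (illustrative names and a single fold only, not the actual
InstSimplify code):

    // swapped_pred.cpp - deduplicating mirrored icmp folds, illustrative only
    #include <cstdio>
    #include <optional>

    enum Pred { ULT, ULE, UGT, UGE };

    static Pred swapped(Pred P) {  // models ICmpInst::getSwappedPredicate
      switch (P) {
      case ULT: return UGT;
      case ULE: return UGE;
      case UGT: return ULT;
      case UGE: return ULE;
      }
      return P;
    }

    // Fold "icmp Pred (or X, Y), X": (X | Y) can never be u< X.
    static std::optional<bool> foldOrOnLHS(Pred P) {
      if (P == ULT) return false;
      if (P == UGE) return true;
      return std::nullopt;
    }

    // "icmp Pred X, (or X, Y)" is the same fold with the operands swapped,
    // so reuse the LHS helper with the swapped predicate instead of
    // duplicating the checks.
    static std::optional<bool> foldOrOnRHS(Pred P) {
      return foldOrOnLHS(swapped(P));
    }

    int main() {
      std::printf("x u<= (x|y): %d\n", *foldOrOnRHS(ULE)); // always true
    }

The patch below does exactly this with simplifyICmpWithBinOpOnLHS and
ICmpInst::getSwappedPredicate.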
--- llvm/lib/Analysis/InstructionSimplify.cpp | 215 +++++++++------------- 1 file changed, 82 insertions(+), 133 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index b1438b416d56b..4f09ea1896643 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2753,14 +2753,87 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, return nullptr; } +static Value *simplifyICmpWithBinOpOnLHS( + CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { + Type *ITy = GetCompareTy(RHS); // The return type. + + Value *Y = nullptr; + // icmp pred (or X, Y), X + if (match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) { + if (Pred == ICmpInst::ICMP_ULT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_UGE) + return getTrue(ITy); + + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (RHSKnown.isNonNegative() && YKnown.isNegative()) + return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); + if (RHSKnown.isNegative() || YKnown.isNonNegative()) + return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); + } + } + + // icmp pred (and X, Y), X + if (match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + // icmp pred (urem X, Y), Y + if (match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) + break; + LLVM_FALLTHROUGH; + } + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return getFalse(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) + break; + LLVM_FALLTHROUGH; + } + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return getTrue(ITy); + } + } + + // x >> y <=u x + // x udiv y <=u x. + if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || + match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X op Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + return nullptr; +} + /// TODO: A large part of this logic is duplicated in InstCombine's /// foldICmpBinOp(). We should be able to share that and avoid the code /// duplication. static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { - Type *ITy = GetCompareTy(LHS); // The return type. 
- BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); if (MaxRecurse && (LBO || RBO)) { @@ -2831,56 +2904,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, } } - { - Value *Y = nullptr; - // icmp pred (or X, Y), X - if (LBO && match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) { - if (Pred == ICmpInst::ICMP_ULT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_UGE) - return getTrue(ITy); - - if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (RHSKnown.isNonNegative() && YKnown.isNegative()) - return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); - if (RHSKnown.isNegative() || YKnown.isNonNegative()) - return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); - } - } - // icmp pred X, (or X, Y) - if (RBO && match(RBO, m_c_Or(m_Value(Y), m_Specific(LHS)))) { - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - - if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (LHSKnown.isNonNegative() && YKnown.isNegative()) - return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); - if (LHSKnown.isNegative() || YKnown.isNonNegative()) - return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy); - } - } - } + if (LBO) + if (Value *V = simplifyICmpWithBinOpOnLHS(Pred, LBO, RHS, Q, MaxRecurse)) + return V; - // icmp pred (and X, Y), X - if (LBO && match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) { - if (Pred == ICmpInst::ICMP_UGT) - return getFalse(ITy); - if (Pred == ICmpInst::ICMP_ULE) - return getTrue(ITy); - } - // icmp pred X, (and X, Y) - if (RBO && match(RBO, m_c_And(m_Value(), m_Specific(LHS)))) { - if (Pred == ICmpInst::ICMP_UGE) - return getTrue(ITy); - if (Pred == ICmpInst::ICMP_ULT) - return getFalse(ITy); - } + if (RBO) + if (Value *V = simplifyICmpWithBinOpOnLHS( + ICmpInst::getSwappedPredicate(Pred), RBO, LHS, Q, MaxRecurse)) + return V; // 0 - (zext X) pred C if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { @@ -2904,88 +2935,6 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, } } - // icmp pred (urem X, Y), Y - if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { - switch (Pred) { - default: - break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (!Known.isNonNegative()) - break; - LLVM_FALLTHROUGH; - } - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - return getFalse(ITy); - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: { - KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (!Known.isNonNegative()) - break; - LLVM_FALLTHROUGH; - } - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - return getTrue(ITy); - } - } - - // icmp pred X, (urem Y, X) - if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { - switch (Pred) { - default: - break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: { - KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (!Known.isNonNegative()) - break; - LLVM_FALLTHROUGH; - } - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGT: - 
case ICmpInst::ICMP_UGE:
- return getTrue(ITy);
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: {
- KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (!Known.isNonNegative())
- break;
- LLVM_FALLTHROUGH;
- }
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return getFalse(ITy);
- }
- }
-
- // x >> y <=u x
- // x udiv y <=u x.
- if (LBO && (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
- match(LBO, m_UDiv(m_Specific(RHS), m_Value())))) {
- // icmp pred (X op Y), X
- if (Pred == ICmpInst::ICMP_UGT)
- return getFalse(ITy);
- if (Pred == ICmpInst::ICMP_ULE)
- return getTrue(ITy);
- }
-
- // x >=u x >> y
- // x >=u x udiv y.
- if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) ||
- match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) {
- // icmp pred X, (X op Y)
- if (Pred == ICmpInst::ICMP_ULT)
- return getFalse(ITy);
- if (Pred == ICmpInst::ICMP_UGE)
- return getTrue(ITy);
- }
-
 // handle:
 // CI2 << X == CI
 // CI2 << X != CI
From 376b64926b70c8b146caaf397616fb681ae329ca Mon Sep 17 00:00:00 2001
From: Shinji Okumura
Date: Sun, 2 Aug 2020 22:48:28 +0900
Subject: [PATCH 122/600] Revert "[Attributor] AAPotentialValues Interface"

The commit caused a build failure.

---
 llvm/include/llvm/ADT/APInt.h | 26 +-
 llvm/include/llvm/Transforms/IPO/Attributor.h | 187 ---------
 llvm/lib/IR/LLVMContextImpl.h | 22 +-
 llvm/lib/Transforms/IPO/Attributor.cpp | 13 -
 .../Transforms/IPO/AttributorAttributes.cpp | 162 -------
 llvm/test/Transforms/Attributor/potential.ll | 394 ------------------
 6 files changed, 22 insertions(+), 782 deletions(-)
 delete mode 100644 llvm/test/Transforms/Attributor/potential.ll

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 14643a14a2f30..f7df648d27ed6 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_ADT_APINT_H
 #define LLVM_ADT_APINT_H

-#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
 #include <cassert>
@@ -97,7 +96,7 @@ class LLVM_NODISCARD APInt {
 unsigned BitWidth; ///< The number of bits in this APInt.

- friend struct DenseMapInfo<APInt>;
+ friend struct DenseMapAPIntKeyInfo;

 friend class APSInt;

@@ -2289,29 +2288,6 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
 void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);

-/// Provide DenseMapInfo for APInt.
-template <> struct DenseMapInfo { - static inline APInt getEmptyKey() { - APInt V(nullptr, 0); - V.U.VAL = 0; - return V; - } - - static inline APInt getTombstoneKey() { - APInt V(nullptr, 0); - V.U.VAL = 1; - return V; - } - - static unsigned getHashValue(const APInt &Key) { - return static_cast(hash_value(Key)); - } - - static bool isEqual(const APInt &LHS, const APInt &RHS) { - return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; - } -}; - } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index b15c8f0dd6c35..418a007319679 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -97,7 +97,6 @@ #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" @@ -116,7 +115,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/TimeProfiler.h" @@ -3350,191 +3348,6 @@ struct AAValueConstantRange static const char ID; }; -/// A class for a set state. -/// The assumed boolean state indicates whether the corresponding set is full -/// set or not. If the assumed state is false, this is the worst state. The -/// worst state (invalid state) of set of potential values is when the set -/// contains every possible value (i.e. we cannot in any way limit the value -/// that the target position can take). That never happens naturally, we only -/// force it. As for the conditions under which we force it, see -/// AAPotentialValues. -template > -struct PotentialValuesState : BooleanState { - using SetTy = DenseSet; - - PotentialValuesState() : BooleanState(true) {} - - PotentialValuesState(bool IsValid) : BooleanState(IsValid) {} - - /// Return this set. We should check whether this set is valid or not by - /// isValidState() before calling this function. - const SetTy &getAssumedSet() const { - assert(isValidState() && "This set shoud not be used when it is invalid!"); - return Set; - } - - bool operator==(const PotentialValuesState &RHS) const { - if (isValidState() != RHS.isValidState()) - return false; - if (!isValidState() && !RHS.isValidState()) - return true; - return Set == RHS.getAssumedSet(); - } - - /// Maximum number of potential values to be tracked. - /// This is set by -attributor-max-potential-values command line option - static unsigned MaxPotentialValues; - - /// Return empty set as the best state of potential values. - static PotentialValuesState getBestState() { - return PotentialValuesState(true); - } - - static PotentialValuesState getBestState(PotentialValuesState &PVS) { - return getBestState(); - } - - /// Return full set as the worst state of potential values. - static PotentialValuesState getWorstState() { - return PotentialValuesState(false); - } - - /// Union assumed set with the passed value. - void unionAssumed(const MemberTy &C) { insert(C); } - - /// Union assumed set with assumed set of the passed state \p PVS. - void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); } - - /// "Clamp" this state with \p PVS. 
- PotentialValuesState operator^=(const PotentialValuesState &PVS) { - unionAssumed(PVS); - return *this; - } - - PotentialValuesState operator&=(const PotentialValuesState &PVS) { - unionAssumed(PVS); - return *this; - } - -private: - /// Check the size of this set, and invalidate when the size is no - /// less than \p MaxPotentialValues threshold. - void checkAndInvalidate() { - if (Set.size() >= MaxPotentialValues) - indicatePessimisticFixpoint(); - } - - /// Insert an element into this set. - void insert(const MemberTy &C) { - if (!isValidState()) - return; - Set.insert(C); - checkAndInvalidate(); - } - - /// Take union with R. - void unionWith(const PotentialValuesState &R) { - /// If this is a full set, do nothing.; - if (!isValidState()) - return; - /// If R is full set, change L to a full set. - if (!R.isValidState()) { - indicatePessimisticFixpoint(); - return; - } - for (const MemberTy &C : R.Set) - Set.insert(C); - checkAndInvalidate(); - } - - /// Take intersection with R. - void intersectWith(const PotentialValuesState &R) { - /// If R is a full set, do nothing. - if (!R.isValidState()) - return; - /// If this is a full set, change this to R. - if (!isValidState()) { - *this = R; - return; - } - SetTy IntersectSet; - for (const MemberTy &C : Set) { - if (R.Set.count(C)) - IntersectSet.insert(C); - } - Set = IntersectSet; - } - - /// Container for potential values - SetTy Set; -}; - -using PotentialConstantIntValuesState = PotentialValuesState; - -raw_ostream &operator<<(raw_ostream &OS, - const PotentialConstantIntValuesState &R); - -/// An abstract interface for potential values analysis. -/// -/// This AA collects potential values for each IR position. -/// An assumed set of potential values is initialized with the empty set (the -/// best state) and it will grow monotonically as we find more potential values -/// for this position. -/// The set might be forced to the worst state, that is, to contain every -/// possible value for this position in 2 cases. -/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the -/// case that this position is affected (e.g. because of an operation) by a -/// Value that is in the worst state. -/// 2. We tried to initialize on a Value that we cannot handle (e.g. an -/// operator we do not currently handle). -/// -/// TODO: Support values other than constant integers. -struct AAPotentialValues - : public StateWrapper { - using Base = StateWrapper; - AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} - - /// See AbstractAttribute::getState(...). - PotentialConstantIntValuesState &getState() override { return *this; } - const PotentialConstantIntValuesState &getState() const override { - return *this; - } - - /// Create an abstract attribute view for the position \p IRP. 
- static AAPotentialValues &createForPosition(const IRPosition &IRP, - Attributor &A); - - /// Return assumed constant for the associated value - Optional - getAssumedConstantInt(Attributor &A, - const Instruction *CtxI = nullptr) const { - if (!isValidState()) - return nullptr; - if (getAssumedSet().size() == 1) - return cast(ConstantInt::get(getAssociatedValue().getType(), - *(getAssumedSet().begin()))); - if (getAssumedSet().size() == 0) - return llvm::None; - - return nullptr; - } - - /// See AbstractAttribute::getName() - const std::string getName() const override { return "AAPotentialValues"; } - - /// See AbstractAttribute::getIdAddr() - const char *getIdAddr() const override { return &ID; } - - /// This function should return true if the type of the \p AA is - /// AAPotentialValues - static bool classof(const AbstractAttribute *AA) { - return (AA->getIdAddr() == &ID); - } - - /// Unique ID (due to the unique address) - static const char ID; -}; - /// Run options, used by the pass manager. enum AttributorRunOption { NONE = 0, diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index e8fdaa23761c3..b97ac37c5fcfd 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -57,7 +57,27 @@ class Type; class Value; class ValueHandleBase; -using DenseMapAPIntKeyInfo = DenseMapInfo; +struct DenseMapAPIntKeyInfo { + static inline APInt getEmptyKey() { + APInt V(nullptr, 0); + V.U.VAL = 0; + return V; + } + + static inline APInt getTombstoneKey() { + APInt V(nullptr, 0); + V.U.VAL = 1; + return V; + } + + static unsigned getHashValue(const APInt &Key) { + return static_cast(hash_value(Key)); + } + + static bool isEqual(const APInt &LHS, const APInt &RHS) { + return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; + } +}; struct DenseMapAPFloatKeyInfo { static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 764d71b98e063..89971cc9c2946 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -2079,19 +2079,6 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) { return OS; } -raw_ostream &llvm::operator<<(raw_ostream &OS, - const PotentialConstantIntValuesState &S) { - OS << "set-state(< {"; - if (!S.isValidState()) - OS << "full-set"; - else - for (auto &it : S.getAssumedSet()) - OS << it << ", "; - OS << "} >)"; - - return OS; -} - void AbstractAttribute::print(raw_ostream &OS) const { OS << "["; OS << getName(); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index a220bc001a4ce..aa6bc94a3668e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -45,16 +45,6 @@ static cl::opt ManifestInternal( static cl::opt MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128), cl::Hidden); -static cl::opt MaxPotentialValues( - "attributor-max-potential-values", cl::Hidden, - cl::desc("Maximum number of potential values to be " - "tracked for each position."), - cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), - cl::init(7)); - -template <> -unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0; - STATISTIC(NumAAs, "Number of abstract attributes created"); // Some helper macros to deal with statistics tracking. 
@@ -130,7 +120,6 @@ PIPE_OPERATOR(AAMemoryLocation) PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) -PIPE_OPERATOR(AAPotentialValues) #undef PIPE_OPERATOR } // namespace llvm @@ -7086,155 +7075,6 @@ struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating { STATS_DECLTRACK_CSARG_ATTR(value_range) } }; - -/// ------------------ Potential Values Attribute ------------------------- - -struct AAPotentialValuesImpl : AAPotentialValues { - using StateType = PotentialConstantIntValuesState; - - AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A) - : AAPotentialValues(IRP, A) {} - - /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { - std::string Str; - llvm::raw_string_ostream OS(Str); - OS << getState(); - return OS.str(); - } - - /// See AbstractAttribute::updateImpl(...). - ChangeStatus updateImpl(Attributor &A) override { - return indicatePessimisticFixpoint(); - } -}; - -struct AAPotentialValuesArgument final - : AAArgumentFromCallSiteArguments { - using Base = - AAArgumentFromCallSiteArguments; - AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} - - /// See AbstractAttribute::initialize(..). - void initialize(Attributor &A) override { - if (!getAnchorScope() || getAnchorScope()->isDeclaration()) { - indicatePessimisticFixpoint(); - } else { - Base::initialize(A); - } - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_ARG_ATTR(potential_values) - } -}; - -struct AAPotentialValuesReturned - : AAReturnedFromReturnedValues { - using Base = - AAReturnedFromReturnedValues; - AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A) - : Base(IRP, A) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FNRET_ATTR(potential_values) - } -}; - -struct AAPotentialValuesFloating : AAPotentialValuesImpl { - AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} - - /// See AbstractAttribute::initialize(..). - void initialize(Attributor &A) override { - Value &V = getAssociatedValue(); - - if (auto *C = dyn_cast(&V)) { - unionAssumed(C->getValue()); - indicateOptimisticFixpoint(); - return; - } - - if (isa(&V)) { - // Collapse the undef state to 0. - unionAssumed( - APInt(/* numBits */ getAssociatedType()->getIntegerBitWidth(), - /* val */ 0)); - indicateOptimisticFixpoint(); - return; - } - - if (isa(&V) || isa(&V) || isa(&V)) - return; - - if (isa(V) || isa(V)) - return; - - indicatePessimisticFixpoint(); - - LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: " - << getAssociatedValue() << "\n"); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FLOATING_ATTR(potential_values) - } -}; - -struct AAPotentialValuesFunction : AAPotentialValuesImpl { - AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesImpl(IRP, A) {} - - /// See AbstractAttribute::initialize(...). 
- ChangeStatus updateImpl(Attributor &A) override { - llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will " - "not be called"); - } - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_FN_ATTR(potential_values) - } -}; - -struct AAPotentialValuesCallSite : AAPotentialValuesFunction { - AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFunction(IRP, A) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CS_ATTR(potential_values) - } -}; - -struct AAPotentialValuesCallSiteReturned - : AACallSiteReturnedFromReturned { - AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A) - : AACallSiteReturnedFromReturned(IRP, A) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSRET_ATTR(potential_values) - } -}; - -struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { - AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A) - : AAPotentialValuesFloating(IRP, A) {} - - /// See AbstractAttribute::trackStatistics() - void trackStatistics() const override { - STATS_DECLTRACK_CSARG_ATTR(potential_values) - } -}; - } // namespace const char AAReturnedValues::ID = 0; @@ -7258,7 +7098,6 @@ const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; -const char AAPotentialValues::ID = 0; // Macro magic to create the static generator function for attributes that // follow the naming scheme. @@ -7368,7 +7207,6 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) -CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) diff --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll deleted file mode 100644 index a4fc23c8b700b..0000000000000 --- a/llvm/test/Transforms/Attributor/potential.ll +++ /dev/null @@ -1,394 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM -; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM -; -; Test for multiple 
potential values -; -; potential-test 1 -; bool iszero(int c) { return c == 0; } -; bool potential_test1(bool c) { return iszero(c ? 1 : -1); } - -define internal i1 @iszero1(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@iszero1 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: ret i1 [[CMP]] -; - %cmp = icmp eq i32 %c, 0 - ret i1 %cmp -} - -define i1 @potential_test1(i1 %c) { -; CHECK-LABEL: define {{[^@]+}}@potential_test1 -; CHECK-SAME: (i1 [[C:%.*]]) -; CHECK-NEXT: [[ARG:%.*]] = select i1 [[C]], i32 -1, i32 1 -; CHECK-NEXT: [[RET:%.*]] = call i1 @iszero1(i32 [[ARG]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %arg = select i1 %c, i32 -1, i32 1 - %ret = call i1 @iszero1(i32 %arg) - ret i1 %ret -} - - -; potential-test 2 -; -; potential values of argument of iszero are {1,-1} -; potential value of returned value of iszero is 0 -; -; int call_with_two_values(int x) { return iszero(x) + iszero(-x); } -; int potential_test2(int x) { return call_with_two_values(1) + call_with_two_values(-1); } - -define internal i32 @iszero2(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@iszero2 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp eq i32 %c, 0 - %ret = zext i1 %cmp to i32 - ret i32 %ret -} - -define internal i32 @call_with_two_values(i32 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@call_with_two_values -; IS__TUNIT____-SAME: (i32 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 [[C]]) #0, !range !0 -; IS__TUNIT____-NEXT: [[MINUSC:%.*]] = sub i32 0, [[C]] -; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @iszero2(i32 [[MINUSC]]) #0, !range !0 -; IS__TUNIT____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; IS__TUNIT____-NEXT: ret i32 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@call_with_two_values -; IS__CGSCC____-SAME: (i32 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 [[C]]) -; IS__CGSCC____-NEXT: [[MINUSC:%.*]] = sub i32 0, [[C]] -; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @iszero2(i32 [[MINUSC]]) -; IS__CGSCC____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; IS__CGSCC____-NEXT: ret i32 [[RET]] -; - %csret1 = call i32 @iszero2(i32 %c) - %minusc = sub i32 0, %c - %csret2 = call i32 @iszero2(i32 %minusc) - %ret = add i32 %csret1, %csret2 - ret i32 %ret -} - -define i32 @potential_test2(i1 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test2 -; IS__TUNIT____-SAME: (i1 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 1) #0, !range !1 -; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @call_with_two_values(i32 -1) #1, !range !1 -; IS__TUNIT____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; IS__TUNIT____-NEXT: ret i32 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test2 -; IS__CGSCC____-SAME: (i1 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 1) -; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @call_with_two_values(i32 -1) -; IS__CGSCC____-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; IS__CGSCC____-NEXT: ret i32 [[RET]] -; - %csret1 = call i32 @call_with_two_values(i32 1) - %csret2 = call i32 @call_with_two_values(i32 -1) - %ret = add i32 %csret1, %csret2 - ret i32 %ret -} - - -; potential-test 3 -; -; potential values of returned value of f are {0,1} -; potential values of argument of g are {0,1} -; potential value of returned value of g is 1 -; 
then returned value of g can be simplified -; -; int zero_or_one(int c) { return c < 2; } -; int potential_test3() { return zero_or_one(iszero(0))+zero_or_one(iszero(1)); } - -define internal i32 @iszero3(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@iszero3 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp eq i32 %c, 0 - %ret = zext i1 %cmp to i32 - ret i32 %ret -} - -define internal i32 @less_than_two(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@less_than_two -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 2 -; CHECK-NEXT: [[RET:%.*]] = zext i1 [[CMP]] to i32 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp slt i32 %c, 2 - %ret = zext i1 %cmp to i32 - ret i32 %ret -} - -define i32 @potential_test3() { -; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@potential_test3() -; NOT_TUNIT_NPM-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 0) -; NOT_TUNIT_NPM-NEXT: [[TRUE1:%.*]] = call i32 @less_than_two(i32 [[CMP1]]) -; NOT_TUNIT_NPM-NEXT: [[CMP2:%.*]] = call i32 @iszero3(i32 1) -; NOT_TUNIT_NPM-NEXT: [[TRUE2:%.*]] = call i32 @less_than_two(i32 [[CMP2]]) -; NOT_TUNIT_NPM-NEXT: [[RET:%.*]] = add i32 [[TRUE1]], [[TRUE2]] -; NOT_TUNIT_NPM-NEXT: ret i32 [[RET]] -; -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@potential_test3() -; IS__TUNIT_NPM-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 0) #0, !range !0 -; IS__TUNIT_NPM-NEXT: [[TRUE1:%.*]] = call i32 @less_than_two(i32 [[CMP1]]) #0, !range !0 -; IS__TUNIT_NPM-NEXT: [[CMP2:%.*]] = call i32 @iszero3(i32 1) #0, !range !0 -; IS__TUNIT_NPM-NEXT: [[TRUE2:%.*]] = call i32 @less_than_two(i32 [[CMP2]]) #0, !range !0 -; IS__TUNIT_NPM-NEXT: [[RET:%.*]] = add i32 [[TRUE1]], [[TRUE2]] -; IS__TUNIT_NPM-NEXT: ret i32 [[RET]] -; - %cmp1 = call i32 @iszero3(i32 0) - %true1 = call i32 @less_than_two(i32 %cmp1) - %cmp2 = call i32 @iszero3(i32 1) - %true2 = call i32 @less_than_two(i32 %cmp2) - %ret = add i32 %true1, %true2 - ret i32 %ret -} - - -; potential-test 4,5 -; -; simplified -; int potential_test4(int c) { return return1or3(c) == 2; } -; int potential_test5(int c) { return return1or3(c) == return2or4(c); } -; -; not simplified -; int potential_test6(int c) { return return1or3(c) == 3; } -; int potential_test7(int c) { return return1or3(c) == return3or4(c); } - -define i32 @potential_test4(i32 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test4 -; IS__TUNIT____-SAME: (i32 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 -; IS__TUNIT____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET]], 2 -; IS__TUNIT____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 -; IS__TUNIT____-NEXT: ret i32 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test4 -; IS__CGSCC____-SAME: (i32 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) -; IS__CGSCC____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET]], 2 -; IS__CGSCC____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 -; IS__CGSCC____-NEXT: ret i32 [[RET]] -; - %csret = call i32 @return1or3(i32 %c) - %false = icmp eq i32 %csret, 2 - %ret = zext i1 %false to i32 - ret i32 %ret -} - -define i32 @potential_test5(i32 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test5 -; IS__TUNIT____-SAME: (i32 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 -; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @return2or4(i32 [[C]]) #0, !range !3 -; IS__TUNIT____-NEXT: 
[[FALSE:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] -; IS__TUNIT____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 -; IS__TUNIT____-NEXT: ret i32 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test5 -; IS__CGSCC____-SAME: (i32 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) -; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @return2or4(i32 [[C]]) -; IS__CGSCC____-NEXT: [[FALSE:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] -; IS__CGSCC____-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 -; IS__CGSCC____-NEXT: ret i32 [[RET]] -; - %csret1 = call i32 @return1or3(i32 %c) - %csret2 = call i32 @return2or4(i32 %c) - %false = icmp eq i32 %csret1, %csret2 - %ret = zext i1 %false to i32 - ret i32 %ret -} - -define i1 @potential_test6(i32 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test6 -; IS__TUNIT____-SAME: (i32 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 -; IS__TUNIT____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 -; IS__TUNIT____-NEXT: ret i1 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test6 -; IS__CGSCC____-SAME: (i32 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) -; IS__CGSCC____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 -; IS__CGSCC____-NEXT: ret i1 [[RET]] -; - %csret1 = call i32 @return1or3(i32 %c) - %ret = icmp eq i32 %csret1, 3 - ret i1 %ret -} - -define i1 @potential_test7(i32 %c) { -; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test7 -; IS__TUNIT____-SAME: (i32 [[C:%.*]]) -; IS__TUNIT____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #0, !range !2 -; IS__TUNIT____-NEXT: [[CSRET2:%.*]] = call i32 @return3or4(i32 [[C]]) #0, !range !4 -; IS__TUNIT____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] -; IS__TUNIT____-NEXT: ret i1 [[RET]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@potential_test7 -; IS__CGSCC____-SAME: (i32 [[C:%.*]]) -; IS__CGSCC____-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) -; IS__CGSCC____-NEXT: [[CSRET2:%.*]] = call i32 @return3or4(i32 [[C]]) -; IS__CGSCC____-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] -; IS__CGSCC____-NEXT: ret i1 [[RET]] -; - %csret1 = call i32 @return1or3(i32 %c) - %csret2 = call i32 @return3or4(i32 %c) - %ret = icmp eq i32 %csret1, %csret2 - ret i1 %ret -} - -define internal i32 @return1or3(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@return1or3 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 1, i32 3 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp eq i32 %c, 0 - %ret = select i1 %cmp, i32 1, i32 3 - ret i32 %ret -} - -define internal i32 @return2or4(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@return2or4 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 2, i32 4 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp eq i32 %c, 0 - %ret = select i1 %cmp, i32 2, i32 4 - ret i32 %ret -} - -define internal i32 @return3or4(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@return3or4 -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[RET:%.*]] = select i1 [[CMP]], i32 3, i32 4 -; CHECK-NEXT: ret i32 [[RET]] -; - %cmp = icmp eq i32 %c, 0 - %ret = select i1 %cmp, i32 3, i32 4 - ret i32 %ret -} - -; potential-test 8 -; -; propagate argument to callsite argument - -define internal i1 @cmp_with_four(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@cmp_with_four -; 
CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 4 -; CHECK-NEXT: ret i1 [[CMP]] -; - %cmp = icmp eq i32 %c, 4 - ret i1 %cmp -} - -define internal i1 @wrapper(i32 %c) { -; CHECK-LABEL: define {{[^@]+}}@wrapper -; CHECK-SAME: (i32 [[C:%.*]]) -; CHECK-NEXT: [[RET:%.*]] = call i1 @cmp_with_four(i32 [[C]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %ret = call i1 @cmp_with_four(i32 %c) - ret i1 %ret -} - -define i1 @potential_test8() { -; CHECK-LABEL: define {{[^@]+}}@potential_test8() -; CHECK-NEXT: [[RES1:%.*]] = call i1 @wrapper(i32 1) -; CHECK-NEXT: [[RES3:%.*]] = call i1 @wrapper(i32 3) -; CHECK-NEXT: [[RES5:%.*]] = call i1 @wrapper(i32 5) -; CHECK-NEXT: [[RES13:%.*]] = or i1 [[RES1]], [[RES3]] -; CHECK-NEXT: [[RES135:%.*]] = or i1 [[RES13]], [[RES5]] -; CHECK-NEXT: ret i1 [[RES135]] -; - %res1 = call i1 @wrapper(i32 1) - %res3 = call i1 @wrapper(i32 3) - %res5 = call i1 @wrapper(i32 5) - %res13 = or i1 %res1, %res3 - %res135 = or i1 %res13, %res5 - ret i1 %res135 -} - -define i1 @potential_test9() { -; IS________OPM-LABEL: define {{[^@]+}}@potential_test9() -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: br label [[COND:%.*]] -; IS________OPM: cond: -; IS________OPM-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_1:%.*]], [[INC:%.*]] ] -; IS________OPM-NEXT: [[C_0:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[C_1:%.*]], [[INC]] ] -; IS________OPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10 -; IS________OPM-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] -; IS________OPM: body: -; IS________OPM-NEXT: [[C_1]] = mul i32 [[C_0]], -1 -; IS________OPM-NEXT: br label [[INC]] -; IS________OPM: inc: -; IS________OPM-NEXT: [[I_1]] = add i32 [[I_0]], 1 -; IS________OPM-NEXT: br label [[COND]] -; IS________OPM: end: -; IS________OPM-NEXT: [[RET:%.*]] = icmp eq i32 [[C_0]], 0 -; IS________OPM-NEXT: ret i1 [[RET]] -; -; IS________NPM-LABEL: define {{[^@]+}}@potential_test9() -; IS________NPM-NEXT: entry: -; IS________NPM-NEXT: br label [[COND:%.*]] -; IS________NPM: cond: -; IS________NPM-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_1:%.*]], [[INC:%.*]] ] -; IS________NPM-NEXT: [[C_0:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[C_1:%.*]], [[INC]] ] -; IS________NPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10 -; IS________NPM-NEXT: br i1 [[CMP]], label [[BODY:%.*]], label [[END:%.*]] -; IS________NPM: body: -; IS________NPM-NEXT: [[C_1]] = mul i32 [[C_0]], -1 -; IS________NPM-NEXT: br label [[INC]] -; IS________NPM: inc: -; IS________NPM-NEXT: [[I_1]] = add i32 [[I_0]], 1 -; IS________NPM-NEXT: br label [[COND]] -; IS________NPM: end: -; IS________NPM-NEXT: ret i1 false -; -entry: - br label %cond -cond: - %i.0 = phi i32 [0, %entry], [%i.1, %inc] - %c.0 = phi i32 [1, %entry], [%c.1, %inc] - %cmp = icmp slt i32 %i.0, 10 - br i1 %cmp, label %body, label %end -body: - %c.1 = mul i32 %c.0, -1 - br label %inc -inc: - %i.1 = add i32 %i.0, 1 - br label %cond -end: - %ret = icmp eq i32 %c.0, 0 - ret i1 %ret -} - -; IS__TUNIT____: !0 = !{i32 0, i32 2} -; IS__TUNIT____: !1 = !{i32 0, i32 3} -; IS__TUNIT____: !2 = !{i32 1, i32 4} -; IS__TUNIT____: !3 = !{i32 2, i32 5} -; IS__TUNIT____: !4 = !{i32 3, i32 5} -; IS__TUNIT____-NOT: !5 - -; NOT_TUNIT____-NOT: !0 From 90dab1aece7100ace855321162c0d2a09b31c1b3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 14:57:39 +0100 Subject: [PATCH 123/600] Remove unused param tag to fix Wdocumentation warning. NFC. 
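Clang's -Wdocumentation validates Doxygen comments against the actual
signature, so a \param tag naming a parameter the function does not take is
diagnosed. A minimal reproduction of the warning class being fixed here
(hypothetical function, not the ParseOpenMP code):

    // doc_warn.cpp - try: clang++ -fsyntax-only -Wdocumentation doc_warn.cpp

    /// Parses a simple expression in parens.
    /// \param RLoc Returned location of right paren.
    int parseParenExpr();  // warning: parameter 'RLoc' not found in the
                           // function declaration [-Wdocumentation]

    int main() { return 0; }

The fix is simply to delete the stale \param line once the parameter is gone
from the signature.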
--- clang/lib/Parse/ParseOpenMP.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 8a92a8259c444..f24cb3d3f0b7d 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3191,7 +3191,6 @@ static void parseMapType(Parser &P, Parser::OpenMPVarListDataTy &Data) { /// Parses simple expression in parens for single-expression clauses of OpenMP /// constructs. -/// \param RLoc Returned location of right paren. ExprResult Parser::ParseOpenMPIteratorsExpr() { assert(Tok.is(tok::identifier) && PP.getSpelling(Tok) == "iterator" && "Expected 'iterator' token."); From d14a22da5e437dfbf5fe96a6586cec2153f36861 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 15:03:04 +0100 Subject: [PATCH 124/600] [DAG] TargetLowering::LowerAsmOutputForConstraint - pass SDLoc as const& Try to be more consistent with the SDLoc param in the TargetLowering methods. --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- llvm/lib/Target/X86/X86ISelLowering.h | 3 ++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 79ef2d06d38f4..eae39f319d89f 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4182,7 +4182,7 @@ class TargetLowering : public TargetLoweringBase { // Lower custom output constraints. If invalid, return SDValue(). virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, - SDLoc DL, + const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4c7c46218c785..5e26ff354b674 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4322,8 +4322,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { } SDValue TargetLowering::LowerAsmOutputForConstraint( - SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, - SelectionDAG &DAG) const { + SDValue &Chain, SDValue &Flag, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 15f283b1bc42a..93852e13b7d08 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -49594,8 +49594,8 @@ LowerXConstraint(EVT ConstraintVT) const { // Lower @cc targets via setcc. SDValue X86TargetLowering::LowerAsmOutputForConstraint( - SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, - SelectionDAG &DAG) const { + SDValue &Chain, SDValue &Flag, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode); if (Cond == X86::COND_INVALID) return SDValue(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2c22a62fb506f..55611b6bfc9f8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1116,7 +1116,8 @@ namespace llvm { } /// Handle Lowering flag assembly outputs. 
- SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL, + SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, + const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override; From b8ffbf0e02e24d7be3017c8ee5f17dab9e39719f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 15:31:36 +0100 Subject: [PATCH 125/600] [DAG] TargetLowering::expandMUL_LOHI - pass SDLoc as const& Try to be more consistent with the SDLoc param in the TargetLowering methods. This also exposes an issue where we were passing a SDNode as a SDLoc, relying on the implicit SDLoc(SDNode) constructor. --- llvm/include/llvm/CodeGen/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index eae39f319d89f..f93b11bd1c2cb 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4263,7 +4263,7 @@ class TargetLowering : public TargetLoweringBase { /// \param RL Low bits of the RHS of the MUL. See LL for meaning /// \param RH High bits of the RHS of the MUL. See LL for meaning. /// \returns true if the node has been expanded, false if it has not - bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, + bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL = SDValue(), SDValue LH = SDValue(), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ce185be162bcd..87e9a9e280d5c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3398,7 +3398,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector Halves; EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext()); assert(TLI.isTypeLegal(HalfType)); - if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves, + if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, dl, LHS, RHS, Halves, HalfType, DAG, TargetLowering::MulExpansionKind::Always)) { for (unsigned i = 0; i < 2; ++i) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5e26ff354b674..c7829cdd9cdbd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5928,7 +5928,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl &Result, EVT HiLoVT, SelectionDAG &DAG, @@ -6116,7 +6116,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SDValue LL, SDValue LH, SDValue RL, SDValue RH) const { SmallVector Result; - bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N), N->getOperand(0), N->getOperand(1), Result, HiLoVT, DAG, Kind, LL, LH, RL, RH); if (Ok) { From 7dd4f03595d3687cab555d538c82a41f1c1043ce Mon Sep 17 00:00:00 2001 From: Simon Pilgrim 
Date: Sun, 2 Aug 2020 15:16:00 +0100 Subject: [PATCH 126/600] Merge null and isa<> tests into isa_and_nonnull<>. NFCI. --- llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index aea355a297318..c9e092e5deca7 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -377,7 +377,7 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, SmallVector<int, 8> ShiftAmts; for (int I = 0; I < NumElts; ++I) { auto *CElt = CShift->getAggregateElement(I); - if (CElt && isa<UndefValue>(CElt)) { + if (isa_and_nonnull<UndefValue>(CElt)) { ShiftAmts.push_back(-1); continue; } From 00d0f354f26dc725ee1ce756df383557eeb44c65 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 15:21:52 +0100 Subject: [PATCH 127/600] X86InstrInfo.cpp - fix include ordering. NFCI. --- llvm/lib/Target/X86/X86InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 42c1111735701..ae838274f2e68 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -28,9 +28,9 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" From 212570abcf755b8577a7aec80777503232d36d77 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jun 2020 11:54:49 -0400 Subject: [PATCH 128/600] GlobalISel: Implement bitcast action for G_EXTRACT_VECTOR_ELT For AMDGPU, vectors with elements < 32 bits should be indexed in 32-bit elements and the desired bits extracted from there. For elements > 64 bits, these should be reduced to 64/32-bit elements to enable the normal dynamic indexing paths. In the dynamic index cases, this produces shorter code most of the time. This does immediately regress the constant index cases, but this should be fixed once we have the most basic of shift combines. The element size > 64 case is pretty much ported from the existing DAG implementation for extract element promotion. The increasing element size case is new.
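
The sub-32-bit case reduces to shift/mask arithmetic on the containing 32-bit word. Below is a minimal, self-contained C++ sketch of that arithmetic for readers of this patch; it mirrors the G_LSHR/G_AND/G_SHL/G_TRUNC sequence the legalizer emits but is not the patch's own MIR-building code. The function name, the word layout, and the use of the GCC/Clang __builtin_ctz builtin are illustrative assumptions, and power-of-two element sizes are assumed as in the patch.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Read element `idx` of a vector of narrow (sub-32-bit) elements by
    // indexing the same storage as 32-bit words and shifting the wanted
    // bits out of the containing word. oldEltSize must be a power of two.
    uint32_t extractNarrowElt(const uint32_t *words, unsigned oldEltSize,
                              unsigned idx) {
      assert(oldEltSize < 32 && (oldEltSize & (oldEltSize - 1)) == 0);
      unsigned perWord = 32 / oldEltSize;           // narrow elts per word
      unsigned log2Ratio = __builtin_ctz(perWord);  // Log2(32 / oldEltSize)
      unsigned scaledIdx = idx >> log2Ratio;        // word index   (G_LSHR)
      unsigned offsetIdx = idx & (perWord - 1);     // elt in word  (G_AND)
      unsigned offsetBits = offsetIdx * oldEltSize; // bit offset   (G_SHL)
      uint32_t wide = words[scaledIdx]; // G_EXTRACT_VECTOR_ELT on the i32 vector
      return (wide >> offsetBits) & ((1u << oldEltSize) - 1); // lshr + trunc
    }

    int main() {
      // <8 x i8> stored as two 32-bit words, little-endian element order.
      uint32_t v8i8[2] = {0x44332211u, 0x88776655u};
      printf("0x%x\n", extractNarrowElt(v8i8, 8, 5)); // element 5 -> 0x66
      return 0;
    }

The dynamic-indexing win is that only scaledIdx feeds the indexed register read; everything else is uniform shift/mask work in the native 32-bit register size.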
--- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 4 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 118 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 44 +- .../AMDGPU/GlobalISel/extractelement.i128.ll | 769 ++++ .../AMDGPU/GlobalISel/extractelement.i16.ll | 802 +++++ .../AMDGPU/GlobalISel/extractelement.i8.ll | 3135 +++++++++++++++++ .../legalize-extract-vector-elt.mir | 1114 +++++- .../legalize-llvm.amdgcn.image.dim.a16.ll | 952 +++-- .../GlobalISel/legalize-shuffle-vector.mir | 34 +- .../legalize-shuffle-vector.s16.mir | 371 +- 10 files changed, 6776 insertions(+), 567 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index d925c53a57502..e819dca5bdf0f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -307,6 +307,10 @@ class LegalizerHelper { LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT. + LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy); + LegalizeResult lowerBitcast(MachineInstr &MI); LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index abb983dac6bdc..920c9e008012e 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2330,6 +2330,122 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { return UnableToLegalize; } +/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this +/// is casting to a vector with a smaller element size, perform multiple element +/// extracts and merge the results. If this is coercing to a vector with larger +/// elements, index the bitcasted vector and extract the target element with bit +/// operations. This is intended to force the indexing in the native register +/// size for architectures that can dynamically index the register file. +LegalizerHelper::LegalizeResult +LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register SrcVec = MI.getOperand(1).getReg(); + Register Idx = MI.getOperand(2).getReg(); + LLT SrcVecTy = MRI.getType(SrcVec); + LLT IdxTy = MRI.getType(Idx); + + LLT SrcEltTy = SrcVecTy.getElementType(); + unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1; + unsigned OldNumElts = SrcVecTy.getNumElements(); + + LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy; + Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0); + + const unsigned NewEltSize = NewEltTy.getSizeInBits(); + const unsigned OldEltSize = SrcEltTy.getSizeInBits(); + if (NewNumElts > OldNumElts) { + // Decreasing the vector element size + // + // e.g. 
i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + if (NewNumElts % OldNumElts != 0) + return UnableToLegalize; + + // Type of the intermediate result vector. + const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts; + LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy); + + auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt); + + SmallVector<Register, 8> NewOps(NewEltsPerOldElt); + auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK); + + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I); + auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset); + auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx); + NewOps[I] = Elt.getReg(0); + } + + auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps); + MIRBuilder.buildBitcast(Dst, NewVec); + MI.eraseFromParent(); + return Legalized; + } + + if (NewNumElts < OldNumElts) { + if (NewEltSize % OldEltSize != 0) + return UnableToLegalize; + + // This only depends on powers of 2 because we use bit tricks to figure out + // the bit offset we need to shift to get the target element. A general + // expansion could emit division/multiply. + if (!isPowerOf2_32(NewEltSize / OldEltSize)) + return UnableToLegalize; + + // Increasing the vector element size. + // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx + // + // => + // + // %cast = G_BITCAST %vec + // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize) + // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx + // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize)) + // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize) + // %elt_bits = G_LSHR %wide_elt, %offset_bits + // %elt = G_TRUNC %elt_bits + + const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize); + auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio); + + // Divide to get the index in the wider element type. + auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio); + + Register WideElt = CastVec; + if (CastTy.isVector()) { + WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, + ScaledIdx).getReg(0); + } + + // Now figure out the amount we need to shift to get the target bits. + auto OffsetMask = MIRBuilder.buildConstant( + IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio)); + auto OffsetIdx = MIRBuilder.buildAnd(IdxTy, Idx, OffsetMask); + auto OffsetBits = MIRBuilder.buildShl( + IdxTy, OffsetIdx, + MIRBuilder.buildConstant(IdxTy, Log2_32(OldEltSize))); + + // Shift the wide element to get the target element.
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits); + MIRBuilder.buildTrunc(Dst, ExtractedBits); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { @@ -2378,6 +2494,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return bitcastExtractVectorElt(MI, TypeIdx, CastTy); default: return UnableToLegalize; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c5d5f1675bc8d..cc97e11707ab1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -70,6 +70,13 @@ static LegalityPredicate isSmallOddVector(unsigned TypeIdx) { }; } +static LegalityPredicate sizeIsMultipleOf32(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + return Ty.getSizeInBits() % 32 == 0; + }; +} + static LegalityPredicate isWideVec16(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; @@ -132,6 +139,15 @@ static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { }; } +static LegalizeMutation bitcastToVectorElement32(unsigned TypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT Ty = Query.Types[TypeIdx]; + unsigned Size = Ty.getSizeInBits(); + assert(Size % 32 == 0); + return std::make_pair(TypeIdx, LLT::scalarOrVector(Size / 32, 32)); + }; +} + static LegalityPredicate vectorSmallerThan(unsigned TypeIdx, unsigned Size) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; @@ -1279,11 +1295,29 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT EltTy = Query.Types[EltTypeIdx]; const LLT VecTy = Query.Types[VecTypeIdx]; const LLT IdxTy = Query.Types[IdxTypeIdx]; - return (EltTy.getSizeInBits() == 16 || - EltTy.getSizeInBits() % 32 == 0) && - VecTy.getSizeInBits() % 32 == 0 && - VecTy.getSizeInBits() <= MaxRegisterSize && - IdxTy.getSizeInBits() == 32; + const unsigned EltSize = EltTy.getSizeInBits(); + return (EltSize == 32 || EltSize == 64) && + VecTy.getSizeInBits() % 32 == 0 && + VecTy.getSizeInBits() <= MaxRegisterSize && + IdxTy.getSizeInBits() == 32; + }) + .bitcastIf(all(sizeIsMultipleOf32(1), scalarOrEltNarrowerThan(1, 32)), + bitcastToVectorElement32(1)) + //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1)) + .bitcastIf( + all(sizeIsMultipleOf32(1), scalarOrEltWiderThan(1, 64)), + [=](const LegalityQuery &Query) { + // For > 64-bit element types, try to turn this into a 64-bit + // element vector since we may be able to do better indexing + // if this is scalar. If not, fall back to 32. + const LLT EltTy = Query.Types[EltTypeIdx]; + const LLT VecTy = Query.Types[VecTypeIdx]; + const unsigned DstEltSize = EltTy.getSizeInBits(); + const unsigned VecSize = VecTy.getSizeInBits(); + + const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 
64 : 32; + return std::make_pair( + VecTypeIdx, LLT::vector(VecSize / TargetEltSize, TargetEltSize)); }) .clampScalar(EltTypeIdx, S32, S64) .clampScalar(VecTypeIdx, S32, S64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll new file mode 100644 index 0000000000000..28c0651b10fd2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -0,0 +1,769 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 +; GFX9-NEXT: s_lshl_b32 m0, s4, 1 +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_movrels_b64 s[0:1], s[8:9] +; GFX9-NEXT: s_movrels_b64 s[2:3], s[10:11] +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 +; GFX8-NEXT: s_lshl_b32 m0, s4, 1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_movrels_b64 s[0:1], s[8:9] +; GFX8-NEXT: s_movrels_b64 s[2:3], s[10:11] +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 +; GFX7-NEXT: s_lshl_b32 m0, s4, 1 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_movrels_b64 s[0:1], s[8:9] +; GFX7-NEXT: s_movrels_b64 s[2:3], s[10:11] +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 %idx + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[10:13], v[0:1], off offset:32 +; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:48 +; GFX9-NEXT: s_lshl_b32 s0, s2, 1 +; GFX9-NEXT: s_lshl_b32 s0, s0, 1 +; GFX9-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-NEXT: v_mov_b32_e32 v18, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, v3 +; GFX9-NEXT: s_set_gpr_idx_off +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v18 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 16, v0 +; GFX8-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc +; GFX8-NEXT: v_add_u32_e32 v10, vcc, 32, v0 +; GFX8-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[2:5], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[6:9], v[6:7] +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, 
v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[10:13], v[10:11] +; GFX8-NEXT: flat_load_dwordx4 v[14:17], v[0:1] +; GFX8-NEXT: s_lshl_b32 s0, s2, 1 +; GFX8-NEXT: s_lshl_b32 m0, s0, 1 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_movrels_b32_e32 v1, v3 +; GFX8-NEXT: v_movrels_b32_e32 v0, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, v1 +; GFX8-NEXT: v_mov_b32_e32 v2, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[2:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-NEXT: buffer_load_dwordx4 v[10:13], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-NEXT: buffer_load_dwordx4 v[14:17], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-NEXT: s_lshl_b32 s0, s2, 1 +; GFX7-NEXT: s_lshl_b32 m0, s0, 1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_movrels_b32_e32 v1, v3 +; GFX7-NEXT: v_movrels_b32_e32 v0, v2 +; GFX7-NEXT: v_mov_b32_e32 v3, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 %idx + ret i128 %element +} + +define i128 @extractelement_vgpr_v4i128_vgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i128_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_lshlrev_b32_e32 v16, 1, v2 +; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16 +; GFX9-NEXT: v_add_u32_e32 v17, 1, v16 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 6, v16 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[8:9], 7, v16 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_cndmask_b32_e64 v10, v2, v4, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v11, v3, v5, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v4, v10, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v11, v7, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 7, v17 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 4, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, 
v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 5, v17 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 6, v17 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v12, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v13, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v14, s[8:9] +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[8:9] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v14, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i128_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v3, vcc, 16, v0 +; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[3:4] +; GFX8-NEXT: v_lshlrev_b32_e32 v16, 1, v2 +; GFX8-NEXT: v_add_u32_e32 v17, vcc, 1, v16 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 6, v16 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 7, v16 +; GFX8-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; GFX8-NEXT: v_cndmask_b32_e64 v2, v8, v10, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v9, v11, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v18, v2, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v19, v3, v7, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 32, v0 +; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[2:3] +; GFX8-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 7, v17 +; GFX8-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; GFX8-NEXT: v_cndmask_b32_e32 v0, v18, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v19, v9, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 4, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v5, v9, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 5, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 6, v17 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v12, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v13, s[6:7] +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 
v14, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[8:9] +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v14, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i128_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s10, 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: v_lshlrev_b32_e32 v16, 1, v2 +; GFX7-NEXT: buffer_load_dwordx4 v[2:5], v[0:1], s[8:11], 0 addr64 +; GFX7-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[8:11], 0 addr64 offset:16 +; GFX7-NEXT: v_add_i32_e32 v17, vcc, 1, v16 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v17 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[6:7], 6, v16 +; GFX7-NEXT: s_waitcnt vmcnt(1) +; GFX7-NEXT: v_cndmask_b32_e64 v10, v2, v4, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v11, v3, v5, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v16 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v4, v10, v6, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v5, v11, v7, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v17 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v16 +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v17 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[8:11], 0 addr64 offset:32 +; GFX7-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[8:11], 0 addr64 offset:48 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 4, v16 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 7, v17 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 7, v16 +; GFX7-NEXT: s_waitcnt vmcnt(1) +; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v9, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 4, v17 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 5, v16 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 5, v17 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 6, v17 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v12, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v12, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v13, s[6:7] +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v14, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[8:9] +; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v14, s[4:5] +; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 %idx + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; 
GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX9-NEXT: v_mov_b32_e32 v7, s6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, s7 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, s8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX9-NEXT: v_mov_b32_e32 v10, s9 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX9-NEXT: v_mov_b32_e32 v11, s10 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX9-NEXT: v_mov_b32_e32 v12, s11 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc +; GFX9-NEXT: v_mov_b32_e32 v13, s12 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX9-NEXT: v_mov_b32_e32 v14, s13 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v14, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX9-NEXT: v_mov_b32_e32 v15, s14 +; GFX9-NEXT: v_mov_b32_e32 v16, s15 +; GFX9-NEXT: v_add_u32_e32 v2, 1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v16, vcc +; GFX9-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NEXT: v_mov_b32_e32 v5, s2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX9-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX9-NEXT: v_mov_b32_e32 v7, s4 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GFX9-NEXT: v_mov_b32_e32 v9, s6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX9-NEXT: v_mov_b32_e32 v10, s7 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX9-NEXT: v_mov_b32_e32 v11, s8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX9-NEXT: v_mov_b32_e32 v12, s9 +; GFX9-NEXT: v_mov_b32_e32 v13, s10 +; GFX9-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX9-NEXT: v_mov_b32_e32 v6, s12 +; GFX9-NEXT: v_mov_b32_e32 v7, s13 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX9-NEXT: v_mov_b32_e32 v8, s14 +; GFX9-NEXT: v_mov_b32_e32 v9, s15 +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v9, vcc +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s1, v1 +; GFX9-NEXT: v_readfirstlane_b32 s2, v2 +; GFX9-NEXT: v_readfirstlane_b32 s3, v3 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i128_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: 
v_mov_b32_e32 v4, s3 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: v_mov_b32_e32 v6, s5 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX8-NEXT: v_mov_b32_e32 v7, s6 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX8-NEXT: v_mov_b32_e32 v8, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; GFX8-NEXT: v_mov_b32_e32 v9, s8 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX8-NEXT: v_mov_b32_e32 v10, s9 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX8-NEXT: v_mov_b32_e32 v11, s10 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX8-NEXT: v_mov_b32_e32 v12, s11 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc +; GFX8-NEXT: v_mov_b32_e32 v13, s12 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX8-NEXT: v_mov_b32_e32 v14, s13 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v14, vcc +; GFX8-NEXT: v_mov_b32_e32 v15, s14 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX8-NEXT: v_mov_b32_e32 v16, s15 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v16, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v2 +; GFX8-NEXT: v_mov_b32_e32 v4, s1 +; GFX8-NEXT: v_mov_b32_e32 v5, s2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX8-NEXT: v_mov_b32_e32 v6, s3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX8-NEXT: v_mov_b32_e32 v7, s4 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX8-NEXT: v_mov_b32_e32 v8, s5 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GFX8-NEXT: v_mov_b32_e32 v9, s6 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX8-NEXT: v_mov_b32_e32 v10, s7 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX8-NEXT: v_mov_b32_e32 v11, s8 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX8-NEXT: v_mov_b32_e32 v12, s9 +; GFX8-NEXT: v_mov_b32_e32 v13, s10 +; GFX8-NEXT: v_mov_b32_e32 v5, s11 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX8-NEXT: v_mov_b32_e32 v6, s12 +; GFX8-NEXT: v_mov_b32_e32 v7, s13 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX8-NEXT: v_mov_b32_e32 v8, s14 +; GFX8-NEXT: v_mov_b32_e32 v9, s15 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v8, vcc +; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v9, vcc +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s1, v1 +; GFX8-NEXT: v_readfirstlane_b32 s2, v2 +; GFX8-NEXT: v_readfirstlane_b32 s3, v3 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v3, s0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: v_mov_b32_e32 v0, s1 +; GFX7-NEXT: v_mov_b32_e32 v4, s3 +; GFX7-NEXT: v_mov_b32_e32 
v5, s4 +; GFX7-NEXT: v_mov_b32_e32 v6, s5 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX7-NEXT: v_mov_b32_e32 v7, s6 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX7-NEXT: v_mov_b32_e32 v8, s7 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, s8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX7-NEXT: v_mov_b32_e32 v10, s9 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc +; GFX7-NEXT: v_mov_b32_e32 v11, s10 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX7-NEXT: v_mov_b32_e32 v12, s11 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc +; GFX7-NEXT: v_mov_b32_e32 v13, s12 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX7-NEXT: v_mov_b32_e32 v14, s13 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v14, vcc +; GFX7-NEXT: v_mov_b32_e32 v15, s14 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX7-NEXT: v_mov_b32_e32 v16, s15 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v15, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v16, vcc +; GFX7-NEXT: v_add_i32_e32 v2, vcc, 1, v2 +; GFX7-NEXT: v_mov_b32_e32 v4, s1 +; GFX7-NEXT: v_mov_b32_e32 v5, s2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX7-NEXT: v_mov_b32_e32 v6, s3 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GFX7-NEXT: v_mov_b32_e32 v7, s4 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v2 +; GFX7-NEXT: v_mov_b32_e32 v8, s5 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GFX7-NEXT: v_mov_b32_e32 v9, s6 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v2 +; GFX7-NEXT: v_mov_b32_e32 v10, s7 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc +; GFX7-NEXT: v_mov_b32_e32 v11, s8 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 4, v2 +; GFX7-NEXT: v_mov_b32_e32 v12, s9 +; GFX7-NEXT: v_mov_b32_e32 v13, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 5, v2 +; GFX7-NEXT: v_mov_b32_e32 v6, s12 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v13, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 6, v2 +; GFX7-NEXT: v_mov_b32_e32 v8, s14 +; GFX7-NEXT: v_mov_b32_e32 v9, s15 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 7, v2 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v8, vcc +; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v9, vcc +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s1, v1 +; GFX7-NEXT: v_readfirstlane_b32 s2, v2 +; GFX7-NEXT: v_readfirstlane_b32 s3, v3 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 %idx + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(<4 x i128> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i128_idx0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x 
i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 0 + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(<4 x i128> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i128_idx1: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s4 +; GCN-NEXT: s_mov_b32 s1, s5 +; GCN-NEXT: s_mov_b32 s2, s6 +; GCN-NEXT: s_mov_b32 s3, s7 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 1 + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(<4 x i128> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i128_idx2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s8 +; GCN-NEXT: s_mov_b32 s1, s9 +; GCN-NEXT: s_mov_b32 s2, s10 +; GCN-NEXT: s_mov_b32 s3, s11 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 2 + ret i128 %element +} + +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(<4 x i128> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i128_idx3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s12 +; GCN-NEXT: s_mov_b32 s1, s13 +; GCN-NEXT: s_mov_b32 s2, s14 +; GCN-NEXT: s_mov_b32 s3, s15 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %element = extractelement <4 x i128> %vector, i32 3 + ret i128 %element +} + +define i128 @extractelement_vgpr_v4i128_idx0(<4 x i128> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i128_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i128_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i128_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 0 + ret i128 %element +} + +define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i128_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i128_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; 
GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v4 +; GFX8-NEXT: v_mov_b32_e32 v1, v5 +; GFX8-NEXT: v_mov_b32_e32 v2, v6 +; GFX8-NEXT: v_mov_b32_e32 v3, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i128_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v4 +; GFX7-NEXT: v_mov_b32_e32 v1, v5 +; GFX7-NEXT: v_mov_b32_e32 v2, v6 +; GFX7-NEXT: v_mov_b32_e32 v3, v7 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 1 + ret i128 %element +} + +define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i128_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-NEXT: v_mov_b32_e32 v1, v9 +; GFX9-NEXT: v_mov_b32_e32 v2, v10 +; GFX9-NEXT: v_mov_b32_e32 v3, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i128_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v8 +; GFX8-NEXT: v_mov_b32_e32 v1, v9 +; GFX8-NEXT: v_mov_b32_e32 v2, v10 +; GFX8-NEXT: v_mov_b32_e32 v3, v11 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i128_idx2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v8 +; GFX7-NEXT: v_mov_b32_e32 v1, v9 +; GFX7-NEXT: v_mov_b32_e32 v2, v10 +; GFX7-NEXT: v_mov_b32_e32 v3, v11 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 2 + ret i128 %element +} + +define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i128_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v12 +; GFX9-NEXT: v_mov_b32_e32 v1, v13 +; GFX9-NEXT: v_mov_b32_e32 v2, v14 +; GFX9-NEXT: v_mov_b32_e32 v3, v15 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i128_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v12 +; GFX8-NEXT: v_mov_b32_e32 v1, v13 +; GFX8-NEXT: v_mov_b32_e32 v2, v14 +; GFX8-NEXT: v_mov_b32_e32 v3, v15 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i128_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v12 +; GFX7-NEXT: v_mov_b32_e32 v1, v13 +; GFX7-NEXT: v_mov_b32_e32 v2, v14 +; GFX7-NEXT: v_mov_b32_e32 v3, v15 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %element = extractelement <4 x i128> %vector, i32 3 + ret i128 %element +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll new file mode 100644 index 0000000000000..13d7fbeda0f6d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll @@ -0,0 +1,802 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_lshr_b32 s2, s4, 1 +; GCN-NEXT: s_cmp_eq_u32 s2, 1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cselect_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s4, 1 +; GCN-NEXT: s_lshl_b32 s1, s1, 4 +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_lshr_b32 s0, s2, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX9-NEXT: s_and_b32 s1, s2, 1 +; GFX9-NEXT: s_lshl_b32 s0, s1, 4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_lshr_b32 s0, s2, 1 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX8-NEXT: s_and_b32 s1, s2, 1 +; GFX8-NEXT: s_lshl_b32 s0, s1, 4 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: s_lshr_b32 s0, s2, 1 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX7-NEXT: s_and_b32 s1, s2, 1 +; GFX7-NEXT: s_lshl_b32 s0, s1, 4 +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(1)* 
%ptr + %element = extractelement <4 x i16> %vector, i32 %idx + ret i16 %element +} + +define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX9-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i16_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i16_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 1, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX7-NEXT: v_and_b32_e32 v2, 1, v2 +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %element = extractelement <4 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i16_idx0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 0 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i16_idx1: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 1 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> 
addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i16_idx2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 2 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i16_idx3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %element = extractelement <4 x i16> %vector, i32 3 + ret i16 %element +} + +define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i16_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i16_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i16_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %element = extractelement <4 x i16> %vector, i32 0 + ret i16 %element +} + +define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i16_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i16_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i16_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %element = extractelement <4 x i16> %vector, i32 1 + ret i16 %element +} + +define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i16_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i16_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i16_idx2: +; GFX7: ; 
%bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %element = extractelement <4 x i16> %vector, i32 2 + ret i16 %element +} + +define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i16_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i16_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i16_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %element = extractelement <4 x i16> %vector, i32 3 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_lshr_b32 s5, s4, 1 +; GCN-NEXT: s_cmp_eq_u32 s5, 1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cselect_b32 s0, s1, s0 +; GCN-NEXT: s_cmp_eq_u32 s5, 2 +; GCN-NEXT: s_cselect_b32 s0, s2, s0 +; GCN-NEXT: s_cmp_eq_u32 s5, 3 +; GCN-NEXT: s_cselect_b32 s0, s3, s0 +; GCN-NEXT: s_and_b32 s1, s4, 1 +; GCN-NEXT: s_lshl_b32 s1, s1, 4 +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_lshr_b32 s0, s2, 1 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX9-NEXT: s_and_b32 s1, s2, 1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: s_lshl_b32 s0, s1, 4 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_lshr_b32 s0, s2, 1 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX8-NEXT: s_and_b32 s1, s2, 1 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: s_lshl_b32 s0, s1, 4 +; GFX8-NEXT: v_lshrrev_b32_e32 
v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_lshr_b32 s0, s2, 1 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX7-NEXT: s_and_b32 s1, s2, 1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX7-NEXT: s_lshl_b32 s0, s1, 4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 %idx + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX9-NEXT: v_and_b32_e32 v1, 1, v2 +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX8-NEXT: v_and_b32_e32 v1, 1, v2 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 1, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 1, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx: 
+; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_and_b32_e32 v0, 1, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v5, s3 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc +; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 %idx + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 0 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx1: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 1 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 2 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 3 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx4: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s2 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 4 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx5: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s2, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 5 + ret i16 %element +} + +define amdgpu_ps 
i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx6: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s0, s3 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 6 + ret i16 %element +} + +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i16_idx7: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s3, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %element = extractelement <8 x i16> %vector, i32 7 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 0 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 1 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 2 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 3 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 4 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v2 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 5 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx6(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx6: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mov_b32_e32 v0, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 6 + ret i16 %element +} + +define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i16_idx7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i16_idx7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i16_idx7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %element = extractelement <8 x i16> %vector, i32 7 + ret i16 %element +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll new file mode 100644 index 0000000000000..95b4177abbcab --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll @@ -0,0 +1,3135 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc 
-global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s5, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s1, s0, 8 +; GCN-NEXT: s_and_b32 s1, s1, s5 +; GCN-NEXT: s_lshr_b32 s2, s0, 16 +; GCN-NEXT: s_lshr_b32 s3, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s5 +; GCN-NEXT: s_lshl_b32 s1, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s2, s5 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s5 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s4, 3 +; GCN-NEXT: s_lshl_b32 s1, s1, 3 +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v3, v0, s1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v1 +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v3 +; GFX9-NEXT: s_lshl_b32 s0, s2, 3 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v4i8_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_and_b32 s0, s2, 3 +; GFX8-NEXT: s_lshl_b32 s0, s0, 3 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v4i8_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s0, 0xff +; GFX7-NEXT: s_and_b32 s1, s2, 3 +; GFX7-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX7-NEXT: s_lshl_b32 s0, s1, 3 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 %idx + ret i8 %element +} + +define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: v_and_b32_e32 v1, 3, v2 +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v3, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v4, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v3, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i8_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v5, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v3, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i8_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_and_b32_e32 v1, 3, v2 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: 
v_and_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX9-NEXT: s_movk_i32 s4, 0xff +; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshr_b32 s1, s0, 8 +; GFX9-NEXT: s_and_b32 s1, s1, s4 +; GFX9-NEXT: s_lshr_b32 s2, s0, 16 +; GFX9-NEXT: s_lshr_b32 s3, s0, 24 +; GFX9-NEXT: s_and_b32 s0, s0, s4 +; GFX9-NEXT: s_lshl_b32 s1, s1, 8 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s2, s4 +; GFX9-NEXT: s_lshl_b32 s1, s1, 16 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s3, s4 +; GFX9-NEXT: s_lshl_b32 s1, s1, 24 +; GFX9-NEXT: s_or_b32 s0, s0, s1 +; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i8_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s1, s0, 8 +; GFX8-NEXT: s_and_b32 s1, s1, s4 +; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_lshr_b32 s3, s0, 24 +; GFX8-NEXT: s_and_b32 s0, s0, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, 8 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s2, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_and_b32 s1, s3, s4 +; GFX8-NEXT: s_lshl_b32 s1, s1, 24 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i8_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_lshr_b32 s1, s0, 8 +; GFX7-NEXT: s_and_b32 s1, s1, s4 +; GFX7-NEXT: s_lshr_b32 s2, s0, 16 +; GFX7-NEXT: s_lshr_b32 s3, s0, 24 +; GFX7-NEXT: s_and_b32 s0, s0, s4 +; GFX7-NEXT: s_lshl_b32 s1, s1, 8 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: s_and_b32 s1, s2, s4 +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: s_and_b32 s1, s3, s4 +; GFX7-NEXT: s_lshl_b32 s1, s1, 24 +; GFX7-NEXT: s_or_b32 s0, s0, s1 +; GFX7-NEXT: v_lshr_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i8_idx0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s1, 8 +; GCN-NEXT: s_and_b32 s2, s2, s0 +; GCN-NEXT: 
s_lshr_b32 s3, s1, 16 +; GCN-NEXT: s_lshr_b32 s4, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s3, s0 +; GCN-NEXT: s_and_b32 s0, s4, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 0 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i8_idx1: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s1, 8 +; GCN-NEXT: s_and_b32 s2, s2, s0 +; GCN-NEXT: s_lshr_b32 s3, s1, 16 +; GCN-NEXT: s_lshr_b32 s4, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s3, s0 +; GCN-NEXT: s_and_b32 s0, s4, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_lshr_b32 s0, s0, 8 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 1 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i8_idx2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s1, 8 +; GCN-NEXT: s_and_b32 s2, s2, s0 +; GCN-NEXT: s_lshr_b32 s3, s1, 16 +; GCN-NEXT: s_lshr_b32 s4, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s3, s0 +; GCN-NEXT: s_and_b32 s0, s4, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 2 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v4i8_idx3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s1, s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s0, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s1, 8 +; GCN-NEXT: s_and_b32 s2, s2, s0 +; GCN-NEXT: s_lshr_b32 s3, s1, 16 +; GCN-NEXT: s_lshr_b32 s4, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s3, s0 +; GCN-NEXT: s_and_b32 s0, s4, s0 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_lshl_b32 s0, s0, 24 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_lshr_b32 s0, s0, 24 +; GCN-NEXT: ; return to shader part epilog + %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %element = extractelement <4 x i8> %vector, i32 3 + ret i8 %element +} + +define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i8_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: 
s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i8_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i8_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 0 + ret i8 %element +} + +define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i8_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i8_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: 
v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i8_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 1 + ret i8 %element +} + +define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i8_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i8_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: 
v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i8_idx2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 2 + ret i8 %element +} + +define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v4i8_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v4i8_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v4i8_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: 
v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %element = extractelement <4 x i8> %vector, i32 3 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s9, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s0, 8 +; GCN-NEXT: s_and_b32 s2, s2, s9 +; GCN-NEXT: s_lshr_b32 s3, s0, 16 +; GCN-NEXT: s_lshr_b32 s5, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s3, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s5, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 +; GCN-NEXT: s_lshr_b32 s6, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s6, s9 +; GCN-NEXT: s_lshr_b32 s7, s1, 16 +; GCN-NEXT: s_lshr_b32 s8, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s7, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s8, s9 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_lshr_b32 s2, s4, 2 +; GCN-NEXT: s_cmp_eq_u32 s2, 1 +; GCN-NEXT: s_cselect_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s4, 3 +; GCN-NEXT: s_lshl_b32 s1, s1, 3 +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: v_mov_b32_e32 v2, 8 +; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: s_lshr_b32 s3, s2, 2 +; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, s0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v5, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v6, v0, s1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v3 +; GFX9-NEXT: v_and_b32_sdwa v7, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v8, v1, s1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v2 +; GFX9-NEXT: v_or3_b32 v0, v0, v5, v6 +; GFX9-NEXT: v_or3_b32 v1, v1, v7, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: s_lshl_b32 s0, s2, 3 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX9-NEXT: 
v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v8i8_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: v_mov_b32_e32 v2, 8 +; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: s_lshr_b32 s0, s2, 2 +; GFX8-NEXT: s_and_b32 s1, s2, 3 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX8-NEXT: s_lshl_b32 s0, s1, 3 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v7, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v9, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v8i8_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s0, 0xff +; GFX7-NEXT: s_lshr_b32 s1, s2, 2 +; GFX7-NEXT: s_and_b32 s2, s2, 3 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 +; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 +; GFX7-NEXT: v_and_b32_e32 v7, s0, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: s_lshl_b32 s0, s2, 3 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = 
extractelement <8 x i8> %vector, i32 %idx + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: v_mov_b32_e32 v3, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v4, 2, v2 +; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, s4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v7, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v8, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v0, s5, v5 +; GFX9-NEXT: v_and_b32_sdwa v9, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v10, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v1, s5, v3 +; GFX9-NEXT: v_or3_b32 v0, v0, v7, v8 +; GFX9-NEXT: v_or3_b32 v1, v1, v9, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v3, 8 +; GFX8-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, 2, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v7, 8, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v9, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v10, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v11, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v5, v1, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v11 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v10 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 
+; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 2, v2 +; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX7-NEXT: v_and_b32_e32 v8, s4, v8 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-NEXT: v_and_b32_e32 v6, s4, v6 +; GFX7-NEXT: v_and_b32_e32 v9, s4, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s8, 0xff +; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 +; GCN-NEXT: v_and_b32_e32 v0, 3, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s0, 8 +; GCN-NEXT: s_and_b32 s2, s2, s8 +; GCN-NEXT: s_lshr_b32 s3, s0, 16 +; GCN-NEXT: s_lshr_b32 s4, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s3, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s4, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 +; GCN-NEXT: s_lshr_b32 s5, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: s_and_b32 s2, s5, s8 +; GCN-NEXT: s_lshr_b32 s6, s1, 16 +; GCN-NEXT: s_lshr_b32 s7, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s6, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 16 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: s_and_b32 s2, s7, s8 +; GCN-NEXT: s_lshl_b32 s2, s2, 24 +; GCN-NEXT: s_or_b32 s1, s1, s2 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc +; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx0: +; GCN: ; %bb.0: +; GCN-NEXT: 
s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s1, s0, 8 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshr_b32 s2, s0, 16 +; GCN-NEXT: s_lshr_b32 s3, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 0 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx1: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s1, s0, 8 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshr_b32 s2, s0, 16 +; GCN-NEXT: s_lshr_b32 s3, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 8 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 1 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s1, s0, 8 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshr_b32 s2, s0, 16 +; GCN-NEXT: s_lshr_b32 s3, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 2 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s1, s0, 8 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshr_b32 s2, s0, 16 +; GCN-NEXT: s_lshr_b32 s3, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 24 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 3 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg 
%ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx4: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 8 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshr_b32 s2, s1, 16 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshl_b32 s0, s0, 8 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 4 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx5: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 8 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshr_b32 s2, s1, 16 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshl_b32 s0, s0, 8 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 8 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 5 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx6: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 8 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshr_b32 s2, s1, 16 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshl_b32 s0, s0, 8 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 16 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 6 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) { +; GCN-LABEL: extractelement_sgpr_v8i8_idx7: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s4, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s0, s1, 8 +; GCN-NEXT: s_and_b32 s0, s0, s4 +; GCN-NEXT: s_lshr_b32 s2, s1, 16 +; GCN-NEXT: s_lshr_b32 s3, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s4 +; GCN-NEXT: s_lshl_b32 s0, s0, 8 +; GCN-NEXT: s_or_b32 s0, s1, s0 +; GCN-NEXT: s_and_b32 s1, s2, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 16 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_and_b32 s1, s3, s4 +; GCN-NEXT: s_lshl_b32 s1, s1, 24 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: s_lshr_b32 s0, s0, 24 +; GCN-NEXT: ; return to shader part epilog + %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %element = extractelement <8 x i8> %vector, i32 7 + ret i8 %element +} + 
+define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 0 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, 
v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 1 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 2 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], 
s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 3 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> 
%vector, i32 4 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 5 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: 
v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx6: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 6 + ret i8 %element +} + +define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v8i8_idx7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v8i8_idx7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, 
v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v8i8_idx7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %element = extractelement <8 x i8> %vector, i32 7 + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s17, 0xff +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s5, s0, 8 +; GCN-NEXT: s_and_b32 s5, s5, s17 +; GCN-NEXT: s_lshr_b32 s6, s0, 16 +; GCN-NEXT: s_lshr_b32 s7, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s0, s0, s5 +; GCN-NEXT: s_and_b32 s5, s6, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 16 +; GCN-NEXT: s_or_b32 s0, s0, s5 +; GCN-NEXT: s_and_b32 s5, s7, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 24 +; GCN-NEXT: s_lshr_b32 s8, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s5 +; GCN-NEXT: s_and_b32 s5, s8, s17 +; GCN-NEXT: s_lshr_b32 s9, s1, 16 +; GCN-NEXT: s_lshr_b32 s10, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s1, s1, s5 +; GCN-NEXT: s_and_b32 s5, s9, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 16 +; GCN-NEXT: s_or_b32 s1, s1, s5 +; GCN-NEXT: s_and_b32 s5, s10, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 24 +; GCN-NEXT: s_lshr_b32 s11, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s5 +; GCN-NEXT: s_and_b32 s5, s11, s17 +; GCN-NEXT: s_lshr_b32 s12, s2, 16 +; GCN-NEXT: s_lshr_b32 s13, s2, 24 +; GCN-NEXT: s_and_b32 s2, s2, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s2, s2, s5 +; GCN-NEXT: s_and_b32 s5, s12, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 16 +; GCN-NEXT: s_or_b32 s2, s2, s5 +; GCN-NEXT: s_and_b32 s5, s13, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 24 +; GCN-NEXT: s_lshr_b32 s14, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s5 +; GCN-NEXT: s_and_b32 s5, s14, s17 +; GCN-NEXT: s_lshr_b32 s15, s3, 16 +; GCN-NEXT: s_lshr_b32 s16, s3, 24 +; GCN-NEXT: s_and_b32 s3, s3, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 8 +; GCN-NEXT: s_or_b32 s3, s3, s5 +; GCN-NEXT: s_and_b32 s5, s15, 
s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 16 +; GCN-NEXT: s_or_b32 s3, s3, s5 +; GCN-NEXT: s_and_b32 s5, s16, s17 +; GCN-NEXT: s_lshl_b32 s5, s5, 24 +; GCN-NEXT: s_or_b32 s3, s3, s5 +; GCN-NEXT: s_lshr_b32 s5, s4, 2 +; GCN-NEXT: s_cmp_eq_u32 s5, 1 +; GCN-NEXT: s_cselect_b32 s0, s1, s0 +; GCN-NEXT: s_cmp_eq_u32 s5, 2 +; GCN-NEXT: s_cselect_b32 s0, s2, s0 +; GCN-NEXT: s_cmp_eq_u32 s5, 3 +; GCN-NEXT: s_cselect_b32 s0, s3, s0 +; GCN-NEXT: s_and_b32 s1, s4, 3 +; GCN-NEXT: s_lshl_b32 s1, s1, 3 +; GCN-NEXT: s_lshr_b32 s0, s0, s1 +; GCN-NEXT: ; return to shader part epilog + %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %element = extractelement <16 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s0, 8 +; GFX9-NEXT: v_mov_b32_e32 v5, 8 +; GFX9-NEXT: s_movk_i32 s1, 0xff +; GFX9-NEXT: v_mov_b32_e32 v4, 0xff +; GFX9-NEXT: s_lshr_b32 s3, s2, 2 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 1 +; GFX9-NEXT: s_and_b32 s2, s2, 3 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 8, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v6, s0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v7, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v10, v0, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v11, v0, s1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v0, s1, v6 +; GFX9-NEXT: v_and_b32_sdwa v12, v1, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v13, v1, s1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v1, s1, v7 +; GFX9-NEXT: v_and_b32_sdwa v14, v2, s1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v15, v2, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v2, v2, s1, v8 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or3_b32 v0, v0, v10, v11 +; GFX9-NEXT: v_or3_b32 v1, v1, v12, v13 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX9-NEXT: v_and_b32_sdwa v16, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v17, v3, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v3, v3, v4, v5 +; GFX9-NEXT: v_or3_b32 v2, v2, v14, v15 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 2 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_or3_b32 v3, v3, v16, v17 +; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 3 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: s_lshl_b32 s0, s2, 3 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_readfirstlane_b32 s0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_vgpr_v16i8_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: 
s_movk_i32 s0, 0xff +; GFX8-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-NEXT: v_mov_b32_e32 v6, 8 +; GFX8-NEXT: v_mov_b32_e32 v7, s0 +; GFX8-NEXT: v_mov_b32_e32 v4, 0xff +; GFX8-NEXT: s_lshr_b32 s0, s2, 2 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX8-NEXT: s_and_b32 s1, s2, 3 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v8, 8, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v8, v6, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v9, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v12, v0, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v13, v0, v7 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v14, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v12 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v14 +; GFX8-NEXT: v_and_b32_sdwa v15, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v16, v2, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v6, v6, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v15 +; GFX8-NEXT: v_and_b32_sdwa v17, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v4, v3, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v13 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v17 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX8-NEXT: v_or_b32_e32 v2, v2, v16 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 2 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 3 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX8-NEXT: s_lshl_b32 s0, s1, 3 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX8-NEXT: v_readfirstlane_b32 s0, v0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_vgpr_v16i8_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s0, 0xff +; GFX7-NEXT: v_mov_b32_e32 v4, 0xff +; GFX7-NEXT: s_lshr_b32 s1, s2, 2 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 1 +; GFX7-NEXT: s_and_b32 s2, s2, 3 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v1 +; 
GFX7-NEXT: v_lshrrev_b32_e32 v11, 8, v2 +; GFX7-NEXT: v_and_b32_e32 v5, s0, v5 +; GFX7-NEXT: v_and_b32_e32 v8, s0, v8 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v14, 8, v3 +; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 +; GFX7-NEXT: v_and_b32_e32 v9, s0, v9 +; GFX7-NEXT: v_and_b32_e32 v11, v11, v4 +; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 +; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 +; GFX7-NEXT: v_lshrrev_b32_e32 v13, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v7, s0, v7 +; GFX7-NEXT: v_and_b32_e32 v10, s0, v10 +; GFX7-NEXT: v_and_b32_e32 v12, v12, v4 +; GFX7-NEXT: v_and_b32_e32 v14, v14, v4 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v11 +; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-NEXT: v_and_b32_e32 v13, v13, v4 +; GFX7-NEXT: v_and_b32_e32 v15, v15, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v16, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v3, v3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v14, 8, v14 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 24, v7 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 24, v10 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v11 +; GFX7-NEXT: v_and_b32_e32 v4, v16, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 24, v13 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v12 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v14 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v15 +; GFX7-NEXT: v_or_b32_e32 v2, v2, v13 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 2 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s1, 3 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX7-NEXT: s_lshl_b32 s0, s2, 3 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_readfirstlane_b32 s0, v0 +; GFX7-NEXT: ; return to shader part epilog + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 %idx + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { +; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 2, v2 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 +; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 8, v3 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v5 +; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v6 +; GFX9-NEXT: v_lshlrev_b32_sdwa v8, s4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v12, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v13, v3, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v14, v4, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v15, v4, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_or_b32 v3, v3, s5, v8 +; GFX9-NEXT: v_and_or_b32 v4, v4, s5, v9 +; GFX9-NEXT: v_and_b32_sdwa v16, v5, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v17, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v18, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v19, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v6, v0, v1 +; GFX9-NEXT: v_or3_b32 v1, v3, v12, v13 +; GFX9-NEXT: v_or3_b32 v3, v4, v14, v15 +; GFX9-NEXT: v_and_or_b32 v5, v5, s5, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_or3_b32 v4, v5, v16, v17 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 2, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX9-NEXT: v_or3_b32 v0, v0, v18, v19 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 3, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_vgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v7, 8 +; GFX8-NEXT: v_mov_b32_e32 v8, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, 0xff +; GFX8-NEXT: v_lshrrev_b32_e32 v9, 2, v2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9 +; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_lshrrev_b32_e32 v10, 8, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v11, 8, v4 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshrrev_b32_e32 v12, 8, v5 +; GFX8-NEXT: v_lshlrev_b32_sdwa v10, v7, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshrrev_b32_e32 v13, 8, v6 +; GFX8-NEXT: v_lshlrev_b32_sdwa v11, v7, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v14, v3, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v15, v3, v8 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v16, v4, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v3, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v14 +; GFX8-NEXT: v_and_b32_sdwa v8, v4, v8 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v3, v3, v16 +; GFX8-NEXT: v_and_b32_sdwa v17, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: 
v_or_b32_sdwa v4, v5, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_lshlrev_b32_sdwa v7, v7, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v18, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v4, v4, v17 +; GFX8-NEXT: v_and_b32_sdwa v19, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v15 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v8 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX8-NEXT: v_and_b32_sdwa v0, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v5, v5, v19 +; GFX8-NEXT: v_or_b32_e32 v4, v4, v18 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 2, v9 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX8-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 3, v9 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_vgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: v_mov_b32_e32 v0, 0xff +; GFX7-NEXT: v_lshrrev_b32_e32 v18, 2, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v18 +; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v12, 8, v5 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v9, s4, v9 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v13, 16, v5 +; GFX7-NEXT: v_lshrrev_b32_e32 v15, 8, v6 +; GFX7-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX7-NEXT: v_and_b32_e32 v10, s4, v10 +; GFX7-NEXT: v_and_b32_e32 v12, v12, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-NEXT: v_lshrrev_b32_e32 v14, 24, v5 +; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v6 +; GFX7-NEXT: v_and_b32_e32 v8, s4, v8 +; GFX7-NEXT: v_and_b32_e32 v11, s4, v11 +; GFX7-NEXT: v_and_b32_e32 v13, v13, v0 +; GFX7-NEXT: v_and_b32_e32 v15, v15, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 +; GFX7-NEXT: v_and_b32_e32 v5, s4, v5 +; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX7-NEXT: v_or_b32_e32 v3, v4, v9 +; GFX7-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-NEXT: v_and_b32_e32 v14, v14, v0 +; GFX7-NEXT: v_and_b32_e32 v16, v16, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v17, 24, v6 +; GFX7-NEXT: v_and_b32_e32 v6, v6, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v10 +; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13 +; GFX7-NEXT: v_or_b32_e32 v4, v5, v12 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v15 +; GFX7-NEXT: v_and_b32_e32 v0, v17, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v14, 24, v14 +; GFX7-NEXT: v_or_b32_e32 
v4, v4, v13 +; GFX7-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX7-NEXT: v_or_b32_e32 v5, v6, v15 +; GFX7-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v11 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_or_b32_e32 v5, v5, v16 +; GFX7-NEXT: v_or_b32_e32 v4, v4, v14 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 2, v18 +; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX7-NEXT: v_or_b32_e32 v0, v5, v0 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 3, v18 +; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 %idx + ret i8 %element +} + +define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 +; GCN-NEXT: s_movk_i32 s16, 0xff +; GCN-NEXT: v_lshrrev_b32_e32 v1, 2, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 +; GCN-NEXT: v_and_b32_e32 v0, 3, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s4, s0, 8 +; GCN-NEXT: s_and_b32 s4, s4, s16 +; GCN-NEXT: s_lshr_b32 s5, s0, 16 +; GCN-NEXT: s_lshr_b32 s6, s0, 24 +; GCN-NEXT: s_and_b32 s0, s0, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 8 +; GCN-NEXT: s_or_b32 s0, s0, s4 +; GCN-NEXT: s_and_b32 s4, s5, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 16 +; GCN-NEXT: s_or_b32 s0, s0, s4 +; GCN-NEXT: s_and_b32 s4, s6, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 24 +; GCN-NEXT: s_lshr_b32 s7, s1, 8 +; GCN-NEXT: s_or_b32 s0, s0, s4 +; GCN-NEXT: s_and_b32 s4, s7, s16 +; GCN-NEXT: s_lshr_b32 s8, s1, 16 +; GCN-NEXT: s_lshr_b32 s9, s1, 24 +; GCN-NEXT: s_and_b32 s1, s1, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 8 +; GCN-NEXT: s_or_b32 s1, s1, s4 +; GCN-NEXT: s_and_b32 s4, s8, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 16 +; GCN-NEXT: s_or_b32 s1, s1, s4 +; GCN-NEXT: s_and_b32 s4, s9, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 24 +; GCN-NEXT: s_lshr_b32 s10, s2, 8 +; GCN-NEXT: s_or_b32 s1, s1, s4 +; GCN-NEXT: s_and_b32 s4, s10, s16 +; GCN-NEXT: s_lshr_b32 s11, s2, 16 +; GCN-NEXT: s_lshr_b32 s12, s2, 24 +; GCN-NEXT: s_and_b32 s2, s2, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 8 +; GCN-NEXT: s_or_b32 s2, s2, s4 +; GCN-NEXT: s_and_b32 s4, s11, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 16 +; GCN-NEXT: s_or_b32 s2, s2, s4 +; GCN-NEXT: s_and_b32 s4, s12, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 24 +; GCN-NEXT: s_lshr_b32 s13, s3, 8 +; GCN-NEXT: s_or_b32 s2, s2, s4 +; GCN-NEXT: s_and_b32 s4, s13, s16 +; GCN-NEXT: s_lshr_b32 s14, s3, 16 +; GCN-NEXT: s_lshr_b32 s15, s3, 24 +; GCN-NEXT: s_and_b32 s3, s3, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 8 +; GCN-NEXT: s_or_b32 s3, s3, s4 +; GCN-NEXT: s_and_b32 s4, s14, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 16 +; GCN-NEXT: s_or_b32 s3, s3, s4 +; GCN-NEXT: s_and_b32 s4, s15, s16 +; GCN-NEXT: s_lshl_b32 s4, s4, 24 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: s_or_b32 s3, s3, s4 +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GCN-NEXT: v_mov_b32_e32 v5, s3 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc +; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GCN-NEXT: v_lshrrev_b32_e32 v0, v0, v1 +; GCN-NEXT: v_readfirstlane_b32 s0, v0 +; GCN-NEXT: ; return to shader part epilog + 
%vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %element = extractelement <16 x i8> %vector, i32 %idx + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx0: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 0 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, 
v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx1: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 1 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx2: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx2: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 2 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v0, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v1, v0, s5, v1 +; GFX9-NEXT: v_and_b32_sdwa v0, v0, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 3 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx4: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx4: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 
v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 4 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 5 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx6: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx6: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 6 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v2, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v1, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v1, v1, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v2, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx7: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff 
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx7: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v1 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 7 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx8: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 8 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx9: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx9: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx9: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: 
v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 9 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx10: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx10: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 10 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v2, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v2, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v2, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx11: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx11: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v2 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 11 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx12: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-LABEL: extractelement_vgpr_v16i8_idx12: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx12: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 12 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx13: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx13: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; 
GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx13: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 13 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx14: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx14: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx14: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX7-NEXT: 
v_lshrrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x i8> %vector, i32 14 + ret i8 %element +} + +define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { +; GFX9-LABEL: extractelement_vgpr_v16i8_idx15: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: s_mov_b32 s4, 8 +; GFX9-NEXT: s_movk_i32 s5, 0xff +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_and_b32_sdwa v1, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_or_b32 v0, v3, s5, v0 +; GFX9-NEXT: v_and_b32_sdwa v2, v3, s5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_or3_b32 v0, v0, v1, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: extractelement_vgpr_v16i8_idx15: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; GFX8-NEXT: s_movk_i32 s4, 0xff +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 8 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 +; GFX8-NEXT: v_lshrrev_b32_e32 v2, 8, v3 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_and_b32_sdwa v4, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: extractelement_vgpr_v16i8_idx15: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_movk_i32 s4, 0xff +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX7-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 24, v3 +; GFX7-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX7-NEXT: v_and_b32_e32 v3, s4, v3 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 24, v2 +; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: s_setpc_b64 s[30:31] + %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %element = extractelement <16 x 
i8> %vector, i32 15 + ret i8 %element +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index 1d3311673cf6c..b548ff5503435 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: extract_vector_elt_0_v2i32 @@ -228,8 +228,10 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i16_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s16>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 @@ -290,25 +292,155 @@ name: extract_vector_elt_v2s8_varidx_i32 body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1 ; CHECK-LABEL: name: extract_vector_elt_v2s8_varidx_i32 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 8 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32) ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32) - ; CHECK: $vgpr0 = COPY [[COPY3]](s32) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(<2 x s8>) = G_TRUNC %0 - %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 - %4:_(s32) = G_ANYEXT %3 - $vgpr0 = COPY %4 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32) + ; CHECK: $vgpr0 = COPY [[COPY6]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(<2 x s8>) = G_BITCAST %2 + %4:_(s8) = G_EXTRACT_VECTOR_ELT %3, %1 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
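+# Reader's note (illustrative, not produced by update_mir_test_checks.py):
+# with both s8 elements now packed into a single VGPR, the variable-index
+# <2 x s8> extract above legalizes by widening each byte with G_SEXT_INREG
+# into a <2 x s32> G_BUILD_VECTOR and performing the extract on that vector.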
+
+---
+name: extract_vector_elt_v2s8_constidx_0_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY6]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(<2 x s8>) = G_BITCAST %2
+    %4:_(s32) = G_CONSTANT i32 0
+    %5:_(s8) = G_EXTRACT_VECTOR_ELT %3, %4
+    %6:_(s32) = G_ANYEXT %5
+    $vgpr0 = COPY %6
+...
+
+---
+name: extract_vector_elt_v2s8_constidx_1_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_1_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 8
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 32
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY6]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(<2 x s8>) = G_BITCAST %2
+    %4:_(s32) = G_CONSTANT i32 1
+    %5:_(s8) = G_EXTRACT_VECTOR_ELT %3, %4
+    %6:_(s32) = G_ANYEXT %5
+    $vgpr0 = COPY %6
+...
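+# Reader's note (illustrative, not produced by update_mir_test_checks.py):
+# with a constant index the same widening applies, but the variable extract
+# folds to a fixed-offset G_EXTRACT of the <2 x s32> build vector: bit 0 for
+# element 0 and bit 32 for element 1, as checked above.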
+
+---
+name: extract_vector_elt_v4s4_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s4_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C3]](s32)
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+    ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C4]](s32)
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C5]](s32)
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C6]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 4
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 4
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
+    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 4
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
+    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 4
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32)
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY10]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_TRUNC %0
+    %3:_(<4 x s4>) = G_BITCAST %2
+    %4:_(s4) = G_EXTRACT_VECTOR_ELT %3, %1
+    %5:_(s32) = G_ANYEXT %4
+    $vgpr0 = COPY %5
 ...
 
 ---
@@ -343,24 +475,559 @@ name: extract_vector_elt_v4s8_varidx_i32
 
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+    liveins: $vgpr0, $vgpr1
 
     ; CHECK-LABEL: name: extract_vector_elt_v4s8_varidx_i32
-    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY [[COPY]](<4 x s32>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 8
-    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 8
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV2]], 8
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV3]], 8
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32), [[SEXT_INREG3]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
-    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    %1:_(s32) = COPY $vgpr4
-    %2:_(<4 x s8>) = G_TRUNC %0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[SHL3]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY6]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(<4 x s8>) = G_BITCAST %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
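+# Reader's note (illustrative, not produced by update_mir_test_checks.py):
+# for <4 x s8> with a variable index the legalizer repacks the four bytes
+# into one s32 and selects with a computed shift; the index is masked with 3
+# and shifted left by 3, i.e. roughly (pack >> ((idx & 3) * 8)) in C terms.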
+
+---
+name: extract_vector_elt_v4s8_constidx_0_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_0_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C3]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<4 x s8>) = G_BITCAST %0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v4s8_constidx_1_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_1_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<4 x s8>) = G_BITCAST %0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v4s8_constidx_2_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_2_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<4 x s8>) = G_BITCAST %0
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v4s8_constidx_3_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s8_constidx_3_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C2]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY5]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(<4 x s8>) = G_BITCAST %0
+    %2:_(s32) = G_CONSTANT i32 3
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
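+# Reader's note (illustrative, not produced by update_mir_test_checks.py):
+# in the four constant-index variants above, the (idx & 3) * 8 shift amount
+# folds to the literal byte offsets 0, 8, 16 and 24.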
+
+
+
+---
+name: extract_vector_elt_v8s8_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v8s8_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16)
+    ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16)
+    ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]]
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C5]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[LSHR6]](s32)
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C6]](s32)
+    ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL6]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY6]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(<8 x s8>) = G_BITCAST %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+
+---
+name: extract_vector_elt_v8s8_constidx_0_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_0_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+    ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C2]](s16)
+    ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
+    ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    %0:_(s64) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<8 x s8>) = G_BITCAST %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+ +--- +name: extract_vector_elt_v8s8_constidx_1_i32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_1_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<8 x s8>) = G_BITCAST %0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_vector_elt_v8s8_constidx_3_i32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_3_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C4]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 3 + %2:_(<8 x s8>) = G_BITCAST %0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_vector_elt_v8s8_constidx_4_i32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_4_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C5]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 4 + %2:_(<8 x s8>) = G_BITCAST %0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_vector_elt_v8s8_constidx_5_i32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_5_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 5 + %2:_(<8 x s8>) = G_BITCAST %0 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_vector_elt_v8s8_constidx_7_i32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v8s8_constidx_7_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C4]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CONSTANT i32 7 + %2:_(<8 x s8>) = G_BITCAST %0 %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1 %4:_(s32) = G_ANYEXT %3 $vgpr0 = COPY %4 @@ -376,9 +1043,14 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_varidx_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[COPY1]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[SHL]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -395,8 +1067,10 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: $vgpr0 = COPY 
[[COPY1]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 @@ -435,8 +1109,11 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx2_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 2 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -579,9 +1256,16 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v4s16_varidx_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[COPY1]](s32) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<2 x s32>), [[LSHR]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -599,8 +1283,18 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s128_varidx_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[EVEC:%[0-9]+]]:_(s128) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s128>), [[COPY1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[EVEC]](s128) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<2 x s128>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[C1]] + ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x s64>), [[ADD]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL]], [[C2]] + ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BITCAST]](<4 x s64>), [[ADD1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST1]](s128) %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(s32) = COPY $vgpr8 %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1146,3 +1840,335 @@ body: | %3:_(s32) = G_EXTRACT_VECTOR_ELT %2, %1 S_ENDPGM 0, implicit %3 ... 
+ +--- +name: extract_vector_elt_v32s1_varidx_i32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: extract_vector_elt_v32s1_varidx_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C3]](s32) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C4]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C5]](s32) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C6]](s32) + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C7]](s32) + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C8]](s32) + ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C9]](s32) + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C10]](s32) + ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C11]](s32) + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C12]](s32) + ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C13]](s32) + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C14]](s32) + ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C15]](s32) + ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C16]](s32) + ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C17]](s32) + ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C18]](s32) + ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C19]](s32) + ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CHECK: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C20]](s32) + ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CHECK: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C21]](s32) + ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CHECK: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C22]](s32) + ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C23]](s32) + ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C24]](s32) + ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CHECK: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C25]](s32) + ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CHECK: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C26]](s32) + ; CHECK: 
[[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C27]](s32) + ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C28]](s32) + ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CHECK: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C29]](s32) + ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C30]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C3]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C5]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C]] + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C7]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C8]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C]] + ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C9]](s32) + ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C]] + ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C10]](s32) + ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C11]](s32) + ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; CHECK: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C12]](s32) + ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C]] + ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C13]](s32) + ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C]] + ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C14]](s32) + ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[OR13]], [[SHL14]] + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]] + ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C15]](s32) + ; CHECK: [[OR15:%[0-9]+]]:_(s32) = G_OR [[OR14]], [[SHL15]] + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[LSHR16]](s32) + ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]] + ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C16]](s32) + ; CHECK: [[OR16:%[0-9]+]]:_(s32) = G_OR [[OR15]], [[SHL16]] + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[LSHR17]](s32) + ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C]] + ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C17]](s32) + ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[OR16]], [[SHL17]] + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[LSHR18]](s32) + ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C]] + ; CHECK: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C18]](s32) + ; CHECK: [[OR18:%[0-9]+]]:_(s32) = G_OR [[OR17]], [[SHL18]] + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[LSHR19]](s32) + ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C]] + ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C19]](s32) + ; CHECK: [[OR19:%[0-9]+]]:_(s32) = G_OR [[OR18]], [[SHL19]] + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[LSHR20]](s32) + ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C]] + ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C20]](s32) + ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[OR19]], [[SHL20]] + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32) + ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C]] + ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C21]](s32) + ; CHECK: [[OR21:%[0-9]+]]:_(s32) = G_OR [[OR20]], [[SHL21]] + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32) + ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C]] + ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C22]](s32) + ; CHECK: [[OR22:%[0-9]+]]:_(s32) = G_OR [[OR21]], [[SHL22]] + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR23]](s32) + ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]] + ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C23]](s32) + ; CHECK: [[OR23:%[0-9]+]]:_(s32) = G_OR [[OR22]], [[SHL23]] + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[LSHR24]](s32) + ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]] + ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[C24]](s32) + ; CHECK: [[OR24:%[0-9]+]]:_(s32) = G_OR [[OR23]], [[SHL24]] + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[LSHR25]](s32) + ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY28]], [[C]] + ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C25]](s32) + ; CHECK: [[OR25:%[0-9]+]]:_(s32) = G_OR [[OR24]], [[SHL25]] + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) 
= COPY [[LSHR26]](s32) + ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C]] + ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C26]](s32) + ; CHECK: [[OR26:%[0-9]+]]:_(s32) = G_OR [[OR25]], [[SHL26]] + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[LSHR27]](s32) + ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]] + ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C27]](s32) + ; CHECK: [[OR27:%[0-9]+]]:_(s32) = G_OR [[OR26]], [[SHL27]] + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32) + ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]] + ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C28]](s32) + ; CHECK: [[OR28:%[0-9]+]]:_(s32) = G_OR [[OR27]], [[SHL28]] + ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32) + ; CHECK: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY32]], [[C]] + ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[C29]](s32) + ; CHECK: [[OR29:%[0-9]+]]:_(s32) = G_OR [[OR28]], [[SHL29]] + ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) + ; CHECK: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C]] + ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[C30]](s32) + ; CHECK: [[OR30:%[0-9]+]]:_(s32) = G_OR [[OR29]], [[SHL30]] + ; CHECK: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C30]] + ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C31]](s32) + ; CHECK: [[LSHR31:%[0-9]+]]:_(s32) = G_LSHR [[OR30]], [[SHL31]](s32) + ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: $vgpr0 = COPY [[COPY34]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<32 x s1>) = G_BITCAST %0 + %3:_(s1) = G_EXTRACT_VECTOR_ELT %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... + +--- +name: extract_vector_elt_v12s8_varidx_s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 + ; CHECK-LABEL: name: extract_vector_elt_v12s8_varidx_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; 
CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C1]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C3]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C3]] + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C4]](s32) + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[LSHR9]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C5]](s32) + ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[EVEC]], [[SHL9]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) + ; CHECK: $vgpr0 = COPY [[COPY14]](s32) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x s8>) = G_BITCAST %0 + %2:_(s32) = COPY $vgpr3 + %3:_(s8) = G_EXTRACT_VECTOR_ELT %1, %2 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +... 
+ +--- +name: extract_vector_elt_v3s8_varidx_s32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: extract_vector_elt_v3s8_varidx_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 8 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32), [[SEXT_INREG2]](s32) + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[COPY1]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32) + ; CHECK: $vgpr0 = COPY [[COPY8]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(<3 x s8>) = G_BITCAST %2 + %4:_(s8) = G_EXTRACT_VECTOR_ELT %3, %1 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index 86d7a2f4e4dbd..e566572763e57 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -16,8 +16,10 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -38,8 +40,10 @@ define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), 
[[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -67,12 +71,14 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -94,12 +100,14 @@ define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY 
[[LSHR]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -130,15 +138,18 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -163,15 +174,18 @@ define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_l ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -205,15 +219,18 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -238,15 +255,18 @@ define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) 
= COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -279,12 +299,14 @@ define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -306,12 +328,14 @@ define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX10NSA: 
[[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) @@ -342,15 +366,18 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -375,15 +402,18 @@ define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coo ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) 
= COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -417,15 +447,18 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -450,15 +483,18 @@ define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -492,18 +528,21 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -527,18 +566,21 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -571,12 +613,14 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -598,12 +642,14 @@ define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
@@ -634,15 +680,18 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -667,15 +716,18 @@ define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -709,18 +761,21 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -744,18 +799,21 @@ define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coor
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -789,18 +847,21 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -824,18 +885,21 @@ define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %co
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -869,15 +933,18 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -902,15 +969,18 @@ define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -944,18 +1014,21 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -979,18 +1052,21 @@ define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16>
 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
- ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8")
@@ -1028,8 +1104,10 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX9: S_ENDPGM 0
 ; GFX10NSA-LABEL: name: store_1d
@@ -1050,8 +1128,10 @@ define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX10NSA: S_ENDPGM 0
 main_body:
@@ -1079,12 +1159,14 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX9: S_ENDPGM 0
@@ -1106,12 +1188,14 @@ define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX10NSA: S_ENDPGM 0
@@ -1142,15 +1226,18 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1175,15 +1262,18 @@ define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1217,15 +1307,18 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1250,15 +1343,18 @@ define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1291,12 +1387,14 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata,
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX9: S_ENDPGM 0
@@ -1318,12 +1416,14 @@ define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata,
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX10NSA: S_ENDPGM 0
@@ -1354,15 +1454,18 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata,
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1387,15 +1490,18 @@ define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata,
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1429,15 +1535,18 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1462,15 +1571,18 @@ define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1504,18 +1616,21 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
@@ -1539,18 +1654,21 @@ define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vda
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
 ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
- ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
+ ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2darraymsaa), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
@@ -1583,12 +1701,14 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX9: S_ENDPGM 0
@@ -1610,12 +1730,14 @@ define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
- ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
 ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8")
 ; GFX10NSA: S_ENDPGM 0
@@ -1646,15 +1768,18 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1679,15 +1804,18 @@ define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
 ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>)
- ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
- ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+ ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32)
+ ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32)
 ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
 ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32)
 ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
@@ -1721,18 +1849,21 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <
 ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
 ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
+ ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
 ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ;
GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -1756,18 +1887,21 @@ define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) 
+ ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -1801,18 +1935,21 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -1836,18 +1973,21 @@ define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -1881,15 +2021,18 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: 
[[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -1914,15 +2057,18 @@ define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[DEF]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -1956,18 +2102,21 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX9: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; 
GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -1991,18 +2140,21 @@ define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vda ; GFX10NSA: [[COPY13:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY13]](<2 x s16>) - ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32) - ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 
0, 0, 3 :: (dereferenceable store 16 into custom "TargetCustom8") @@ -2030,8 +2182,10 @@ define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2052,8 +2206,10 @@ define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2081,8 +2237,10 @@ define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2103,8 +2261,10 @@ define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x 
s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2132,8 +2292,10 @@ define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2154,8 +2316,10 @@ define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %co ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.3d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2183,8 +2347,10 @@ define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2205,8 +2371,10 @@ define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.cube), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2234,8 +2402,10 @@ define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2256,8 +2426,10 @@ define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.1darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2285,8 +2457,10 @@ define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2307,8 +2481,10 @@ define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16 ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darray), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2336,8 +2512,10 @@ define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, 
[[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2358,8 +2536,10 @@ define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2dmsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2387,8 +2567,10 @@ define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2409,8 +2591,10 @@ define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.getresinfo.2darraymsaa), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2438,8 +2622,10 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8") ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -2456,8 +2642,10 @@ define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) { ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 8, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 4 from custom "TargetCustom8") ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -2481,8 +2669,10 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2501,8 +2691,10 @@ define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coord ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 9, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load 8 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2529,8 +2721,10 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V1 @@ -2547,8 +2741,10 @@ define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[COPY8]](s32), 2, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 4 into custom "TargetCustom8") ; GFX10NSA: S_ENDPGM 0 main_body: @@ -2574,8 +2770,10 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: 
G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_V2 @@ -2594,8 +2792,10 @@ define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 ; GFX10NSA: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<2 x s32>), 12, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store 8 into custom "TargetCustom8") ; GFX10NSA: S_ENDPGM 0 main_body: @@ -2618,8 +2818,10 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2640,8 +2842,10 @@ define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2669,8 +2873,10 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2691,8 +2897,10 @@ define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2720,8 +2928,10 @@ define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) @@ -2742,8 +2952,10 @@ define 
amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> % ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX10NSA: $vgpr0 = COPY [[UV]](s32) @@ -2776,8 +2988,10 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc @@ -2798,8 +3012,10 @@ define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 1, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX10NSA: S_ENDPGM 0 main_body: @@ -2827,8 +3043,10 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_slc @@ -2849,8 +3067,10 @@ define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, < ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX10NSA: S_ENDPGM 0 main_body: @@ -2878,8 +3098,10 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX9: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX9: S_ENDPGM 0 ; GFX10NSA-LABEL: name: store_1d_glc_slc @@ -2900,8 +3122,10 @@ define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdat ; GFX10NSA: [[COPY12:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY12]](<2 x s16>) - ; 
GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 3, 3 :: (dereferenceable store 16 into custom "TargetCustom8") ; GFX10NSA: S_ENDPGM 0 main_body: @@ -2970,8 +3194,10 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX9: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) @@ -2994,8 +3220,10 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) ; GFX10NSA: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1) @@ -3028,12 +3256,14 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) @@ -3057,12 +3287,14 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") ; GFX10NSA: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) @@ -3098,15 +3330,18 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -3133,15 +3368,18 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX10NSA: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF1]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) @@ -3180,18 +3418,21 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") @@ -3217,18 +3458,21 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX10NSA: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; GFX10NSA: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10NSA: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) + ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10NSA: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY8]](<2 x s16>) - ; GFX10NSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10NSA: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10NSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) ; GFX10NSA: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; GFX10NSA: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10NSA: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) - ; GFX10NSA: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) - ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; 
GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32) ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load 16 from custom "TargetCustom8") diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 9f614bef378ed..692078edbe65f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -217,16 +217,18 @@ body: | ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -250,15 +252,17 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; CHECK: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index d4de328b679a7..d4b80ce4a5721 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -45,13 +45,15 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -80,14 +82,16 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_0 @@ -115,16 +119,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 
- ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -154,15 +160,17 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -191,15 +199,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], 
[[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -267,15 +278,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -304,13 +318,15 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -339,14 +355,16 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C]], [[SHL]] + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v2s16_undef_2 @@ -374,16 +392,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -413,15 +433,17 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY 
[[BITCAST2]](<2 x s16>) @@ -561,15 +583,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -577,10 +602,13 @@ body: | ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -602,15 +630,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = 
G_AND [[COPY3]], [[C1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -640,15 +671,17 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -657,11 +690,13 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -683,16 +718,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], 
[[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -700,12 +737,14 @@ body: | ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -728,15 +767,17 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -745,11 +786,13 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; 
GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -771,16 +814,18 @@ body: | ; GFX8: liveins: $vgpr0, $vgpr1 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) @@ -788,12 +833,14 @@ body: | ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -933,24 +980,38 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 - ; GFX8: 
[[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX8: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C2]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 @@ -971,24 +1032,36 @@ body: | ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 - ; GFX8: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 48 - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[EXTRACT1]](s16) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x 
s16>) + ; GFX8: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX8: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 + ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: shufflevector_v2s16_v4s16_1_3 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 16 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<4 x s16>), 48 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; GFX9: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 + ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 From e37987563ad194c41125ce836cc04df57737c698 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 2 Aug 2020 10:53:51 -0400 Subject: [PATCH 129/600] [InstSimplify] add tests for max(max x,y), x) and variants; NFC --- .../InstSimplify/maxmin_intrinsics.ll | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 40dacb2d07ba6..6fcc6133aca3b 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -267,3 +267,179 @@ define <2 x i8> @umin_maxval_partial_undef(<2 x i8> %x) { %r = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) ret <2 x i8> %r } + +define i8 @umax_umax(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umax( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call 
i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umax_umax_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umax_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umax_umax_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umax_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define <2 x i8> @umax_umax_commute3(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @umax_umax_commute3( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x) + %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> %x) + ret <2 x i8> %m2 +} + +define i8 @umin_umin(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umin_umin_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umin_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define <2 x i8> @umin_umin_commute2(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @umin_umin_commute2( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> %y) + %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> %x) + ret <2 x i8> %m2 +} + +define i8 @umin_umin_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umin_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_smax(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smax( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define <2 x i8> @smax_smax_commute1(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @smax_smax_commute1( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> 
@llvm.smax.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %y, <2 x i8> %x) + %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> %m) + ret <2 x i8> %m2 +} + +define i8 @smax_smax_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smax_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_smax_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smax_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define <2 x i8> @smin_smin(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @smin_smin( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %y) + %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %m) + ret <2 x i8> %m2 +} + +define i8 @smin_smin_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smin_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @smin_smin_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smin_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smin_smin_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smin_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} From 4abc69c6f541e7726913c9b0940728b1e0024b4a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 2 Aug 2020 11:11:05 -0400 Subject: [PATCH 130/600] [InstSimplify] fold max (max X, Y), X --> max X, Y https://alive2.llvm.org/ce/z/VGgG3M --- llvm/lib/Analysis/InstructionSimplify.cpp | 11 +++++ .../InstSimplify/maxmin_intrinsics.ll | 48 +++++++------------ 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 4f09ea1896643..6e75478d52afd 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5239,6 +5239,17 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return ConstantInt::get(ReturnType, APInt::getMinValue(BitWidth)); } + // For 4 commuted variants of each intrinsic: + // max (max X, Y), X --> max X, Y + if (auto *MinMax0 = dyn_cast(Op0)) + if (MinMax0->getIntrinsicID() == IID && + (MinMax0->getOperand(0) == Op1 || 
MinMax0->getOperand(1) == Op1)) + return MinMax0; + if (auto *MinMax1 = dyn_cast(Op1)) + if (MinMax1->getIntrinsicID() == IID && + (MinMax1->getOperand(0) == Op0 || MinMax1->getOperand(1) == Op0)) + return MinMax1; + const APInt *C; if (!match(Op1, m_APIntAllowUndef(C))) break; diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 6fcc6133aca3b..c0064ab0a423a 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -271,8 +271,7 @@ define <2 x i8> @umin_maxval_partial_undef(<2 x i8> %x) { define i8 @umax_umax(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_umax( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) @@ -282,8 +281,7 @@ define i8 @umax_umax(i8 %x, i8 %y) { define i8 @umax_umax_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_umax_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) @@ -293,8 +291,7 @@ define i8 @umax_umax_commute1(i8 %x, i8 %y) { define i8 @umax_umax_commute2(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_umax_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.umax.i8(i8 %m, i8 %x) @@ -304,8 +301,7 @@ define i8 @umax_umax_commute2(i8 %x, i8 %y) { define <2 x i8> @umax_umax_commute3(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @umax_umax_commute3( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x) %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> %x) @@ -315,8 +311,7 @@ define <2 x i8> @umax_umax_commute3(<2 x i8> %x, <2 x i8> %y) { define i8 @umin_umin(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umin( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) @@ -326,8 +321,7 @@ define i8 @umin_umin(i8 %x, i8 %y) { define i8 @umin_umin_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umin_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) @@ -337,8 +331,7 @@ define i8 @umin_umin_commute1(i8 %x, i8 %y) { define <2 x i8> @umin_umin_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @umin_umin_commute2( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) -; CHECK-NEXT: 
[[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> %y) %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> %x) @@ -348,8 +341,7 @@ define <2 x i8> @umin_umin_commute2(<2 x i8> %x, <2 x i8> %y) { define i8 @umin_umin_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umin_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.umin.i8(i8 %m, i8 %x) @@ -359,8 +351,7 @@ define i8 @umin_umin_commute3(i8 %x, i8 %y) { define i8 @smax_smax(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smax( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %m) @@ -370,8 +361,7 @@ define i8 @smax_smax(i8 %x, i8 %y) { define <2 x i8> @smax_smax_commute1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @smax_smax_commute1( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %y, <2 x i8> %x) %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> %m) @@ -381,8 +371,7 @@ define <2 x i8> @smax_smax_commute1(<2 x i8> %x, <2 x i8> %y) { define i8 @smax_smax_commute2(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smax_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) @@ -392,8 +381,7 @@ define i8 @smax_smax_commute2(i8 %x, i8 %y) { define i8 @smax_smax_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smax_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) @@ -403,8 +391,7 @@ define i8 @smax_smax_commute3(i8 %x, i8 %y) { define <2 x i8> @smin_smin(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @smin_smin( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %y) %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %m) @@ -414,8 +401,7 @@ define <2 x i8> @smin_smin(<2 x i8> %x, <2 x i8> %y) { define i8 @smin_smin_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smin_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %m) @@ -425,8 
+411,7 @@ define i8 @smin_smin_commute1(i8 %x, i8 %y) { define i8 @smin_smin_commute2(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smin_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) @@ -436,8 +421,7 @@ define i8 @smin_smin_commute2(i8 %x, i8 %y) { define i8 @smin_smin_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smin_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) From e20223672100ed4826827412b80a605c759538da Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 16:55:16 +0100 Subject: [PATCH 131/600] [IR] Add IRBuilderBase::CreateVectorSplat(ElementCount EC) variant As discussed on D81500, this adds a more general ElementCount variant of the build helper and converts the (non-scalable) unsigned NumElts variant to use it internally. --- llvm/include/llvm/IR/IRBuilder.h | 4 ++++ llvm/lib/IR/IRBuilder.cpp | 13 +++++++++---- llvm/unittests/Analysis/VectorUtilsTest.cpp | 3 +++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index ffec4ff64ca66..d467789132aca 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2484,6 +2484,10 @@ class IRBuilderBase { /// NumElts elements. Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = ""); + /// Return a vector value that contains \arg V broadcasted to \p + /// EC elements. + Value *CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name = ""); + /// Return a value that has been extracted from a larger integer type. Value *CreateExtractInteger(const DataLayout &DL, Value *From, IntegerType *ExtractedTy, uint64_t Offset, diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 1fffce015f707..f223f2c6a2b3d 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -996,17 +996,22 @@ Value *IRBuilderBase::CreateStripInvariantGroup(Value *Ptr) { Value *IRBuilderBase::CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name) { - assert(NumElts > 0 && "Cannot splat to an empty vector!"); + ElementCount EC(NumElts, false); + return CreateVectorSplat(EC, V, Name); +} + +Value *IRBuilderBase::CreateVectorSplat(ElementCount EC, Value *V, + const Twine &Name) { + assert(EC.Min > 0 && "Cannot splat to an empty vector!"); // First insert it into an undef vector so we can shuffle it. Type *I32Ty = getInt32Ty(); - Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), NumElts)); + Value *Undef = UndefValue::get(VectorType::get(V->getType(), EC)); V = CreateInsertElement(Undef, V, ConstantInt::get(I32Ty, 0), Name + ".splatinsert"); // Shuffle the value across the desired number of elements. 
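  // The shuffle mask built below is an all-zeros vector, so every output
  // lane selects lane 0 of V's vector -- the element just inserted above.
  // ConstantAggregateZero is also valid for scalable vector types, which
  // is why constructing the mask with VectorType::get(I32Ty, EC) (instead
  // of FixedVectorType::get) lets this helper splat <vscale x N x Ty> too.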
- Value *Zeros = - ConstantAggregateZero::get(FixedVectorType::get(I32Ty, NumElts)); + Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32Ty, EC)); return CreateShuffleVector(V, Undef, Zeros, Name + ".splat"); } diff --git a/llvm/unittests/Analysis/VectorUtilsTest.cpp b/llvm/unittests/Analysis/VectorUtilsTest.cpp index 69e5285e87312..731ebdfe16f52 100644 --- a/llvm/unittests/Analysis/VectorUtilsTest.cpp +++ b/llvm/unittests/Analysis/VectorUtilsTest.cpp @@ -93,6 +93,9 @@ TEST_F(BasicTest, isSplat) { Value *SplatC = IRB.CreateVectorSplat(5, ScalarC); EXPECT_TRUE(isSplatValue(SplatC)); + Value *SplatC_SVE = IRB.CreateVectorSplat(ElementCount(5, true), ScalarC); + EXPECT_TRUE(isSplatValue(SplatC_SVE)); + // FIXME: Constant splat analysis does not allow undef elements. Constant *SplatWithUndefC = ConstantVector::get({ScalarC, UndefScalar}); EXPECT_FALSE(isSplatValue(SplatWithUndefC)); From 4091413c0047b58853b3f62dd5f36c836f75330d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sun, 2 Aug 2020 16:57:02 +0000 Subject: [PATCH 132/600] Remove debug flags from test (NFC) --- mlir/test/mlir-cuda-runner/two-modules.mlir | 2 +- mlir/test/mlir-rocm-runner/two-modules.mlir | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir index ef4dd0c48b8df..9bdda2ae9c667 100644 --- a/mlir/test/mlir-cuda-runner/two-modules.mlir +++ b/mlir/test/mlir-cuda-runner/two-modules.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-cuda-runner %s --print-ir-after-all --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s --dump-input=always +// RUN: mlir-cuda-runner %s --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s // CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] func @main() { diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/mlir-rocm-runner/two-modules.mlir index 8aa62eda73992..d6b92229b5856 100644 --- a/mlir/test/mlir-rocm-runner/two-modules.mlir +++ b/mlir/test/mlir-rocm-runner/two-modules.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-rocm-runner %s --print-ir-after-all --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s --dump-input=always +// RUN: mlir-rocm-runner %s --shared-libs=%rocm_wrapper_library_dir/librocm-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s // CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] func @main() { From e7a8ee00e6c3b20fc04792db1acf9d5324a1b7bb Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 2 Aug 2020 18:11:01 +0100 Subject: [PATCH 133/600] [AMDGPU] Regenerate tests to fix whitespace indentations Noticed while updating D77804 --- llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 216 +++++++-------- llvm/test/CodeGen/AMDGPU/fshr.ll | 310 +++++++++++----------- 2 files changed, 263 insertions(+), 263 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 7ea072bffecb5..f520b4a8fd8f4 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -636,81 +636,81 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) define amdgpu_kernel void 
@load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s11, 0xf000 -; SI-NEXT: s_mov_b32 s2, 0 -; SI-NEXT: s_mov_b32 s3, s11 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_dword v4, v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_movk_i32 s0, 0xff -; SI-NEXT: s_mov_b32 s6, s10 -; SI-NEXT: s_mov_b32 s7, s11 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; SI-NEXT: v_and_b32_e32 v7, 0xff00, v4 -; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 -; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_and_b32_e32 v0, s0, v4 -; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 -; SI-NEXT: v_or_b32_e32 v0, v7, v0 -; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 -; SI-NEXT: v_and_b32_e32 v2, s0, v2 -; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 -; SI-NEXT: v_or_b32_e32 v1, v1, v2 -; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; SI-NEXT: v_or_b32_e32 v0, v1, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 -; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_dword v4, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_movk_i32 s0, 0xff +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 +; SI-NEXT: v_and_b32_e32 v7, 0xff00, v4 +; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 +; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 +; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 +; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_and_b32_e32 v0, s0, v4 +; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 +; SI-NEXT: v_or_b32_e32 v0, v7, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 +; SI-NEXT: v_and_b32_e32 v2, s0, v2 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 +; SI-NEXT: v_or_b32_e32 v1, v1, v2 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm ; ; VI-LABEL: load_v4i8_to_v4f32_2_uses: ; VI: ; %bb.0: -; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c -; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_mov_b32 s11, 0xf000 -; VI-NEXT: s_mov_b32 s10, -1 -; VI-NEXT: v_mov_b32_e32 v5, 9 -; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_add_u32_e32 v0, 
vcc, s0, v0 -; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: flat_load_dword v4, v[0:1] -; VI-NEXT: s_mov_b32 s6, s10 -; VI-NEXT: s_mov_b32 s7, s11 -; VI-NEXT: s_movk_i32 s0, 0x900 -; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 -; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 -; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v4 -; VI-NEXT: v_add_u16_e32 v8, 9, v4 -; VI-NEXT: v_add_u16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6 -; VI-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: v_add_u16_e32 v0, s0, v0 -; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; VI-NEXT: s_endpgm +; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x2c +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: v_mov_b32_e32 v5, 9 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: flat_load_dword v4, v[0:1] +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: s_movk_i32 s0, 0x900 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 +; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 +; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 +; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 +; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v4 +; VI-NEXT: v_add_u16_e32 v8, 9, v4 +; VI-NEXT: v_add_u16_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6 +; VI-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: v_add_u16_e32 v0, s0, v0 +; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v0, v1 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 @@ -725,42 +725,42 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { ; SI-LABEL: load_v7i8_to_v7f32: ; SI: ; %bb.0: -; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s2, 0 -; SI-NEXT: s_mov_b32 s3, s7 -; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: v_mov_b32_e32 v1, 0 -; 
SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 -; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 -; SI-NEXT: buffer_load_ubyte v6, v[0:1], s[0:3], 0 addr64 offset:2 -; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:3 -; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:4 -; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:5 -; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:6 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_waitcnt vmcnt(6) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v2 -; SI-NEXT: s_waitcnt vmcnt(5) -; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v3 -; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_lshlrev_b32_e32 v9, 8, v4 -; SI-NEXT: v_or_b32_e32 v3, v9, v6 -; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v5 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v8 -; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:24 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 -; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 -; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 -; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 -; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 -; SI-NEXT: s_endpgm +; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s2, 0 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; SI-NEXT: v_mov_b32_e32 v1, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: buffer_load_ubyte v2, v[0:1], s[0:3], 0 addr64 +; SI-NEXT: buffer_load_ubyte v3, v[0:1], s[0:3], 0 addr64 offset:1 +; SI-NEXT: buffer_load_ubyte v6, v[0:1], s[0:3], 0 addr64 offset:2 +; SI-NEXT: buffer_load_ubyte v4, v[0:1], s[0:3], 0 addr64 offset:3 +; SI-NEXT: buffer_load_ubyte v7, v[0:1], s[0:3], 0 addr64 offset:4 +; SI-NEXT: buffer_load_ubyte v5, v[0:1], s[0:3], 0 addr64 offset:5 +; SI-NEXT: buffer_load_ubyte v8, v[0:1], s[0:3], 0 addr64 offset:6 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt vmcnt(6) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v2 +; SI-NEXT: s_waitcnt vmcnt(5) +; SI-NEXT: v_cvt_f32_ubyte2_e32 v1, v3 +; SI-NEXT: s_waitcnt vmcnt(3) +; SI-NEXT: v_lshlrev_b32_e32 v9, 8, v4 +; SI-NEXT: v_or_b32_e32 v3, v9, v6 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte2_e32 v5, v5 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v8 +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:24 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 +; SI-NEXT: v_cvt_f32_ubyte0_e32 v4, v7 +; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v2 +; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v2 +; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_endpgm ; ; VI-LABEL: load_v7i8_to_v7f32: ; VI: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 444421443b4f2..bacbfcb8f500d 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -1210,167 +1210,167 @@ define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { ; SI-LABEL: v_fshr_v2i24: ; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 -; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; SI-NEXT: buffer_load_dword v3, 
off, s[0:3], s32 offset:20 -; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 -; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 -; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 -; SI-NEXT: s_mov_b32 s4, 0xffffff -; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab -; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 -; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 -; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 -; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 -; SI-NEXT: s_waitcnt vmcnt(5) -; SI-NEXT: v_and_b32_e32 v14, s4, v1 -; SI-NEXT: s_waitcnt vmcnt(4) -; SI-NEXT: v_and_b32_e32 v2, s4, v2 -; SI-NEXT: v_mul_hi_u32 v12, v2, s5 -; SI-NEXT: s_waitcnt vmcnt(3) -; SI-NEXT: v_and_b32_e32 v3, s4, v3 -; SI-NEXT: v_mul_hi_u32 v13, v3, s5 -; SI-NEXT: s_waitcnt vmcnt(1) -; SI-NEXT: v_and_b32_e32 v11, s4, v5 -; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; SI-NEXT: v_mul_lo_u32 v12, v12, 24 -; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 -; SI-NEXT: v_mul_lo_u32 v13, v13, 24 -; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 -; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 -; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 -; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 -; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 -; SI-NEXT: v_and_b32_e32 v13, s4, v13 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_lshl_b32_e32 v6, v6, v13 -; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 -; SI-NEXT: v_lshr_b32_e32 v11, v11, v3 -; SI-NEXT: v_lshl_b32_e32 v4, v4, v14 -; SI-NEXT: v_or_b32_e32 v6, v6, v12 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; SI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; SI-NEXT: v_or_b32_e32 v4, v4, v11 -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; SI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; SI-NEXT: s_setpc_b64 s[30:31] +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 +; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 +; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 +; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 +; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 +; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 +; SI-NEXT: s_mov_b32 s4, 0xffffff +; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 +; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 +; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 +; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 +; SI-NEXT: s_waitcnt vmcnt(5) +; SI-NEXT: v_and_b32_e32 v14, s4, v1 +; SI-NEXT: s_waitcnt vmcnt(4) +; SI-NEXT: v_and_b32_e32 v2, s4, v2 +; SI-NEXT: v_mul_hi_u32 v12, v2, s5 +; SI-NEXT: s_waitcnt vmcnt(3) +; SI-NEXT: v_and_b32_e32 v3, s4, v3 +; SI-NEXT: v_mul_hi_u32 v13, v3, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_and_b32_e32 v11, s4, v5 +; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 +; SI-NEXT: v_mul_lo_u32 v12, v12, 24 +; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 +; SI-NEXT: v_mul_lo_u32 v13, v13, 24 +; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 +; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 +; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 +; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 +; SI-NEXT: 
v_and_b32_e32 v13, s4, v13 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshl_b32_e32 v6, v6, v13 +; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 +; SI-NEXT: v_lshr_b32_e32 v11, v11, v3 +; SI-NEXT: v_lshl_b32_e32 v4, v4, v14 +; SI-NEXT: v_or_b32_e32 v6, v6, v12 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; SI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; SI-NEXT: v_or_b32_e32 v4, v4, v11 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; SI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen +; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; SI-NEXT: s_waitcnt expcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen +; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen +; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_fshr_v2i24: ; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 -; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 -; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 -; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 -; VI-NEXT: s_mov_b32 s4, 0xffffff -; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab -; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 -; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 -; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 -; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 -; VI-NEXT: s_waitcnt vmcnt(5) -; VI-NEXT: v_and_b32_e32 v14, s4, v1 -; VI-NEXT: s_waitcnt vmcnt(4) -; VI-NEXT: v_and_b32_e32 v2, s4, v2 -; VI-NEXT: v_mul_hi_u32 v12, v2, s5 -; VI-NEXT: s_waitcnt vmcnt(3) -; VI-NEXT: v_and_b32_e32 v3, s4, v3 -; VI-NEXT: v_mul_hi_u32 v13, v3, s5 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_and_b32_e32 v11, s4, v5 -; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 -; VI-NEXT: v_mul_lo_u32 v12, v12, 24 -; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 -; VI-NEXT: v_mul_lo_u32 v13, v13, 24 -; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 -; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 -; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 -; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 -; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 -; VI-NEXT: v_and_b32_e32 v13, s4, v13 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b32_e32 v6, v13, v6 -; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 -; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 -; VI-NEXT: v_lshlrev_b32_e32 v4, v14, v4 -; VI-NEXT: v_or_b32_e32 v6, v6, v12 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; VI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; VI-NEXT: v_or_b32_e32 v4, v4, v11 -; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; VI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc -; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen -; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 -; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen -; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_setpc_b64 s[30:31] +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 +; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 +; VI-NEXT: 
buffer_load_dword v3, off, s[0:3], s32 offset:20 +; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 +; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 +; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 +; VI-NEXT: s_mov_b32 s4, 0xffffff +; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab +; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 +; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 +; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 +; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 +; VI-NEXT: s_waitcnt vmcnt(5) +; VI-NEXT: v_and_b32_e32 v14, s4, v1 +; VI-NEXT: s_waitcnt vmcnt(4) +; VI-NEXT: v_and_b32_e32 v2, s4, v2 +; VI-NEXT: v_mul_hi_u32 v12, v2, s5 +; VI-NEXT: s_waitcnt vmcnt(3) +; VI-NEXT: v_and_b32_e32 v3, s4, v3 +; VI-NEXT: v_mul_hi_u32 v13, v3, s5 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_and_b32_e32 v11, s4, v5 +; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 +; VI-NEXT: v_mul_lo_u32 v12, v12, 24 +; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 +; VI-NEXT: v_mul_lo_u32 v13, v13, 24 +; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 +; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 +; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 +; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 +; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 +; VI-NEXT: v_and_b32_e32 v13, s4, v13 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshlrev_b32_e32 v6, v13, v6 +; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 +; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 +; VI-NEXT: v_lshlrev_b32_e32 v4, v14, v4 +; VI-NEXT: v_or_b32_e32 v6, v6, v12 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; VI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; VI-NEXT: v_or_b32_e32 v4, v4, v11 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; VI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen +; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen +; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen +; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshr_v2i24: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 -; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 -; GFX9-NEXT: s_mov_b32 s4, 0xffffff -; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab -; GFX9-NEXT: s_waitcnt vmcnt(5) -; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 -; GFX9-NEXT: v_mul_hi_u32 v6, v1, s5 -; GFX9-NEXT: s_waitcnt vmcnt(4) -; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 -; GFX9-NEXT: v_mul_hi_u32 v7, v2, s5 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 -; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v10, s4, v8 -; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, v1, v10 -; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 -; GFX9-NEXT: v_sub_u32_e32 v7, 24, v1 -; GFX9-NEXT: v_sub_u32_e32 v10, 24, v2 -; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v9, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 -; GFX9-NEXT: v_lshl_or_b32 
v5, v5, v7, v6 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc -; GFX9-NEXT: v_lshl_or_b32 v3, v3, v10, v9 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 -; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 -; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 -; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 -; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 +; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 +; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 +; GFX9-NEXT: s_mov_b32 s4, 0xffffff +; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab +; GFX9-NEXT: s_waitcnt vmcnt(5) +; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX9-NEXT: v_mul_hi_u32 v6, v1, s5 +; GFX9-NEXT: s_waitcnt vmcnt(4) +; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 +; GFX9-NEXT: v_mul_hi_u32 v7, v2, s5 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 +; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v10, s4, v8 +; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 +; GFX9-NEXT: v_lshrrev_b32_e32 v6, v1, v10 +; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 +; GFX9-NEXT: v_sub_u32_e32 v7, 24, v1 +; GFX9-NEXT: v_sub_u32_e32 v10, 24, v2 +; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX9-NEXT: v_lshrrev_b32_e32 v9, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 +; GFX9-NEXT: v_lshl_or_b32 v5, v5, v7, v6 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc +; GFX9-NEXT: v_lshl_or_b32 v3, v3, v10, v9 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 +; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 +; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 +; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 +; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_fshr_v2i24: ; R600: ; %bb.0: From a258338d627170f204c40ebe93ea7fb18c7c1197 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 2 Aug 2020 00:20:11 -0700 Subject: [PATCH 134/600] [X86] Add test cases for missed opportunity to use a byte test instruction instead of an xor with 0 in parity patterns. If the input to the ctpop fits in 8 bits, we can use the parity flag from a TEST instruction, but we're currently XORing with 0. 
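For reference, the parity idioms these new tests cover correspond roughly to the following C++ sketch (illustrative only — the tests themselves are written directly in LLVM IR, and the function names simply mirror the test names):

    // Parity of a value whose upper bits are known zero, either via a
    // zero-extension from i8 or via an explicit mask with 255. Clang
    // lowers __builtin_parity to llvm.ctpop followed by "and 1", which
    // is exactly the IR pattern in the tests below.
    unsigned parity_8_zext(unsigned char x) {
      return __builtin_parity(x);       // zext i8 -> i32, ctpop, and 1
    }
    unsigned parity_8_mask(unsigned x) {
      return __builtin_parity(x & 255); // and i32 255, ctpop, and 1
    }

On x86, a byte TEST/CMP already encodes the parity of that byte in PF, so the `xorb $0, %reg` currently emitted is redundant; the follow-up commit below switches to a plain byte test.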
--- llvm/test/CodeGen/X86/parity.ll | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 8637058e06807..68f2b8b1b5538 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -181,5 +181,75 @@ define i8 @parity_32_trunc(i32 %x) { ret i8 %3 } +define i32 @parity_8_zext(i8 %x) { +; X86-NOPOPCNT-LABEL: parity_8_zext: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb $0, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_8_zext: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb $0, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_8_zext: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_8_zext: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: movzbl %dil, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: retq + %a = zext i8 %x to i32 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + +define i32 @parity_8_mask(i32 %x) { +; X86-NOPOPCNT-LABEL: parity_8_mask: +; X86-NOPOPCNT: # %bb.0: +; X86-NOPOPCNT-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NOPOPCNT-NEXT: xorl %eax, %eax +; X86-NOPOPCNT-NEXT: xorb $0, %cl +; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: retl +; +; X64-NOPOPCNT-LABEL: parity_8_mask: +; X64-NOPOPCNT: # %bb.0: +; X64-NOPOPCNT-NEXT: xorl %eax, %eax +; X64-NOPOPCNT-NEXT: xorb $0, %dil +; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: retq +; +; X86-POPCNT-LABEL: parity_8_mask: +; X86-POPCNT: # %bb.0: +; X86-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: popcntl %eax, %eax +; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: parity_8_mask: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: movzbl %dil, %eax +; X64-POPCNT-NEXT: popcntl %eax, %eax +; X64-POPCNT-NEXT: andl $1, %eax +; X64-POPCNT-NEXT: retq + %a = and i32 %x, 255 + %b = tail call i32 @llvm.ctpop.i32(i32 %a) + %c = and i32 %b, 1 + ret i32 %c +} + declare i32 @llvm.ctpop.i32(i32 %x) declare i64 @llvm.ctpop.i64(i64 %x) From 64516ec7c1298a4cb16980db49c2f9466f0f3ab5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 2 Aug 2020 09:58:55 -0700 Subject: [PATCH 135/600] [X86] Use parity flag from byte test/cmp instruction for __builtin_parity when input fits in 8 bits. If the upper bits of the __builtin_parity idiom are known to be 0 we were previously emitting an xor with 0 to get the parity flag. But we can use cmp/test instead which may expose opportunities for load folding or combining an AND. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++ llvm/test/CodeGen/X86/parity.ll | 10 +- .../CodeGen/X86/vector-reduce-xor-bool.ll | 162 ++++++++---------- 3 files changed, 87 insertions(+), 96 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 93852e13b7d08..ff59f28c8b6d2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42773,6 +42773,17 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); SDValue X = N0.getOperand(0); + // Special case. 
If the input fits in 8-bits we can use a single 8-bit TEST. + if (DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { + X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); + SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, + DAG.getConstant(0, DL, MVT::i8)); + // Copy the inverse of the parity flag into a register with setcc. + SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); + // Extend or truncate to the original type. + return DAG.getZExtOrTrunc(Setnp, DL, N->getValueType(0)); + } + // If this is 64-bit, its always best to xor the two 32-bit pieces together // even if we have popcnt. if (VT == MVT::i64) { diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 68f2b8b1b5538..869ee552c67da 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -184,16 +184,15 @@ define i8 @parity_32_trunc(i32 %x) { define i32 @parity_8_zext(i8 %x) { ; X86-NOPOPCNT-LABEL: parity_8_zext: ; X86-NOPOPCNT: # %bb.0: -; X86-NOPOPCNT-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOPOPCNT-NEXT: xorl %eax, %eax -; X86-NOPOPCNT-NEXT: xorb $0, %cl +; X86-NOPOPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; X86-NOPOPCNT-NEXT: setnp %al ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_8_zext: ; X64-NOPOPCNT: # %bb.0: ; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb $0, %dil +; X64-NOPOPCNT-NEXT: testb %dil, %dil ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; @@ -219,16 +218,15 @@ define i32 @parity_8_zext(i8 %x) { define i32 @parity_8_mask(i32 %x) { ; X86-NOPOPCNT-LABEL: parity_8_mask: ; X86-NOPOPCNT: # %bb.0: -; X86-NOPOPCNT-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOPOPCNT-NEXT: xorl %eax, %eax -; X86-NOPOPCNT-NEXT: xorb $0, %cl +; X86-NOPOPCNT-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; X86-NOPOPCNT-NEXT: setnp %al ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_8_mask: ; X64-NOPOPCNT: # %bb.0: ; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb $0, %dil +; X64-NOPOPCNT-NEXT: testb %dil, %dil ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index 627faa02d2b7d..8e50cfc4e4b29 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -16,7 +16,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; SSE: # %bb.0: ; SSE-NEXT: psllq $63, %xmm0 ; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: xorb $0, %al +; SSE-NEXT: testb %al, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -24,7 +24,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX: # %bb.0: ; AVX-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -33,8 +33,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $3, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $3, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -44,8 +43,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $3, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $3, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -55,8 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 
x i64>) { ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $3, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <2 x i64> %0 to <2 x i1> @@ -69,7 +66,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; SSE: # %bb.0: ; SSE-NEXT: pslld $31, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorb $0, %al +; SSE-NEXT: testb %al, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -77,7 +74,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX: # %bb.0: ; AVX-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -86,8 +83,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $15, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -97,8 +93,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $15, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -108,8 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $15, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <4 x i32> %0 to <4 x i1> @@ -124,7 +118,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; SSE2-NEXT: psllw $15, %xmm0 ; SSE2-NEXT: packsswb %xmm0, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -134,7 +128,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; SSE41-NEXT: psllw $15, %xmm0 ; SSE41-NEXT: packsswb %xmm0, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -144,7 +138,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -154,7 +148,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -164,7 +158,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX512BW-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -174,7 +168,7 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) { ; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512VL-NEXT: vpmovb2m %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = trunc <8 x 
i8> %0 to <8 x i1> @@ -223,7 +217,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; SSE-NEXT: pslld $31, %xmm0 ; SSE-NEXT: movmskps %xmm0, %eax -; SSE-NEXT: xorb $0, %al +; SSE-NEXT: testb %al, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -233,7 +227,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; AVX-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq @@ -243,8 +237,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $15, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -254,8 +247,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $15, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -265,8 +257,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $15, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -286,7 +277,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; SSE2-NEXT: psllw $15, %xmm0 ; SSE2-NEXT: packsswb %xmm0, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -299,7 +290,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; SSE41-NEXT: psllw $15, %xmm0 ; SSE41-NEXT: packsswb %xmm0, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -313,7 +304,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorb $0, %al +; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -325,7 +316,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorb $0, %al +; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -335,7 +326,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -345,7 +336,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -355,7 +346,7 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) { ; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 ; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k0 ; 
AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -556,7 +547,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: psllw $15, %xmm2 ; SSE2-NEXT: packsswb %xmm2, %xmm2 ; SSE2-NEXT: pmovmskb %xmm2, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -573,7 +564,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; SSE41-NEXT: psllw $15, %xmm0 ; SSE41-NEXT: packsswb %xmm0, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -590,7 +581,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: xorb $0, %al +; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -605,7 +596,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: xorb $0, %al +; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -615,7 +606,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -625,7 +616,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -635,7 +626,7 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) { ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 ; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -987,7 +978,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] ; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -996,7 +987,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 ; SSE41-NEXT: movmskpd %xmm1, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -1005,7 +996,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -1014,8 +1005,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $3, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $3, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1025,8 +1015,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: 
vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $3, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $3, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1035,8 +1024,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $3, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <2 x i64> %0, zeroinitializer @@ -1050,7 +1038,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pcmpeqd %xmm0, %xmm1 ; SSE-NEXT: movmskps %xmm1, %eax -; SSE-NEXT: xorb $0, %al +; SSE-NEXT: testb %al, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1059,7 +1047,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -1068,8 +1056,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $15, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1079,8 +1066,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $15, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1089,8 +1075,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $15, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <4 x i32> %0, zeroinitializer @@ -1106,7 +1091,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSE2-NEXT: packsswb %xmm0, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -1117,7 +1102,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 ; SSE41-NEXT: packsswb %xmm0, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -1128,7 +1113,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 ; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: xorb $0, %al +; AVX-NEXT: testb %al, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -1139,7 +1124,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1149,7 +1134,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1158,7 +1143,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: retq %a = icmp eq <8 x i8> %0, zeroinitializer @@ -1238,7 +1223,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; SSE2-NEXT: pand %xmm0, %xmm1 ; SSE2-NEXT: packssdw %xmm3, %xmm1 ; SSE2-NEXT: movmskps %xmm1, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -1249,7 +1234,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; SSE41-NEXT: pcmpeqq %xmm2, %xmm0 ; SSE41-NEXT: packssdw %xmm1, %xmm0 ; SSE41-NEXT: movmskps %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -1261,7 +1246,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskpd %ymm0, %eax -; AVX1-NEXT: xorb $0, %al +; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1271,7 +1256,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskpd %ymm0, %eax -; AVX2-NEXT: xorb $0, %al +; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1281,8 +1266,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: andl $15, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1292,8 +1276,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: andl $15, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1302,8 +1285,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: andl $15, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1321,7 +1303,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: packsswb %xmm0, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: xorb $0, %al +; SSE-NEXT: testb %al, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1333,7 +1315,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: xorb $0, %al +; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1343,7 +1325,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: xorb $0, %al +; AVX2-NEXT: testb %al, %al ; 
AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1353,7 +1335,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1363,7 +1345,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1372,7 +1354,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1584,7 +1566,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE2-NEXT: packssdw %xmm3, %xmm1 ; SSE2-NEXT: packsswb %xmm1, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: xorb $0, %al +; SSE2-NEXT: testb %al, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -1600,7 +1582,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE41-NEXT: packssdw %xmm2, %xmm0 ; SSE41-NEXT: packsswb %xmm0, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: xorb $0, %al +; SSE41-NEXT: testb %al, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -1617,7 +1599,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: vmovmskps %ymm0, %eax -; AVX1-NEXT: xorb $0, %al +; AVX1-NEXT: testb %al, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1630,7 +1612,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: vmovmskps %ymm0, %eax -; AVX2-NEXT: xorb $0, %al +; AVX2-NEXT: testb %al, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1639,7 +1621,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX512F: # %bb.0: ; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: xorb $0, %al +; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1648,7 +1630,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: xorb $0, %al +; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1657,7 +1639,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: xorb $0, %al +; AVX512VL-NEXT: testb %al, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq From e6c2c9a7d15171a57c98024511eaa8885f1bd5fd Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Sun, 2 Aug 2020 22:41:02 +0200 Subject: [PATCH 136/600] [lldb] [test] Fix DW_TAG_GNU_call_site-DW_AT_low_pc.s relocation I have made the DW_FORM_ref4 relative. One could also use relocated DW_FORM_ref_addr instead. 
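As an illustrative sketch (not part of the patch), using the labels that already appear in this test, the two encodings look like:

  .long .Ltype_int - .Ldebug_info0   # DW_FORM_ref4: CU-relative offset, computed at assembly time, no relocation
  .long .Ltype_int                   # relocated DW_FORM_ref_addr style: section offset filled in by a relocation

Here .Ldebug_info0 is assumed to label the start of the compile unit, as in the test below; the first form needs no relocation, which is why making the references relative fixes the test.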
Tested with: echo 'void f(){}'|clang -o 1.o -c -Wall -g -x c -;./bin/clang -o 1 1.o ../llvm-monorepo/lldb/test/Shell/SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s;./bin/lldb --no-lldbinit ./1 -o r -o 'p p' -o exit --- .../SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/test/Shell/SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s b/lldb/test/Shell/SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s index 29cafebaa4405..8cc1cc1ed2f95 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s +++ b/lldb/test/Shell/SymbolFile/DWARF/DW_TAG_GNU_call_site-DW_AT_low_pc.s @@ -79,7 +79,7 @@ main: .uleb128 0x2 # (DIE (0x2d) DW_TAG_subprogram) # DW_AT_external .asciz "main" # DW_AT_name: "main" - .long .Ltype_int # DW_AT_type + .long .Ltype_int - .Ldebug_info0 # DW_AT_type .quad .LFB2 # DW_AT_low_pc .quad .LFE2-.LFB2 # DW_AT_high_pc .uleb128 0x1 # DW_AT_frame_base @@ -87,7 +87,7 @@ main: # DW_AT_GNU_all_call_sites .uleb128 0x3 # (DIE (0x4f) DW_TAG_GNU_call_site) .quad .LVL4 # DW_AT_low_pc - .long .Lfunc_a # DW_AT_abstract_origin + .long .Lfunc_a - .Ldebug_info0 # DW_AT_abstract_origin .uleb128 0x4 # (DIE (0x5c) DW_TAG_GNU_call_site_parameter) .uleb128 0x1 # DW_AT_location .byte 0x55 # DW_OP_reg5 @@ -111,7 +111,7 @@ main: # DW_AT_GNU_all_call_sites .uleb128 0x7 # (DIE (0x86) DW_TAG_formal_parameter) .asciz "p" # DW_AT_name - .long .Ltype_int # DW_AT_type + .long .Ltype_int - .Ldebug_info0 # DW_AT_type .long .LLST0 # DW_AT_location .byte 0 # end of children of DIE 0x6a .byte 0 # end of children of DIE 0xb From 00a0282ff8f9a790e93c19ef6fa3758e209cdbe6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Aug 2020 21:45:42 +0100 Subject: [PATCH 137/600] [Clang] Remove run-lines which use opt to run -ipconstprop. ipconstprop is going to get removed and checking opt with specific passes makes the tests more fragile. The tests retain the important checks that !callback metadata is created correctly. 
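For reference, a hand-written sketch (not copied from the tests) of the IR shape the retained checks match, using the pthread_create broker from callback_pthread_create.c; the parameter types are simplified, and the !callback node encodes callee operand 2 with payload operand 3:

  declare !callback !0 i32 @pthread_create(i64*, i8*, i8* (i8*)*, i8*)

  !0 = !{!1}
  !1 = !{i64 2, i64 3, i1 false}  ; callee index, forwarded payload index, not variadic

The tests only verify this metadata wiring, so they no longer depend on any particular optimization pipeline.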
--- clang/test/CodeGen/callback_annotated.c | 16 +--------------- clang/test/CodeGen/callback_openmp.c | 6 +----- clang/test/CodeGen/callback_pthread_create.c | 7 +------ 3 files changed, 3 insertions(+), 26 deletions(-) diff --git a/clang/test/CodeGen/callback_annotated.c b/clang/test/CodeGen/callback_annotated.c index c5b431d5ef845..83a79c3491daf 100644 --- a/clang/test/CodeGen/callback_annotated.c +++ b/clang/test/CodeGen/callback_annotated.c @@ -1,6 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN1 -// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | FileCheck %s --check-prefix=RUN2 -// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 -fno-experimental-new-pass-manager %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -fno-experimental-new-pass-manager %s -emit-llvm -o - -disable-llvm-optzns | FileCheck %s --check-prefix=RUN1 // RUN1-DAG: @broker0({{[^#]*#[0-9]+}} !callback ![[cid0:[0-9]+]] __attribute__((callback(1, 2))) void *broker0(void *(*callee)(void *), void *payload) { @@ -29,22 +27,10 @@ __attribute__((callback(4, -1, a, __))) void *broker4(int a, int, int, int (*cal __attribute__((callback(4, d, 5, 2))) void *broker5(int, int, int, int (*callee)(int, int, int), int d); static void *VoidPtr2VoidPtr(void *payload) { - // RUN2: ret i8* %payload - // IPCP: ret i8* null return payload; } static int ThreeInt2Int(int a, int b, int c) { - // RUN2: define internal i32 @ThreeInt2Int(i32 %a, i32 %b, i32 %c) - // RUN2: %mul = mul nsw i32 %b, %a - // RUN2: %add = add nsw i32 %mul, %c - // RUN2: ret i32 %add - - // IPCP: define internal i32 @ThreeInt2Int(i32 %a, i32 %b, i32 %c) - // IPCP: %mul = mul nsw i32 4, %a - // IPCP: %add = add nsw i32 %mul, %c - // IPCP: ret i32 %add - return a * b + c; } diff --git a/clang/test/CodeGen/callback_openmp.c b/clang/test/CodeGen/callback_openmp.c index 2fc9dcd391f63..90e63fdb2e580 100644 --- a/clang/test/CodeGen/callback_openmp.c +++ b/clang/test/CodeGen/callback_openmp.c @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp -O1 %s -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -fopenmp %s -emit-llvm -o - -disable-llvm-optzns | FileCheck %s // CHECK: declare !callback ![[cid:[0-9]+]] void @__kmpc_fork_call // CHECK: declare !callback ![[cid]] void @__kmpc_fork_teams @@ -15,14 +14,11 @@ void foo(int q) { #pragma omp parallel firstprivate(q, p) work1(p, q); -// IPCP: call void @work1(i32 2, i32 %{{[._a-zA-Z0-9]*}}) #pragma omp parallel for firstprivate(p, q) for (int i = 0; i < q; i++) work2(i, p); -// IPCP: call void @work2(i32 %{{[._a-zA-Z0-9]*}}, i32 2) #pragma omp target teams firstprivate(p) work12(p, p); -// IPCP: call void @work12(i32 2, i32 2) } diff --git a/clang/test/CodeGen/callback_pthread_create.c b/clang/test/CodeGen/callback_pthread_create.c index 785440030b32e..d1b01b91eac3f 100644 --- a/clang/test/CodeGen/callback_pthread_create.c +++ b/clang/test/CodeGen/callback_pthread_create.c @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -O1 %s -S -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -O1 %s -S -emit-llvm -o - | opt -ipconstprop -S | FileCheck --check-prefix=IPCP %s +// RUN: %clang_cc1 %s -S 
-emit-llvm -o - -disable-llvm-optzns | FileCheck %s // CHECK: declare !callback ![[cid:[0-9]+]] {{.*}}i32 @pthread_create // CHECK: ![[cid]] = !{![[cidb:[0-9]+]]} @@ -21,14 +20,10 @@ int pthread_create(pthread_t *, const pthread_attr_t *, const int GlobalVar = 0; static void *callee0(void *payload) { -// IPCP: define internal i8* @callee0 -// IPCP: ret i8* null return payload; } static void *callee1(void *payload) { -// IPCP: define internal i8* @callee1 -// IPCP: ret i8* bitcast (i32* @GlobalVar to i8*) return payload; } From 08cf49658c1da891fb2b2cb577a89a4d6d1a7adc Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sun, 2 Aug 2020 13:56:38 -0700 Subject: [PATCH 138/600] [StackSafety, NFC] Don't insert empty objects into the map Result should be the same but it makes generateParamAccessSummary 5x faster. --- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index bbfc303aefac4..bcbe9a63c182d 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -928,7 +928,7 @@ void llvm::generateParamAccessSummary(ModuleSummaryIndex &Index) { for (auto &GVS : Index) { for (auto &GV : GVS.second.SummaryList) { FunctionSummary *FS = dyn_cast(GV.get()); - if (!FS) + if (!FS || FS->paramAccesses().empty()) continue; if (FS->isLive() && FS->isDSOLocal()) { FunctionInfo FI; From 599955eb56ebad50c12422cb6194a2da770902a0 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Aug 2020 22:04:44 +0100 Subject: [PATCH 139/600] Recommit "[IPConstProp] Remove and move tests to SCCP." This reverts commit 59d6e814ce0e7b40b7cc3ab136b9af2ffab9c6f8. The cause for the revert (3 clang tests running opt -ipconstprop) was fixed by removing those lines. 
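As a minimal sketch of the propagation -ipsccp now covers for these tests (hypothetical functions, not taken from the moved files): when an internal function's only call sites pass the same constant, both the argument and the returned value fold:

  define internal i32 @callee(i32 %x) {
    %r = add i32 %x, 1   ; -ipsccp proves %x == 41 from the lone call site
    ret i32 %r
  }

  define i32 @caller() {
    %r = call i32 @callee(i32 41)
    ret i32 %r           ; uses of %r are replaced by the constant 42
  }

The moved tests exercise the same propagation through callbacks, varargs, and struct returns.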
--- llvm/bindings/go/llvm/transforms_ipo.go | 1 - .../bindings/ocaml/transforms/ipo/ipo_ocaml.c | 6 - .../ocaml/transforms/ipo/llvm_ipo.mli | 5 - llvm/docs/Passes.rst | 9 - llvm/include/llvm-c/Transforms/IPO.h | 3 - llvm/include/llvm/InitializePasses.h | 1 - llvm/include/llvm/LinkAllPasses.h | 1 - llvm/include/llvm/Transforms/IPO.h | 6 - llvm/lib/Transforms/IPO/CMakeLists.txt | 1 - .../Transforms/IPO/IPConstantPropagation.cpp | 308 ------------------ llvm/lib/Transforms/IPO/IPO.cpp | 5 - .../Transforms/IPConstantProp/comdat-ipo.ll | 34 -- .../2008-06-09-WeakProp.ll | 2 +- .../{IPConstantProp => SCCP}/PR43857.ll | 4 +- .../arg-count-mismatch.ll | 4 +- .../arg-type-mismatch.ll | 2 +- llvm/test/Transforms/SCCP/comdat-ipo.ll | 14 +- .../{IPConstantProp => SCCP}/deadarg.ll | 2 +- .../multiple_callbacks.ll | 7 +- .../{IPConstantProp => SCCP}/naked-return.ll | 1 - .../openmp_parallel_for.ll | 4 +- .../{IPConstantProp => SCCP}/pthreads.ll | 6 +- .../{IPConstantProp => SCCP}/recursion.ll | 2 +- .../return-argument.ll | 8 +- .../return-constant.ll | 6 +- .../return-constants.ll | 18 +- .../thread_local_acs.ll | 4 +- llvm/utils/findoptdiff | 2 +- .../llvm/lib/Transforms/IPO/BUILD.gn | 1 - 29 files changed, 43 insertions(+), 424 deletions(-) delete mode 100644 llvm/lib/Transforms/IPO/IPConstantPropagation.cpp delete mode 100644 llvm/test/Transforms/IPConstantProp/comdat-ipo.ll rename llvm/test/Transforms/{IPConstantProp => SCCP}/2008-06-09-WeakProp.ll (91%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/PR43857.ll (86%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/arg-count-mismatch.ll (96%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/arg-type-mismatch.ll (92%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/deadarg.ll (77%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/multiple_callbacks.ll (96%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/naked-return.ll (97%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/openmp_parallel_for.ll (98%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/pthreads.ll (93%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/recursion.ll (90%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/return-argument.ll (91%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/return-constant.ll (91%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/return-constants.ll (70%) rename llvm/test/Transforms/{IPConstantProp => SCCP}/thread_local_acs.ll (92%) diff --git a/llvm/bindings/go/llvm/transforms_ipo.go b/llvm/bindings/go/llvm/transforms_ipo.go index 1dcb2af8bf243..8a158f208bef4 100644 --- a/llvm/bindings/go/llvm/transforms_ipo.go +++ b/llvm/bindings/go/llvm/transforms_ipo.go @@ -32,7 +32,6 @@ func (pm PassManager) AddFunctionAttrsPass() { C.LLVMAddFunctionAttrsPas func (pm PassManager) AddFunctionInliningPass() { C.LLVMAddFunctionInliningPass(pm.C) } func (pm PassManager) AddGlobalDCEPass() { C.LLVMAddGlobalDCEPass(pm.C) } func (pm PassManager) AddGlobalOptimizerPass() { C.LLVMAddGlobalOptimizerPass(pm.C) } -func (pm PassManager) AddIPConstantPropagationPass() { C.LLVMAddIPConstantPropagationPass(pm.C) } func (pm PassManager) AddPruneEHPass() { C.LLVMAddPruneEHPass(pm.C) } func (pm PassManager) AddIPSCCPPass() { C.LLVMAddIPSCCPPass(pm.C) } func (pm PassManager) AddInternalizePass(allButMain bool) { diff --git a/llvm/bindings/ocaml/transforms/ipo/ipo_ocaml.c b/llvm/bindings/ocaml/transforms/ipo/ipo_ocaml.c index 9fcaa10534f6b..c0e213714ed02 100644 --- a/llvm/bindings/ocaml/transforms/ipo/ipo_ocaml.c +++ 
b/llvm/bindings/ocaml/transforms/ipo/ipo_ocaml.c @@ -73,12 +73,6 @@ CAMLprim value llvm_add_global_optimizer(LLVMPassManagerRef PM) { return Val_unit; } -/* [`Module] Llvm.PassManager.t -> unit */ -CAMLprim value llvm_add_ip_constant_propagation(LLVMPassManagerRef PM) { - LLVMAddIPConstantPropagationPass(PM); - return Val_unit; -} - /* [`Module] Llvm.PassManager.t -> unit */ CAMLprim value llvm_add_prune_eh(LLVMPassManagerRef PM) { LLVMAddPruneEHPass(PM); diff --git a/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.mli b/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.mli index 6507c5d92c2b1..a581924c6d549 100644 --- a/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.mli +++ b/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.mli @@ -56,11 +56,6 @@ external add_global_optimizer : [ `Module ] Llvm.PassManager.t -> unit = "llvm_add_global_optimizer" -(** See the [llvm::createIPConstantPropagationPass] function. *) -external add_ipc_propagation - : [ `Module ] Llvm.PassManager.t -> unit - = "llvm_add_ip_constant_propagation" - (** See the [llvm::createPruneEHPass] function. *) external add_prune_eh : [ `Module ] Llvm.PassManager.t -> unit diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst index e45adad98c157..13317ecc966e7 100644 --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -676,15 +676,6 @@ This pass loops over all of the functions in the input module, looking for a main function. If a main function is found, all other functions and all global variables with initializers are marked as internal. -``-ipconstprop``: Interprocedural constant propagation ------------------------------------------------------- - -This pass implements an *extremely* simple interprocedural constant propagation -pass. It could certainly be improved in many different ways, like using a -worklist. This pass makes arguments dead, but does not remove them. The -existing dead argument elimination pass should be run after this to clean up -the mess. - ``-ipsccp``: Interprocedural Sparse Conditional Constant Propagation -------------------------------------------------------------------- diff --git a/llvm/include/llvm-c/Transforms/IPO.h b/llvm/include/llvm-c/Transforms/IPO.h index cde3d24609208..3f2cadf32366b 100644 --- a/llvm/include/llvm-c/Transforms/IPO.h +++ b/llvm/include/llvm-c/Transforms/IPO.h @@ -57,9 +57,6 @@ void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM); /** See llvm::createGlobalOptimizerPass function. */ void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM); -/** See llvm::createIPConstantPropagationPass function. */ -void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM); - /** See llvm::createPruneEHPass function. 
*/ void LLVMAddPruneEHPass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index cce6a43504021..d23ecfb98ce92 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -179,7 +179,6 @@ void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeHardwareLoopsPass(PassRegistry&); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); -void initializeIPCPPass(PassRegistry&); void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIRCELegacyPassPass(PassRegistry&); void initializeIRTranslatorPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 90e2e24294d40..9d7ac2b3f3b99 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -116,7 +116,6 @@ namespace { (void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createGuardWideningPass(); (void) llvm::createLoopGuardWideningPass(); - (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createInductiveRangeCheckEliminationPass(); (void) llvm::createIndVarSimplifyPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 28e454d3b0fc7..7b73eeaf8e45b 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -155,12 +155,6 @@ Pass *createArgumentPromotionPass(unsigned maxElements = 3); /// createOpenMPOptLegacyPass - OpenMP specific optimizations. Pass *createOpenMPOptLegacyPass(); -//===----------------------------------------------------------------------===// -/// createIPConstantPropagationPass - This pass propagates constants from call -/// sites into the bodies of functions. -/// -ModulePass *createIPConstantPropagationPass(); - //===----------------------------------------------------------------------===// /// createIPSCCPPass - This pass propagates constants from call sites into the /// bodies of functions, and keeps track of whether basic blocks are executable diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 3f30c0289e8c3..a17ef63371063 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -18,7 +18,6 @@ add_llvm_component_library(LLVMipo GlobalOpt.cpp GlobalSplit.cpp HotColdSplitting.cpp - IPConstantPropagation.cpp IPO.cpp InferFunctionAttrs.cpp InlineSimple.cpp diff --git a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp deleted file mode 100644 index 8d05a72d68dac..0000000000000 --- a/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp +++ /dev/null @@ -1,308 +0,0 @@ -//===-- IPConstantPropagation.cpp - Propagate constants through calls -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass implements an _extremely_ simple interprocedural constant -// propagation pass. It could certainly be improved in many different ways, -// like using a worklist. This pass makes arguments dead, but does not remove -// them. The existing dead argument elimination pass should be run after this -// to clean up the mess. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/AbstractCallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/IPO.h" -using namespace llvm; - -#define DEBUG_TYPE "ipconstprop" - -STATISTIC(NumArgumentsProped, "Number of args turned into constants"); -STATISTIC(NumReturnValProped, "Number of return values turned into constants"); - -namespace { - /// IPCP - The interprocedural constant propagation pass - /// - struct IPCP : public ModulePass { - static char ID; // Pass identification, replacement for typeid - IPCP() : ModulePass(ID) { - initializeIPCPPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override; - }; -} - -/// PropagateConstantsIntoArguments - Look at all uses of the specified -/// function. If all uses are direct call sites, and all pass a particular -/// constant in for an argument, propagate that constant in as the argument. -/// -static bool PropagateConstantsIntoArguments(Function &F) { - if (F.arg_empty() || F.use_empty()) return false; // No arguments? Early exit. - - // For each argument, keep track of its constant value and whether it is a - // constant or not. The bool is driven to true when found to be non-constant. - SmallVector, 16> ArgumentConstants; - ArgumentConstants.resize(F.arg_size()); - - unsigned NumNonconstant = 0; - for (Use &U : F.uses()) { - User *UR = U.getUser(); - // Ignore blockaddress uses. - if (isa(UR)) continue; - - // If no abstract call site was created we did not understand the use, bail. - AbstractCallSite ACS(&U); - if (!ACS) - return false; - - // Mismatched argument count is undefined behavior. Simply bail out to avoid - // handling of such situations below (avoiding asserts/crashes). - unsigned NumActualArgs = ACS.getNumArgOperands(); - if (F.isVarArg() ? ArgumentConstants.size() > NumActualArgs - : ArgumentConstants.size() != NumActualArgs) - return false; - - // Check out all of the potentially constant arguments. Note that we don't - // inspect varargs here. - Function::arg_iterator Arg = F.arg_begin(); - for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) { - - // If this argument is known non-constant, ignore it. - if (ArgumentConstants[i].getInt()) - continue; - - Value *V = ACS.getCallArgOperand(i); - Constant *C = dyn_cast_or_null(V); - - // Mismatched argument type is undefined behavior. Simply bail out to avoid - // handling of such situations below (avoiding asserts/crashes). - if (C && Arg->getType() != C->getType()) - return false; - - // We can only propagate thread independent values through callbacks. - // This is different to direct/indirect call sites because for them we - // know the thread executing the caller and callee is the same. For - // callbacks this is not guaranteed, thus a thread dependent value could - // be different for the caller and callee, making it invalid to propagate. - if (C && ACS.isCallbackCall() && C->isThreadDependent()) { - // Argument became non-constant. If all arguments are non-constant now, - // give up on this function. 
- if (++NumNonconstant == ArgumentConstants.size()) - return false; - - ArgumentConstants[i].setInt(true); - continue; - } - - if (C && ArgumentConstants[i].getPointer() == nullptr) { - ArgumentConstants[i].setPointer(C); // First constant seen. - } else if (C && ArgumentConstants[i].getPointer() == C) { - // Still the constant value we think it is. - } else if (V == &*Arg) { - // Ignore recursive calls passing argument down. - } else { - // Argument became non-constant. If all arguments are non-constant now, - // give up on this function. - if (++NumNonconstant == ArgumentConstants.size()) - return false; - ArgumentConstants[i].setInt(true); - } - } - } - - // If we got to this point, there is a constant argument! - assert(NumNonconstant != ArgumentConstants.size()); - bool MadeChange = false; - Function::arg_iterator AI = F.arg_begin(); - for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { - // Do we have a constant argument? - if (ArgumentConstants[i].getInt() || AI->use_empty() || - (AI->hasByValAttr() && !F.onlyReadsMemory())) - continue; - - Value *V = ArgumentConstants[i].getPointer(); - if (!V) V = UndefValue::get(AI->getType()); - AI->replaceAllUsesWith(V); - ++NumArgumentsProped; - MadeChange = true; - } - return MadeChange; -} - - -// Check to see if this function returns one or more constants. If so, replace -// all callers that use those return values with the constant value. This will -// leave in the actual return values and instructions, but deadargelim will -// clean that up. -// -// Additionally if a function always returns one of its arguments directly, -// callers will be updated to use the value they pass in directly instead of -// using the return value. -static bool PropagateConstantReturn(Function &F) { - if (F.getReturnType()->isVoidTy()) - return false; // No return value. - - // We can infer and propagate the return value only when we know that the - // definition we'll get at link time is *exactly* the definition we see now. - // For more details, see GlobalValue::mayBeDerefined. - if (!F.isDefinitionExact()) - return false; - - // Don't touch naked functions. The may contain asm returning - // value we don't see, so we may end up interprocedurally propagating - // the return value incorrectly. - if (F.hasFnAttribute(Attribute::Naked)) - return false; - - // Check to see if this function returns a constant. - SmallVector RetVals; - StructType *STy = dyn_cast(F.getReturnType()); - if (STy) - for (unsigned i = 0, e = STy->getNumElements(); i < e; ++i) - RetVals.push_back(UndefValue::get(STy->getElementType(i))); - else - RetVals.push_back(UndefValue::get(F.getReturnType())); - - unsigned NumNonConstant = 0; - for (BasicBlock &BB : F) - if (ReturnInst *RI = dyn_cast(BB.getTerminator())) { - for (unsigned i = 0, e = RetVals.size(); i != e; ++i) { - // Already found conflicting return values? - Value *RV = RetVals[i]; - if (!RV) - continue; - - // Find the returned value - Value *V; - if (!STy) - V = RI->getOperand(0); - else - V = FindInsertedValue(RI->getOperand(0), i); - - if (V) { - // Ignore undefs, we can change them into anything - if (isa(V)) - continue; - - // Try to see if all the rets return the same constant or argument. - if (isa(V) || isa(V)) { - if (isa(RV)) { - // No value found yet? Try the current one. - RetVals[i] = V; - continue; - } - // Returning the same value? Good. - if (RV == V) - continue; - } - } - // Different or no known return value? Don't propagate this return - // value. 
- RetVals[i] = nullptr; - // All values non-constant? Stop looking. - if (++NumNonConstant == RetVals.size()) - return false; - } - } - - // If we got here, the function returns at least one constant value. Loop - // over all users, replacing any uses of the return value with the returned - // constant. - bool MadeChange = false; - for (Use &U : F.uses()) { - CallBase *CB = dyn_cast(U.getUser()); - - // Not a call instruction or a call instruction that's not calling F - // directly? - if (!CB || !CB->isCallee(&U)) - continue; - - // Call result not used? - if (CB->use_empty()) - continue; - - MadeChange = true; - - if (!STy) { - Value* New = RetVals[0]; - if (Argument *A = dyn_cast(New)) - // Was an argument returned? Then find the corresponding argument in - // the call instruction and use that. - New = CB->getArgOperand(A->getArgNo()); - CB->replaceAllUsesWith(New); - continue; - } - - for (auto I = CB->user_begin(), E = CB->user_end(); I != E;) { - Instruction *Ins = cast(*I); - - // Increment now, so we can remove the use - ++I; - - // Find the index of the retval to replace with - int index = -1; - if (ExtractValueInst *EV = dyn_cast(Ins)) - if (EV->getNumIndices() == 1) - index = *EV->idx_begin(); - - // If this use uses a specific return value, and we have a replacement, - // replace it. - if (index != -1) { - Value *New = RetVals[index]; - if (New) { - if (Argument *A = dyn_cast(New)) - // Was an argument returned? Then find the corresponding argument in - // the call instruction and use that. - New = CB->getArgOperand(A->getArgNo()); - Ins->replaceAllUsesWith(New); - Ins->eraseFromParent(); - } - } - } - } - - if (MadeChange) ++NumReturnValProped; - return MadeChange; -} - -char IPCP::ID = 0; -INITIALIZE_PASS(IPCP, "ipconstprop", - "Interprocedural constant propagation", false, false) - -ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } - -bool IPCP::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - bool Changed = false; - bool LocalChange = true; - - // FIXME: instead of using smart algorithms, we just iterate until we stop - // making changes. - while (LocalChange) { - LocalChange = false; - for (Function &F : M) - if (!F.isDeclaration()) { - // Delete any klingons. 
- F.removeDeadConstantUsers(); - if (F.hasLocalLinkage()) - LocalChange |= PropagateConstantsIntoArguments(F); - Changed |= PropagateConstantReturn(F); - } - Changed |= LocalChange; - } - return Changed; -} diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp index d37b9236380d4..45aca100086d4 100644 --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -35,7 +35,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeGlobalOptLegacyPassPass(Registry); initializeGlobalSplitPass(Registry); initializeHotColdSplittingLegacyPassPass(Registry); - initializeIPCPPass(Registry); initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); @@ -104,10 +103,6 @@ void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createGlobalOptimizerPass()); } -void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createIPConstantPropagationPass()); -} - void LLVMAddPruneEHPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPruneEHPass()); } diff --git a/llvm/test/Transforms/IPConstantProp/comdat-ipo.ll b/llvm/test/Transforms/IPConstantProp/comdat-ipo.ll deleted file mode 100644 index a19c89cb9bcf8..0000000000000 --- a/llvm/test/Transforms/IPConstantProp/comdat-ipo.ll +++ /dev/null @@ -1,34 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S | FileCheck %s - -; See PR26774 - -define i32 @baz() { -; CHECK-LABEL: @baz( -; CHECK-NEXT: ret i32 10 -; - ret i32 10 -} - -; We can const-prop @baz's return value *into* @foo, but cannot -; constprop @foo's return value into bar. - -define linkonce_odr i32 @foo() { -; CHECK-LABEL: @foo( -; CHECK-NEXT: [[VAL:%.*]] = call i32 @baz() -; CHECK-NEXT: ret i32 10 -; - - %val = call i32 @baz() - ret i32 %val -} - -define i32 @bar() { -; CHECK-LABEL: @bar( -; CHECK-NEXT: [[VAL:%.*]] = call i32 @foo() -; CHECK-NEXT: ret i32 [[VAL]] -; - - %val = call i32 @foo() - ret i32 %val -} diff --git a/llvm/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll b/llvm/test/Transforms/SCCP/2008-06-09-WeakProp.ll similarity index 91% rename from llvm/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll rename to llvm/test/Transforms/SCCP/2008-06-09-WeakProp.ll index 270115cf5ddd1..b6c5299ae7dc4 100644 --- a/llvm/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll +++ b/llvm/test/Transforms/SCCP/2008-06-09-WeakProp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S | FileCheck %s +; RUN: opt < %s -ipsccp -S | FileCheck %s ; Should not propagate the result of a weak function. 
; PR2411 diff --git a/llvm/test/Transforms/IPConstantProp/PR43857.ll b/llvm/test/Transforms/SCCP/PR43857.ll similarity index 86% rename from llvm/test/Transforms/IPConstantProp/PR43857.ll rename to llvm/test/Transforms/SCCP/PR43857.ll index 0d0d14d398dea..efe4bca4f514c 100644 --- a/llvm/test/Transforms/IPConstantProp/PR43857.ll +++ b/llvm/test/Transforms/SCCP/PR43857.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -ipconstprop | FileCheck %s +; RUN: opt < %s -S -ipsccp | FileCheck %s %struct.wobble = type { i32 } %struct.zot = type { %struct.wobble, %struct.wobble, %struct.wobble } @@ -19,7 +19,7 @@ define void @baz(<8 x i32> %arg) local_unnamed_addr { ; CHECK-LABEL: @baz( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = call [[STRUCT_ZOT:%.*]] @widget(<8 x i32> [[ARG:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_ZOT]] %tmp, 0, 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_ZOT]] undef, 0, 0 ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/SCCP/arg-count-mismatch.ll similarity index 96% rename from llvm/test/Transforms/IPConstantProp/arg-count-mismatch.ll rename to llvm/test/Transforms/SCCP/arg-count-mismatch.ll index 7afe858e52d11..ba5f1a6d83f72 100644 --- a/llvm/test/Transforms/IPConstantProp/arg-count-mismatch.ll +++ b/llvm/test/Transforms/SCCP/arg-count-mismatch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S -o - | FileCheck %s +; RUN: opt < %s -ipsccp -S -o - | FileCheck %s ; The original C source looked like this: ; @@ -53,7 +53,7 @@ define internal i16 @bar(i16 %p1, i16 %p2) { define internal i16 @vararg_prop(i16 %p1, ...) { ; CHECK-LABEL: @vararg_prop( -; CHECK-NEXT: ret i16 7 +; CHECK-NEXT: ret i16 undef ; ret i16 %p1 } diff --git a/llvm/test/Transforms/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/SCCP/arg-type-mismatch.ll similarity index 92% rename from llvm/test/Transforms/IPConstantProp/arg-type-mismatch.ll rename to llvm/test/Transforms/SCCP/arg-type-mismatch.ll index ff924d73390b0..9a9da52174c2a 100644 --- a/llvm/test/Transforms/IPConstantProp/arg-type-mismatch.ll +++ b/llvm/test/Transforms/SCCP/arg-type-mismatch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S -o - | FileCheck %s +; RUN: opt < %s -ipsccp -S -o - | FileCheck %s ; This test is just to verify that we do not crash/assert due to mismatch in ; argument type between the caller and callee. 
diff --git a/llvm/test/Transforms/SCCP/comdat-ipo.ll b/llvm/test/Transforms/SCCP/comdat-ipo.ll index 618075fd5e3fe..fc715f45406f3 100644 --- a/llvm/test/Transforms/SCCP/comdat-ipo.ll +++ b/llvm/test/Transforms/SCCP/comdat-ipo.ll @@ -1,8 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -ipsccp -S | FileCheck %s ; See PR26774 define i32 @baz() { +; CHECK-LABEL: @baz( +; CHECK-NEXT: ret i32 10 +; ret i32 10 } @@ -11,8 +15,9 @@ define i32 @baz() { define linkonce_odr i32 @foo() { ; CHECK-LABEL: @foo( -; CHECK-NEXT: %val = call i32 @baz() -; CHECK-NEXT: ret i32 10 +; CHECK-NEXT: [[VAL:%.*]] = call i32 @baz() +; CHECK-NEXT: ret i32 10 +; %val = call i32 @baz() ret i32 %val @@ -20,8 +25,9 @@ define linkonce_odr i32 @foo() { define i32 @bar() { ; CHECK-LABEL: @bar( -; CHECK-NEXT: %val = call i32 @foo() -; CHECK-NEXT: ret i32 %val +; CHECK-NEXT: [[VAL:%.*]] = call i32 @foo() +; CHECK-NEXT: ret i32 [[VAL]] +; %val = call i32 @foo() ret i32 %val diff --git a/llvm/test/Transforms/IPConstantProp/deadarg.ll b/llvm/test/Transforms/SCCP/deadarg.ll similarity index 77% rename from llvm/test/Transforms/IPConstantProp/deadarg.ll rename to llvm/test/Transforms/SCCP/deadarg.ll index 25b9749b50790..1117acc7d0137 100644 --- a/llvm/test/Transforms/IPConstantProp/deadarg.ll +++ b/llvm/test/Transforms/SCCP/deadarg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -disable-output +; RUN: opt < %s -ipsccp -disable-output define internal void @foo(i32 %X) { call void @foo( i32 %X ) ret void diff --git a/llvm/test/Transforms/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/SCCP/multiple_callbacks.ll similarity index 96% rename from llvm/test/Transforms/IPConstantProp/multiple_callbacks.ll rename to llvm/test/Transforms/SCCP/multiple_callbacks.ll index 6684044e24ce0..3d196f86e4a1c 100644 --- a/llvm/test/Transforms/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/SCCP/multiple_callbacks.ll @@ -1,5 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -ipconstprop -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; RUN: opt -ipsccp -S %s | FileCheck %s +; ; ; /---------------------------------------| ; | /----------------------|----| @@ -38,7 +39,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal i32 @cb0(i32 %zero) { ; CHECK-LABEL: @cb0( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 [[ZERO:%.*]] ; entry: ret i32 %zero diff --git a/llvm/test/Transforms/IPConstantProp/naked-return.ll b/llvm/test/Transforms/SCCP/naked-return.ll similarity index 97% rename from llvm/test/Transforms/IPConstantProp/naked-return.ll rename to llvm/test/Transforms/SCCP/naked-return.ll index 133662a211b41..daeb176b09972 100644 --- a/llvm/test/Transforms/IPConstantProp/naked-return.ll +++ b/llvm/test/Transforms/SCCP/naked-return.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -ipsccp -S %s | FileCheck %s -; RUN: opt -ipconstprop -S %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc19.0.24215" diff --git a/llvm/test/Transforms/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/SCCP/openmp_parallel_for.ll similarity index 98% rename from llvm/test/Transforms/IPConstantProp/openmp_parallel_for.ll 
rename to llvm/test/Transforms/SCCP/openmp_parallel_for.ll index 338cc8886e29d..27831c6e66196 100644 --- a/llvm/test/Transforms/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/SCCP/openmp_parallel_for.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -ipconstprop < %s | FileCheck %s +; RUN: opt -S -ipsccp < %s | FileCheck %s ; ; void bar(int, float, double); ; @@ -53,7 +53,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %. ; CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i64 4617315517961601024, i64* [[Q_ADDR]], align 8 +; CHECK-NEXT: store i64 [[Q:%.*]], i64* [[Q_ADDR]], align 8 ; CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double* ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[N:%.*]], align 4 ; CHECK-NEXT: [[SUB3:%.*]] = add nsw i32 [[TMP]], -3 diff --git a/llvm/test/Transforms/IPConstantProp/pthreads.ll b/llvm/test/Transforms/SCCP/pthreads.ll similarity index 93% rename from llvm/test/Transforms/IPConstantProp/pthreads.ll rename to llvm/test/Transforms/SCCP/pthreads.ll index dcad3858da13a..a9d2b942c1a5a 100644 --- a/llvm/test/Transforms/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/SCCP/pthreads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -ipconstprop -S < %s | FileCheck %s +; RUN: opt -ipsccp -S < %s | FileCheck %s ; ; #include ; @@ -44,7 +44,7 @@ declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, define internal i8* @foo(i8* %arg) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret i8* [[ARG:%.*]] ; entry: ret i8* %arg @@ -53,7 +53,7 @@ entry: define internal i8* @bar(i8* %arg) { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) +; CHECK-NEXT: ret i8* [[ARG:%.*]] ; entry: ret i8* %arg diff --git a/llvm/test/Transforms/IPConstantProp/recursion.ll b/llvm/test/Transforms/SCCP/recursion.ll similarity index 90% rename from llvm/test/Transforms/IPConstantProp/recursion.ll rename to llvm/test/Transforms/SCCP/recursion.ll index ac8ff9ca00e32..e4f5f1d240e7c 100644 --- a/llvm/test/Transforms/IPConstantProp/recursion.ll +++ b/llvm/test/Transforms/SCCP/recursion.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -deadargelim -S | FileCheck %s +; RUN: opt < %s -ipsccp -deadargelim -S | FileCheck %s ; CHECK-NOT: %X diff --git a/llvm/test/Transforms/IPConstantProp/return-argument.ll b/llvm/test/Transforms/SCCP/return-argument.ll similarity index 91% rename from llvm/test/Transforms/IPConstantProp/return-argument.ll rename to llvm/test/Transforms/SCCP/return-argument.ll index 6a3eac0c120d0..764b4898c9616 100644 --- a/llvm/test/Transforms/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/SCCP/return-argument.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S | FileCheck %s +; RUN: opt < %s -ipsccp -S | FileCheck %s ;; This function returns its second argument on all return statements define internal i32* @incdec(i1 %C, i32* %V) { @@ -49,11 +49,13 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; CHECK-NEXT: [[Q:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[W:%.*]] = call i32* 
@incdec(i1 [[C:%.*]], i32* [[Q]]) ; CHECK-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 1, i32 2) +; CHECK-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 ; CHECK-NEXT: [[S2:%.*]] = invoke { i32, i32 } @foo(i32 3, i32 4) ; CHECK-NEXT: to label [[OK:%.*]] unwind label [[LPAD:%.*]] ; CHECK: OK: -; CHECK-NEXT: [[Z:%.*]] = add i32 1, 3 -; CHECK-NEXT: store i32 [[Z]], i32* [[Q]], align 4 +; CHECK-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 +; CHECK-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] +; CHECK-NEXT: store i32 [[Z]], i32* [[W]], align 4 ; CHECK-NEXT: br label [[RET:%.*]] ; CHECK: LPAD: ; CHECK-NEXT: [[EXN:%.*]] = landingpad { i8*, i32 } diff --git a/llvm/test/Transforms/IPConstantProp/return-constant.ll b/llvm/test/Transforms/SCCP/return-constant.ll similarity index 91% rename from llvm/test/Transforms/IPConstantProp/return-constant.ll rename to llvm/test/Transforms/SCCP/return-constant.ll index d75aa9b969317..5cf53eea12b77 100644 --- a/llvm/test/Transforms/IPConstantProp/return-constant.ll +++ b/llvm/test/Transforms/SCCP/return-constant.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -instcombine -S | FileCheck %s +; RUN: opt < %s -ipsccp -instcombine -S | FileCheck %s define internal i32 @foo(i1 %C) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: ret i32 52 +; CHECK-NEXT: ret i32 undef ; CHECK: F: -; CHECK-NEXT: ret i32 52 +; CHECK-NEXT: ret i32 undef ; br i1 %C, label %T, label %F diff --git a/llvm/test/Transforms/IPConstantProp/return-constants.ll b/llvm/test/Transforms/SCCP/return-constants.ll similarity index 70% rename from llvm/test/Transforms/IPConstantProp/return-constants.ll rename to llvm/test/Transforms/SCCP/return-constants.ll index 4611067316141..cbf178d2efcaf 100644 --- a/llvm/test/Transforms/IPConstantProp/return-constants.ll +++ b/llvm/test/Transforms/SCCP/return-constants.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -ipconstprop -S | FileCheck %s +; RUN: opt < %s -ipsccp -S | FileCheck %s %0 = type { i32, i32 } @@ -7,13 +7,9 @@ define internal %0 @foo(i1 %Q) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: br i1 [[Q:%.*]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: [[MRV:%.*]] = insertvalue [[TMP0:%.*]] undef, i32 21, 0 -; CHECK-NEXT: [[MRV1:%.*]] = insertvalue [[TMP0]] %mrv, i32 22, 1 -; CHECK-NEXT: ret [[TMP0]] %mrv1 +; CHECK-NEXT: ret [[TMP0:%.*]] { i32 21, i32 22 } ; CHECK: F: -; CHECK-NEXT: [[MRV2:%.*]] = insertvalue [[TMP0]] undef, i32 21, 0 -; CHECK-NEXT: [[MRV3:%.*]] = insertvalue [[TMP0]] %mrv2, i32 23, 1 -; CHECK-NEXT: ret [[TMP0]] %mrv3 +; CHECK-NEXT: ret [[TMP0]] { i32 21, i32 23 } ; br i1 %Q, label %T, label %F @@ -30,14 +26,11 @@ F: ; preds = %0 define internal %0 @bar(i1 %Q) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[A:%.*]] = insertvalue [[TMP0:%.*]] undef, i32 21, 0 ; CHECK-NEXT: br i1 [[Q:%.*]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: [[B:%.*]] = insertvalue [[TMP0]] %A, i32 22, 1 -; CHECK-NEXT: ret [[TMP0]] %B +; CHECK-NEXT: ret [[TMP0:%.*]] { i32 21, i32 22 } ; CHECK: F: -; CHECK-NEXT: [[C:%.*]] = insertvalue [[TMP0]] %A, i32 23, 1 -; CHECK-NEXT: ret [[TMP0]] %C +; CHECK-NEXT: ret [[TMP0]] { i32 21, i32 23 } ; %A = insertvalue %0 undef, i32 21, 0 br i1 %Q, label %T, label %F @@ -57,7 +50,6 @@ define %0 @caller(i1 %Q) { ; CHECK-NEXT: [[B:%.*]] = extractvalue [[TMP0]] %X, 1 ; CHECK-NEXT: [[Y:%.*]] = call 
[[TMP0]] @bar(i1 [[Q]]) ; CHECK-NEXT: [[D:%.*]] = extractvalue [[TMP0]] %Y, 1 -; CHECK-NEXT: [[M:%.*]] = add i32 21, 21 ; CHECK-NEXT: [[N:%.*]] = add i32 [[B]], [[D]] ; CHECK-NEXT: ret [[TMP0]] %X ; diff --git a/llvm/test/Transforms/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/SCCP/thread_local_acs.ll similarity index 92% rename from llvm/test/Transforms/IPConstantProp/thread_local_acs.ll rename to llvm/test/Transforms/SCCP/thread_local_acs.ll index 3f843d26077ad..69c0cfeec163e 100644 --- a/llvm/test/Transforms/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/SCCP/thread_local_acs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -ipconstprop -S < %s | FileCheck %s +; RUN: opt -ipsccp -S < %s | FileCheck %s ; ; #include ; thread_local int gtl = 0; @@ -24,7 +24,7 @@ define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { ; CHECK-LABEL: @callee( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[THREAD_LOCAL_PTR:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @gsh, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SHARED_PTR:%.*]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP]], [[TMP1]] ; CHECK-NEXT: ret i32 [[ADD]] ; diff --git a/llvm/utils/findoptdiff b/llvm/utils/findoptdiff index 9a8803184384e..925e2dce4bf3f 100755 --- a/llvm/utils/findoptdiff +++ b/llvm/utils/findoptdiff @@ -70,7 +70,7 @@ dis2="$llvm2/Debug/bin/llvm-dis" opt1="$llvm1/Debug/bin/opt" opt2="$llvm2/Debug/bin/opt" -all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -ipconstprop -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -sroa -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -sroa -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify" +all_switches="-verify -lowersetjmp -simplifycfg -mem2reg -globalopt -globaldce -deadargelim -instcombine -simplifycfg -prune-eh -inline -simplify-libcalls -argpromotion -tailduplicate -simplifycfg -sroa -instcombine -predsimplify -condprop -tailcallelim -simplifycfg -reassociate -licm -loop-unswitch -instcombine -indvars -loop-unroll -instcombine -load-vn -gcse -sccp -instcombine -condprop -dse -dce -simplifycfg -deadtypeelim -constmerge -internalize -ipsccp -globalopt -constmerge -deadargelim -inline -prune-eh -globalopt -globaldce -argpromotion -instcombine -predsimplify -sroa -globalsmodref-aa -licm -load-vn -gcse -dse -instcombine -simplifycfg -verify" #counter=0 function tryit { diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn index 7d48256b6cfa2..168bb499dcffd 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn @@ -38,7 +38,6 @@ static_library("IPO") { "GlobalOpt.cpp", "GlobalSplit.cpp", "HotColdSplitting.cpp", - "IPConstantPropagation.cpp", "IPO.cpp", "InferFunctionAttrs.cpp", "InlineSimple.cpp", From 4ffa6a27aca17fe88fa6bdd605b198df6632a570 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 2 Aug 2020 22:36:53 +0100 Subject: [PATCH 140/600] 
[Bindings] Remove ipc_propagation. IPConstantPropagation has been removed, also remove the bindings. --- llvm/bindings/ocaml/transforms/ipo/llvm_ipo.ml | 3 --- llvm/test/Bindings/OCaml/ipo.ml | 1 - 2 files changed, 4 deletions(-) diff --git a/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.ml b/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.ml index 1fb5594fcc7d6..d5d959288d332 100644 --- a/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.ml +++ b/llvm/bindings/ocaml/transforms/ipo/llvm_ipo.ml @@ -33,9 +33,6 @@ external add_global_dce external add_global_optimizer : [ `Module ] Llvm.PassManager.t -> unit = "llvm_add_global_optimizer" -external add_ipc_propagation - : [ `Module ] Llvm.PassManager.t -> unit - = "llvm_add_ip_constant_propagation" external add_prune_eh : [ `Module ] Llvm.PassManager.t -> unit = "llvm_add_prune_eh" diff --git a/llvm/test/Bindings/OCaml/ipo.ml b/llvm/test/Bindings/OCaml/ipo.ml index 6a67f37570edb..c3630ece7bd48 100644 --- a/llvm/test/Bindings/OCaml/ipo.ml +++ b/llvm/test/Bindings/OCaml/ipo.ml @@ -55,7 +55,6 @@ let test_transforms () = ++ add_always_inliner ++ add_global_dce ++ add_global_optimizer - ++ add_ipc_propagation ++ add_prune_eh ++ add_ipsccp ++ add_internalize ~all_but_main:true From b497665d98ad5026b1d3d67d5793a28fefe27bea Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 2 Aug 2020 18:05:17 -0700 Subject: [PATCH 141/600] Reland D64327 [MC][ELF] Allow STT_SECTION referencing SHF_MERGE on REL targets This drops a GNU gold workaround and reverts the revert commit rL366708. Before binutils 2.34, gold -O2 and above did not correctly handle R_386_GOTOFF to SHF_MERGE|SHF_STRINGS sections: https://sourceware.org/bugzilla/show_bug.cgi?id=16794 From the original review: ... it reduced the size of a big ARM-32 debug image by 33%. It contained ~68M of relocations symbols out of total ~71M symbols (96% of symbols table was generated for relocations with symbol). -Wl,-O2 (and -Wl,-O3) is so rare that we should just lower the optimization level for LLVM_LINKER_IS_GOLD rather than pessimizing all users. --- llvm/cmake/modules/AddLLVM.cmake | 9 +++++++-- llvm/lib/MC/ELFObjectWriter.cpp | 5 ----- llvm/test/MC/ELF/basic-elf-32.s | 4 ++-- llvm/test/MC/ELF/compression.s | 4 ++-- llvm/test/MC/ELF/relocation-386.s | 2 +- llvm/test/MC/Mips/elf-relsym.s | 10 ++-------- llvm/test/MC/Mips/xgot.s | 4 ++-- 7 files changed, 16 insertions(+), 22 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 333167bfb6b0d..a25f3e87af61c 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -222,8 +222,13 @@ function(add_link_opts target_name) # Pass -O3 to the linker. This enabled different optimizations on different # linkers. 
if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin|SunOS|AIX" OR WIN32)) - set_property(TARGET ${target_name} APPEND_STRING PROPERTY - LINK_FLAGS " -Wl,-O3") + # Before binutils 2.34, gold -O2 and above did not correctly handle R_386_GOTOFF to + # SHF_MERGE|SHF_STRINGS sections: https://sourceware.org/bugzilla/show_bug.cgi?id=16794 + if(LLVM_LINKER_IS_GOLD) + set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-O1") + else() + set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-O3") + endif() endif() if(LLVM_LINKER_IS_GOLD) diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 1ca9d0fe1e18c..5a5692c0cb636 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1389,11 +1389,6 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, if (Flags & ELF::SHF_MERGE) { if (C != 0) return true; - - // It looks like gold has a bug (http://sourceware.org/PR16794) and can - // only handle section relocations to mergeable sections if using RELA. - if (!hasRelocationAddend()) - return true; } // Most TLS relocations use a got, so they need the symbol. Even those that diff --git a/llvm/test/MC/ELF/basic-elf-32.s b/llvm/test/MC/ELF/basic-elf-32.s index effc363d1c6c9..1a923106036a1 100644 --- a/llvm/test/MC/ELF/basic-elf-32.s +++ b/llvm/test/MC/ELF/basic-elf-32.s @@ -46,9 +46,9 @@ main: # @main // CHECK: Relocations [ // CHECK: Section {{.*}} .rel.text { -// CHECK: 0x6 R_386_32 .L.str1 +// CHECK: 0x6 R_386_32 .rodata.str1.1 0x0 // CHECK: 0xB R_386_PC32 puts -// CHECK: 0x12 R_386_32 .L.str2 +// CHECK: 0x12 R_386_32 .rodata.str1.1 0x0 // CHECK: 0x17 R_386_PC32 puts // CHECK: } // CHECK: ] diff --git a/llvm/test/MC/ELF/compression.s b/llvm/test/MC/ELF/compression.s index 93a77b3fcbdb1..9bc5c0ecc699c 100644 --- a/llvm/test/MC/ELF/compression.s +++ b/llvm/test/MC/ELF/compression.s @@ -38,7 +38,7 @@ // In x86 32 bit named symbols are used for temporary symbols in merge // sections, so make sure we handle symbols inside compressed sections -// 386-SYMBOLS-GNU: Name: .Linfo_string0 +// 386-SYMBOLS-GNU: Name: .zdebug_str // 386-SYMBOLS-GNU-NOT: } // 386-SYMBOLS-GNU: Section: .zdebug_str @@ -73,7 +73,7 @@ // ZLIB-STYLE-FLAGS32-NEXT: AddressAlignment: 4 // ZLIB-STYLE-FLAGS64-NEXT: AddressAlignment: 8 -// 386-SYMBOLS-ZLIB: Name: .Linfo_string0 +// 386-SYMBOLS-ZLIB: Name: .debug_str // 386-SYMBOLS-ZLIB-NOT: } // 386-SYMBOLS-ZLIB: Section: .debug_str diff --git a/llvm/test/MC/ELF/relocation-386.s b/llvm/test/MC/ELF/relocation-386.s index 4273750a41926..b9bf9b6a00c1d 100644 --- a/llvm/test/MC/ELF/relocation-386.s +++ b/llvm/test/MC/ELF/relocation-386.s @@ -8,7 +8,7 @@ // I386: Format: elf32-i386 // CHECK: Relocations [ // CHECK-NEXT: Section {{.*}} .rel.text { -// CHECK-NEXT: 0x2 R_386_GOTOFF .Lfoo 0x0 +// CHECK-NEXT: 0x2 R_386_GOTOFF .rodata.str1.16 0x0 // CHECK-NEXT: 0x{{[^ ]+}} R_386_PLT32 bar2 0x0 // CHECK-NEXT: 0x{{[^ ]+}} R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0 // Relocation 3 (bar3@GOTOFF) is done with symbol 7 (bss) diff --git a/llvm/test/MC/Mips/elf-relsym.s b/llvm/test/MC/Mips/elf-relsym.s index b8c2f89e82e6d..d19b4e3c48208 100644 --- a/llvm/test/MC/Mips/elf-relsym.s +++ b/llvm/test/MC/Mips/elf-relsym.s @@ -4,16 +4,10 @@ // CHECK: Symbols [ // CHECK: Symbol { -// CHECK: Name: $.str +// CHECK: Name: .rodata.cst8 // CHECK: } // CHECK: Symbol { -// CHECK: Name: $.str1 -// CHECK: } -// CHECK: Symbol { -// CHECK: Name: $CPI0_0 -// CHECK: } -// CHECK: Symbol { -// CHECK: Name: $CPI0_1 +// CHECK: Name: 
.rodata.str1.1
 // CHECK: }
 // CHECK: ]
diff --git a/llvm/test/MC/Mips/xgot.s b/llvm/test/MC/Mips/xgot.s
index 100d25e67223b..76490f0dec931 100644
--- a/llvm/test/MC/Mips/xgot.s
+++ b/llvm/test/MC/Mips/xgot.s
@@ -10,8 +10,8 @@
 // CHECK: 0x1C R_MIPS_GOT_LO16 ext_1
 // CHECK: 0x24 R_MIPS_CALL_HI16 printf
 // CHECK: 0x30 R_MIPS_CALL_LO16 printf
-// CHECK: 0x2C R_MIPS_GOT16 $.str
-// CHECK: 0x38 R_MIPS_LO16 $.str
+// CHECK: 0x2C R_MIPS_GOT16 .rodata.str1.1 0x0
+// CHECK: 0x38 R_MIPS_LO16 .rodata.str1.1 0x0
 // CHECK: ]
 
 .text

From a96921afa7024533cf451ef13708082876233eef Mon Sep 17 00:00:00 2001
From: StephenFan
Date: Fri, 31 Jul 2020 13:31:48 +0800
Subject: [PATCH 142/600] [RISCV] Eliminate the repeated declaration of SDLoc
 DL

Differential Revision: https://reviews.llvm.org/D85002
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cad5f8e21185f..773a098690330 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1003,7 +1003,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::BITCAST: {
     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
            Subtarget.hasStdExtF() && "Unexpected custom legalisation");
-    SDLoc DL(N);
     SDValue Op0 = N->getOperand(0);
     if (Op0.getValueType() != MVT::f32)
       return;

From 62e4644616dc87dca73357b2a4bf1487ce74e90d Mon Sep 17 00:00:00 2001
From: QingShan Zhang
Date: Mon, 3 Aug 2020 03:25:20 +0000
Subject: [PATCH 143/600] [NFC][PowerPC] Add a multiclass for fsetcc to define
 them in a uniform way

This is a refactoring patch to prepare for adding support for strict-fsetcc
in the PowerPC backend. It moves the setcc pattern definitions into a
uniform multiclass so that the strict nodes can be added more easily.

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D81712
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.td | 126 +++++++-----------------
 1 file changed, 34 insertions(+), 92 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index be1e59d7fae71..771a715926fd7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3901,105 +3901,47 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
 defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
                 (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
 
-let Predicates = [HasFPU] in {
-// Instantiations of CRNotPat for f32.
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
-                (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f64.
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
-                (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
-
-// Instantiations of CRNotPat for f128.
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)),
-                (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
+multiclass FSetCCPat<SDNode SetCC, ValueType Ty, I FCmp> {
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
+                  (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
+  def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)),
+            (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
+}
 
-// SETCC for f32.
 let Predicates = [HasFPU] in {
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
-          (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+// SETCC for f32.
+defm : FSetCCPat<setcc, f32, FCMPUS>;
 
 // SETCC for f64.
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
-          (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+defm : FSetCCPat<setcc, f64, FCMPUD>;
 
 // SETCC for f128.
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETLT)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOGT)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETGT)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOEQ)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>;
-def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)),
-          (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>;
-
+defm : FSetCCPat<setcc, f128, XSCMPUQP>;
 }
 
 // This must be in this file because it relies on patterns defined in this file

From 594dec2884a4814dc97ebdfa7c83ef15bdfb379e Mon Sep 17 00:00:00 2001
From: compinder
Date: Mon, 3 Aug 2020 08:57:17 +0530
Subject: [PATCH 144/600] [FLANG] Fix issues in the SELECT TYPE construct when
 an intrinsic type specification is used in a TYPE GUARD statement.

Fixes PR46789 and PR46830.
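To illustrate the newly enforced constraint (a hand-written sketch, not one
of the test cases added below): C1162 only permits an intrinsic type
specification in a type guard when the selector is unlimited polymorphic,
so flang now accepts the first SELECT TYPE here and rejects the second:

    subroutine demo(u, s)
      type shape
      end type
      class(*), intent(in) :: u     ! unlimited polymorphic selector
      class(shape), intent(in) :: s ! polymorphic, but not unlimited

      select type (u)
      type is (integer)             ! OK: selector is CLASS(*)
      end select

      select type (s)
      type is (integer)             ! now diagnosed as a C1162 violation
      end select
    end subroutine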
Differential Revision: https://reviews.llvm.org/D84290 --- flang/lib/Semantics/check-select-type.cpp | 43 ++++++++++++++--------- flang/test/Semantics/selecttype01.f90 | 18 ++++++++++ flang/test/Semantics/symbol11.f90 | 8 +++-- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/flang/lib/Semantics/check-select-type.cpp b/flang/lib/Semantics/check-select-type.cpp index 5b430440dffb1..ce675fa2f1dbe 100644 --- a/flang/lib/Semantics/check-select-type.cpp +++ b/flang/lib/Semantics/check-select-type.cpp @@ -39,7 +39,7 @@ class TypeCaseValues { if (std::holds_alternative(guard.u)) { typeCases_.emplace_back(stmt, std::nullopt); } else if (std::optional type{GetGuardType(guard)}) { - if (PassesChecksOnGuard(guard, *type)) { + if (PassesChecksOnGuard(stmt, *type)) { typeCases_.emplace_back(stmt, *type); } else { hasErrors_ = true; @@ -71,35 +71,46 @@ class TypeCaseValues { guard.u); } - bool PassesChecksOnGuard(const parser::TypeGuardStmt::Guard &guard, + bool PassesChecksOnGuard(const parser::Statement &stmt, const evaluate::DynamicType &guardDynamicType) { + const parser::TypeGuardStmt &typeGuardStmt{stmt.statement}; + const auto &guard{std::get(typeGuardStmt.t)}; return std::visit( common::visitors{ [](const parser::Default &) { return true; }, [&](const parser::TypeSpec &typeSpec) { - if (const DeclTypeSpec * spec{typeSpec.declTypeSpec}) { + const DeclTypeSpec *spec{typeSpec.declTypeSpec}; + CHECK(spec); + CHECK(spec->AsIntrinsic() || spec->AsDerived()); + bool typeSpecRetVal{false}; + if (spec->AsIntrinsic()) { + typeSpecRetVal = true; + if (!selectorType_.IsUnlimitedPolymorphic()) { // C1162 + context_.Say(stmt.source, + "If selector is not unlimited polymorphic, " + "an intrinsic type specification must not be specified " + "in the type guard statement"_err_en_US); + typeSpecRetVal = false; + } if (spec->category() == DeclTypeSpec::Character && !guardDynamicType.IsAssumedLengthCharacter()) { // C1160 context_.Say(parser::FindSourceLocation(typeSpec), "The type specification statement must have " "LEN type parameter as assumed"_err_en_US); - return false; + typeSpecRetVal = false; } - if (const DerivedTypeSpec * derived{spec->AsDerived()}) { - return PassesDerivedTypeChecks( - *derived, parser::FindSourceLocation(typeSpec)); - } - return false; + } else { + const DerivedTypeSpec *derived{spec->AsDerived()}; + typeSpecRetVal = PassesDerivedTypeChecks( + *derived, parser::FindSourceLocation(typeSpec)); } - return false; + return typeSpecRetVal; }, [&](const parser::DerivedTypeSpec &x) { - if (const semantics::DerivedTypeSpec * - derived{x.derivedTypeSpec}) { - return PassesDerivedTypeChecks( - *derived, parser::FindSourceLocation(x)); - } - return false; + CHECK(x.derivedTypeSpec); + const semantics::DerivedTypeSpec *derived{x.derivedTypeSpec}; + return PassesDerivedTypeChecks( + *derived, parser::FindSourceLocation(x)); }, }, guard.u); diff --git a/flang/test/Semantics/selecttype01.f90 b/flang/test/Semantics/selecttype01.f90 index fe9838ae2760f..c726c232e18de 100644 --- a/flang/test/Semantics/selecttype01.f90 +++ b/flang/test/Semantics/selecttype01.f90 @@ -119,6 +119,7 @@ subroutine CheckC1159b integer :: x !ERROR: Selector 'x' in SELECT TYPE statement must be polymorphic select type (a => x) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement type is (integer) print *,'integer ',a end select @@ -127,6 +128,7 @@ subroutine CheckC1159b subroutine CheckC1159c !ERROR: Selector 'x' in SELECT TYPE 
statement must be polymorphic select type (a => x) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement type is (integer) print *,'integer ',a end select @@ -164,6 +166,16 @@ subroutine CheckC1162 type is (extsquare) !Handle same types type is (rectangle) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement + type is(integer) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement + type is(real) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement + type is(logical) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement + type is(character(len=*)) + !ERROR: If selector is not unlimited polymorphic, an intrinsic type specification must not be specified in the type guard statement + type is(complex) end select !Unlimited polymorphic objects are allowed. @@ -187,6 +199,12 @@ subroutine CheckC1163 !ERROR: Type specification 'square' conflicts with previous type specification class is (square) end select + select type (unlim_polymorphic) + type is (INTEGER(4)) + type is (shape) + !ERROR: Type specification 'INTEGER(4)' conflicts with previous type specification + type is (INTEGER(4)) + end select end subroutine CheckC1164 diff --git a/flang/test/Semantics/symbol11.f90 b/flang/test/Semantics/symbol11.f90 index e6ae26c740e56..3d2be676967f6 100644 --- a/flang/test/Semantics/symbol11.f90 +++ b/flang/test/Semantics/symbol11.f90 @@ -71,10 +71,12 @@ subroutine s3 !DEF: /s3/Block1/y TARGET AssocEntity TYPE(t2) !REF: /s3/t2/a2 i = y%a2 - type is (integer(kind=8)) + !REF: /s3/t1 + type is (t1) !REF: /s3/i - !DEF: /s3/Block2/y TARGET AssocEntity INTEGER(8) - i = y + !DEF: /s3/Block2/y TARGET AssocEntity TYPE(t1) + !REF: /s3/t1/a1 + i = y%a1 class default !DEF: /s3/Block3/y TARGET AssocEntity CLASS(t1) print *, y From 40da58a04bea6879e1b52a4ba35559f9d26bee07 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 2 Aug 2020 22:13:59 -0700 Subject: [PATCH 145/600] [MC] Default MCAsmBackend::mayNeedRelaxation() to false --- llvm/include/llvm/MC/MCAsmBackend.h | 4 +++- .../Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 7 ------- llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h | 5 ----- llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 5 ----- llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 5 ----- llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp | 5 ----- llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 9 --------- llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 6 ------ llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 6 ------ .../Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 4 ---- .../WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp | 5 ----- 11 files changed, 3 insertions(+), 58 deletions(-) diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index cc9f42023bc29..8f95cfd55a3d7 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -144,7 +144,9 @@ class MCAsmBackend { /// \param STI - The MCSubtargetInfo in effect when the instruction was /// encoded. 
virtual bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const = 0; + const MCSubtargetInfo &STI) const { + return false; + } /// Target specific predicate for whether a given fixup requires the /// associated instruction to be relaxed. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 9f7dfdf624829..dc44980ce218c 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -88,8 +88,6 @@ class AArch64AsmBackend : public MCAsmBackend { uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const override; - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; @@ -463,11 +461,6 @@ void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } } -bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const { - return false; -} - bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h index 43b26fd0c5653..46dc914adf784 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h @@ -48,11 +48,6 @@ class AVRAsmBackend : public MCAsmBackend { return AVR::NumTargetFixupKinds; } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - return false; - } - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 9d829ac45a10b..29e9d5da08364 100644 --- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -43,11 +43,6 @@ class BPFAsmBackend : public MCAsmBackend { unsigned getNumFixupKinds() const override { return 1; } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - return false; - } - bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index 0fb27a926003f..a17afe5e62f67 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -69,11 +69,6 @@ class LanaiAsmBackend : public MCAsmBackend { return Lanai::NumTargetFixupKinds; } - bool mayNeedRelaxation(const MCInst & /*Inst*/, - const MCSubtargetInfo &STI) const override { - return false; - } - bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp index 958212dc77c9c..071e1484196bc 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp @@ -90,11 +90,6 @@ class MSP430AsmBackend : public MCAsmBackend { return Infos[Kind - FirstTargetFixupKind]; } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - 
return false; - } - bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 1126b871cb114..16c7befb2670a 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -53,15 +53,6 @@ class MipsAsmBackend : public MCAsmBackend { /// @name Target Relaxation Interfaces /// @{ - /// MayNeedRelaxation - Check whether the given instruction may need - /// relaxation. - /// - /// \param Inst - The instruction to test. - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - return false; - } - /// fixupNeedsRelaxation - Target specific predicate for whether a given /// fixup requires the associated instruction to be relaxed. bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 59cb2b994a4b3..72401668c8d07 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -182,12 +182,6 @@ class PPCAsmBackend : public MCAsmBackend { } } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - // FIXME. - return false; - } - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 83c44e0682cef..5a9ecfe74ecc8 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -255,12 +255,6 @@ namespace { } } - bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const override { - // FIXME. - return false; - } - /// fixupNeedsRelaxation - Target specific predicate for whether a given /// fixup requires the associated instruction to be relaxed. 
bool fixupNeedsRelaxation(
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index e62f5040898f0..5f276f7935784 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -54,10 +54,6 @@ class SystemZMCAsmBackend : public MCAsmBackend {
                   const MCValue &Target, MutableArrayRef<char> Data,
                   uint64_t Value, bool IsResolved,
                   const MCSubtargetInfo *STI) const override;
-  bool mayNeedRelaxation(const MCInst &Inst,
-                         const MCSubtargetInfo &STI) const override {
-    return false;
-  }
   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                             const MCRelaxableFragment *Fragment,
                             const MCAsmLayout &Layout) const override {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 8ecd7c53621df..d88311197c1ad 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -59,11 +59,6 @@ class WebAssemblyAsmBackend final : public MCAsmBackend {
     return false;
   }
 
-  bool mayNeedRelaxation(const MCInst &Inst,
-                         const MCSubtargetInfo &STI) const override {
-    return false;
-  }
-
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
 };
 

From 160ff83765ac284f3c7dd7b25d4ef105b9952ac0 Mon Sep 17 00:00:00 2001
From: Saiyedul Islam
Date: Mon, 3 Aug 2020 05:29:48 +0000
Subject: [PATCH 146/600] [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN
 architecture - Part 3

Provides AMDGCN- and NVPTX-specific specializations of the getGPUWarpSize,
getGPUThreadID, and getGPUNumThreads methods. Adds tests for AMDGCN codegen
for these methods in generic and simd modes.

Also changes the precondition in InitTempAlloca to be slightly more
permissive. This is useful for AMDGCN OpenMP codegen, where allocas are
created with a cast to an address space.
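As an aside, the relaxed InitTempAlloca precondition amounts to the
predicate below — a free-standing restatement for illustration only (the
helper name is invented here; the actual change is the assert in the
CGExpr.cpp hunk that follows):

    #include "llvm/IR/Instructions.h"

    // Accept a plain alloca, or an addrspacecast whose source operand is an
    // alloca -- the shape produced for AMDGCN, where allocas live in a
    // non-default address space and are cast before use.
    static bool isAllocaOrAddrSpaceCastOfAlloca(llvm::Value *V) {
      if (llvm::isa<llvm::AllocaInst>(V))
        return true;
      if (auto *Cast = llvm::dyn_cast<llvm::AddrSpaceCastInst>(V))
        return llvm::isa<llvm::AllocaInst>(Cast->getPointerOperand());
      return false;
    }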
Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D84260 --- clang/lib/CodeGen/CGExpr.cpp | 9 ++- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp | 61 +++++++++++++++++++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h | 43 +++++++++++++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 50 +++++++-------- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 13 +++- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 17 +++++- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 10 ++- clang/lib/CodeGen/CMakeLists.txt | 1 + clang/lib/CodeGen/CodeGenModule.cpp | 6 ++ clang/test/OpenMP/amdgcn_target_codegen.cpp | 43 +++++++++++++ .../OpenMP/amdgcn_target_init_temp_alloca.cpp | 24 ++++++++ 11 files changed, 242 insertions(+), 35 deletions(-) create mode 100644 clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp create mode 100644 clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h create mode 100644 clang/test/OpenMP/amdgcn_target_codegen.cpp create mode 100644 clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ab29e32929ceb..5d74d91065f56 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -125,8 +125,13 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, } void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { - assert(isa(Var.getPointer())); - auto *Store = new llvm::StoreInst(Init, Var.getPointer(), /*volatile*/ false, + auto *Alloca = Var.getPointer(); + assert(isa(Alloca) || + (isa(Alloca) && + isa( + cast(Alloca)->getPointerOperand()))); + + auto *Store = new llvm::StoreInst(Init, Alloca, /*volatile*/ false, Var.getAlignment().getAsAlign()); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp new file mode 100644 index 0000000000000..ccffdf43549fe --- /dev/null +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -0,0 +1,61 @@ +//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to +// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. 
+// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntimeAMDGCN.h" +#include "CGOpenMPRuntimeGPU.h" +#include "CodeGenFunction.h" +#include "clang/AST/Attr.h" +#include "clang/AST/DeclOpenMP.h" +#include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Cuda.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" + +using namespace clang; +using namespace CodeGen; +using namespace llvm::omp; + +CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM) + : CGOpenMPRuntimeGPU(CGM) { + if (!CGM.getLangOpts().OpenMPIsDevice) + llvm_unreachable("OpenMP AMDGCN can only handle device code."); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + // return constant compile-time target-specific warp size + unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size); + return Bld.getInt32(WarpSize); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F = + CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x); + return Bld.CreateCall(F, llvm::None, "nvptx_tid"); +} + +llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Module *M = &CGF.CGM.getModule(); + const char *LocSize = "__ockl_get_local_size"; + llvm::Function *F = M->getFunction(LocSize); + if (!F) { + F = llvm::Function::Create( + llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), + llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); + } + return Bld.CreateTrunc( + Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h new file mode 100644 index 0000000000000..c1421261bfc19 --- /dev/null +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h @@ -0,0 +1,43 @@ +//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to +// AMDGCN targets from generalized CGOpenMPRuntimeGPU class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H +#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H + +#include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeGPU.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" + +namespace clang { +namespace CodeGen { + +class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU { + +public: + explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM); + + /// Get the GPU warp size. + llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; + + /// Get the id of the current thread on the GPU. + llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; + + /// Get the maximum number of threads in a block of the GPU. 
+ llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; +}; + +} // namespace CodeGen +} // namespace clang + +#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 1cd89c540f478..452eb15eb8d16 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This provides a generalized class for OpenMP runtime code generation -// specialized by GPU target NVPTX. +// specialized by GPU targets NVPTX and AMDGCN. // //===----------------------------------------------------------------------===// @@ -621,14 +621,6 @@ class CheckVarsEscapingDeclContext final }; } // anonymous namespace -/// Get the id of the current thread on the GPU. -static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), - "nvptx_tid"); -} - /// Get the id of the warp in the block. /// We assume that the warp size is 32, which is always the case /// on the NVPTX device, to generate more efficient code. @@ -636,7 +628,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDBits = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2); - return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); } /// Get the id of the current lane in the Warp. @@ -646,18 +639,11 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue( llvm::omp::GV_Warp_Size_Log2_Mask); - return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask), + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), "nvptx_lane_id"); } -/// Get the maximum number of threads in a block of the GPU. -static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { - return CGF.EmitRuntimeCall( - llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), - "nvptx_num_threads"); -} - /// Get the value of the thread_limit clause in the teams directive. /// For the 'generic' execution mode, the runtime encodes thread_limit in /// the launch parameters, always starting thread_limit+warpSize threads per @@ -668,9 +654,9 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF, CGBuilderTy &Bld = CGF.Builder; auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); return IsInSPMDExecutionMode - ? getNVPTXNumThreads(CGF) - : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), RT.getGPUWarpSize(CGF), - "thread_limit"); + ? RT.getGPUNumThreads(CGF) + : Bld.CreateNUWSub(RT.getGPUNumThreads(CGF), + RT.getGPUWarpSize(CGF), "thread_limit"); } /// Get the thread id of the OMP master thread. @@ -682,8 +668,8 @@ static llvm::Value *getThreadLimit(CodeGenFunction &CGF, /// If NumThreads is 1024, master id is 992. 
static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; - llvm::Value *NumThreads = getNVPTXNumThreads(CGF); auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + llvm::Value *NumThreads = RT.getGPUNumThreads(CGF); // We assume that the warp size is a power of 2. llvm::Value *Mask = Bld.CreateNUWSub(RT.getGPUWarpSize(CGF), Bld.getInt32(1)); @@ -1235,8 +1221,9 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); EST.ExitBB = CGF.createBasicBlock(".exit"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); llvm::Value *IsWorker = - Bld.CreateICmpULT(getNVPTXThreadID(CGF), getThreadLimit(CGF)); + Bld.CreateICmpULT(RT.getGPUThreadID(CGF), getThreadLimit(CGF)); Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); @@ -1245,7 +1232,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, CGF.EmitBlock(MasterCheckBB); llvm::Value *IsMaster = - Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); + Bld.CreateICmpEQ(RT.getGPUThreadID(CGF), getMasterThreadID(CGF)); Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB); CGF.EmitBlock(MasterBB); @@ -2780,14 +2767,16 @@ void CGOpenMPRuntimeGPU::emitCriticalRegion( llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + // Get the mask of active threads in the warp. llvm::Value *Mask = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = RT.getGPUThreadID(CGF); // Get the width of the team. - llvm::Value *TeamWidth = getNVPTXNumThreads(CGF); + llvm::Value *TeamWidth = RT.getGPUNumThreads(CGF); // Initialize the counter variable for the loop. QualType Int32Ty = @@ -3250,8 +3239,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, CGM.addCompilerUsedGlobal(TransferMedium); } + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); // Get the CUDA thread id of the current OpenMP thread on the GPU. - llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ThreadID = RT.getGPUThreadID(CGF); // nvptx_lane_id = nvptx_id % warpsize llvm::Value *LaneID = getNVPTXLaneID(CGF); // nvptx_warp_id = nvptx_id / warpsize @@ -4844,9 +4834,11 @@ void CGOpenMPRuntimeGPU::getDefaultDistScheduleAndChunk( CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, llvm::Value *&Chunk) const { + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) { ScheduleKind = OMPC_DIST_SCHEDULE_static; - Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), + Chunk = CGF.EmitScalarConversion( + RT.getGPUNumThreads(CGF), CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), S.getIterationVariable()->getType(), S.getBeginLoc()); return; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 316333072c5bc..7267511ca672a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This provides a generalized class for OpenMP runtime code generation -// specialized by GPU target NVPTX. +// specialized by GPU targets NVPTX and AMDGCN. 
// //===----------------------------------------------------------------------===// @@ -199,9 +199,18 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { void clear() override; /// Declare generalized virtual functions which need to be defined - /// by all specializations of OpenMPGPURuntime Targets. + /// by all specializations of OpenMPGPURuntime Targets like AMDGCN + /// and NVPTX. + + /// Get the GPU warp size. virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0; + /// Get the id of the current thread on the GPU. + virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0; + + /// Get the maximum number of threads in a block of the GPU. + virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0; + /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. virtual void emitProcBindClause(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 5fefc95ee4130..1688d07b90b6e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -32,10 +32,25 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) llvm_unreachable("OpenMP NVPTX can only handle device code."); } -/// Get the GPU warp size. llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) { return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), "nvptx_warp_size"); } + +llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F; + F = llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x); + return Bld.CreateCall(F, llvm::None, "nvptx_tid"); +} + +llvm::Value *CGOpenMPRuntimeNVPTX::getGPUNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Function *F; + F = llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 6dab79e6e20ae..5f16029592665 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -22,11 +22,19 @@ namespace clang { namespace CodeGen { -class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntimeGPU { +class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU { public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + + /// Get the GPU warp size. llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override; + + /// Get the id of the current thread on the GPU. + llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override; + + /// Get the maximum number of threads in a block of the GPU. + llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override; }; } // CodeGen namespace. 
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 88647a2007fb0..f47ecd9bf8465 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -58,6 +58,7 @@ add_clang_library(clangCodeGen CGObjCRuntime.cpp CGOpenCLRuntime.cpp CGOpenMPRuntime.cpp + CGOpenMPRuntimeAMDGCN.cpp CGOpenMPRuntimeGPU.cpp CGOpenMPRuntimeNVPTX.cpp CGRecordLayoutBuilder.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 48a1dddfb3315..f3712ea1f541d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -19,6 +19,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeAMDGCN.h" #include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" @@ -215,6 +216,11 @@ void CodeGenModule::createOpenMPRuntime() { "OpenMP NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); break; + case llvm::Triple::amdgcn: + assert(getLangOpts().OpenMPIsDevice && + "OpenMP AMDGCN is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this)); + break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp new file mode 100644 index 0000000000000..0b6f2d40ffe87 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -0,0 +1,43 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +#define N 1000 + +int test_amdgcn_target_tid_threads() { +// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads + + int arr[N]; + +// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) +// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 +// CHECK-NEXT: sub nuw i32 [[VAR]], 64 +// CHECK: call i32 @llvm.amdgcn.workitem.id.x() +#pragma omp target + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + + return arr[0]; +} + +int test_amdgcn_target_tid_threads_simd() { +// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads_simd + + int arr[N]; + +// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) +// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 +// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0, i16 0) +#pragma omp target simd + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + return arr[0]; +} + +#endif diff --git a/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp new file mode 100644 index 0000000000000..4ed953a9ebf7b --- /dev/null +++ b/clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp @@ -0,0 +1,24 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device 
-fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#define N 100 + +int test_amdgcn_target_temp_alloca() { + // CHECK-LABEL: test_amdgcn_target_temp_alloca + + int arr[N]; + + // CHECK: [[VAR_ADDR:%.+]] = alloca [100 x i32]*, align 8, addrspace(5) + // CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast [100 x i32]* addrspace(5)* [[VAR_ADDR]] to [100 x i32]** + // CHECK: store [100 x i32]* [[VAR:%.+]], [100 x i32]** [[VAR_ADDR_CAST]], align 8 + +#pragma omp target + for (int i = 0; i < N; i++) { + arr[i] = 1; + } + + return arr[0]; +} From 5a4cd55e5d1452db7043ef9e9f1211172a6a10e1 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 3 Aug 2020 05:55:14 +0000 Subject: [PATCH 147/600] [gn build] Port 160ff83765a --- llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn index c290312bd67ea..b7ecb646429e3 100644 --- a/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/CodeGen/BUILD.gn @@ -63,6 +63,7 @@ static_library("CodeGen") { "CGObjCRuntime.cpp", "CGOpenCLRuntime.cpp", "CGOpenMPRuntime.cpp", + "CGOpenMPRuntimeAMDGCN.cpp", "CGOpenMPRuntimeGPU.cpp", "CGOpenMPRuntimeNVPTX.cpp", "CGRecordLayoutBuilder.cpp", From c41a18cf61790fc898dcda1055c3efbf442c14c0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 2 Aug 2020 23:05:50 -0700 Subject: [PATCH 148/600] [CMake] Default ENABLE_X86_RELAX_RELOCATIONS to ON This makes clang default to -Wa,-mrelax-relocations=yes, which enables R_386_GOT32X (GNU as enables it regardless of -mrelax-relocations=) and R_X86_64_[REX_]GOTPCRELX in MC. The produced object files require GNU ld>=2.26 to link. binutils 2.26 is considered a very old release today. --- clang/CMakeLists.txt | 2 +- llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index c487e506cae11..7baf93cbf7924 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -259,7 +259,7 @@ set(DEFAULT_SYSROOT "" CACHE STRING set(ENABLE_LINKER_BUILD_ID OFF CACHE BOOL "pass --build-id to ld") -set(ENABLE_X86_RELAX_RELOCATIONS OFF CACHE BOOL +set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL "enable x86 relax relocations by default") set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER FALSE CACHE BOOL diff --git a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn index 7fbfb46a41c54..49fc477d35f67 100644 --- a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn @@ -33,7 +33,7 @@ write_cmake_config("Config") { "GCC_INSTALL_PREFIX=", "BACKEND_PACKAGE_STRING=LLVM ${llvm_version}git", "ENABLE_LINKER_BUILD_ID=", - "ENABLE_X86_RELAX_RELOCATIONS=", + "ENABLE_X86_RELAX_RELOCATIONS=1", "ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER=", "CLANG_ENABLE_OBJC_REWRITER=1", # FIXME: flag? "CLANG_SYSTEMZ_DEFAULT_ARCH=z10", From 91f6a5f7854a542611ed76442acb1ec375a9feb2 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Mon, 3 Aug 2020 09:31:08 +0300 Subject: [PATCH 149/600] [MLIR][SPIRV] Control attributes support for loop and selection This patch handles loopControl and selectionControl in parsing and printing. 
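Schematically, the printed/parsed form now carries an optional control
clause (region bodies elided here; the concrete forms are exercised by the
updated tests below):

    spv.loop control(Unroll) {
      // header, body, continue and merge blocks as before
    }

    spv.selection control(Flatten) {
      // branch and merge blocks as before
    }

When the clause is omitted, the attribute defaults to `None`.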
In order to reuse the functionality, and to avoid cases where the `{` of
the region is parsed as a dictionary attribute, a `control` keyword was
introduced. `None` is the default control attribute. This functionality
can later be extended to `spv.func`.

Also, loopControl and selectionControl can now be (de)serialized.

Reviewed By: antiagainst

Differential Revision: https://reviews.llvm.org/D84175
---
 mlir/lib/Dialect/SPIRV/SPIRVOps.cpp           | 42 +++++++---
 .../SPIRV/Serialization/Deserializer.cpp      | 78 +++++++++----------
 .../SPIRV/Serialization/Serializer.cpp        |  6 +-
 .../Dialect/SPIRV/Serialization/loop.mlir     |  8 +-
 .../SPIRV/Serialization/selection.mlir        |  4 +-
 mlir/test/Dialect/SPIRV/control-flow-ops.mlir | 20 +++++
 6 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
index bac65a02f63de..b7d36f4a94875 100644
--- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp
@@ -36,6 +36,7 @@ static constexpr const char kSourceAlignmentAttrName[] = "source_alignment";
 static constexpr const char kBranchWeightAttrName[] = "branch_weights";
 static constexpr const char kCallee[] = "callee";
 static constexpr const char kClusterSize[] = "cluster_size";
+static constexpr const char kControl[] = "control";
 static constexpr const char kDefaultValueAttrName[] = "default_value";
 static constexpr const char kExecutionScopeAttrName[] = "execution_scope";
 static constexpr const char kEqualSemanticsAttrName[] = "equal_semantics";
@@ -161,6 +162,25 @@ parseEnumKeywordAttr(EnumClass &value, OpAsmParser &parser,
   return success();
 }
 
+/// Parses Function, Selection and Loop control attributes. If no control is
+/// specified, "None" is used as a default.
+template <typename EnumClass>
+static ParseResult
+parseControlAttribute(OpAsmParser &parser, OperationState &state,
+                      StringRef attrName = spirv::attributeName<EnumClass>()) {
+  if (succeeded(parser.parseOptionalKeyword(kControl))) {
+    EnumClass control;
+    if (parser.parseLParen() || parseEnumKeywordAttr(control, parser, state) ||
+        parser.parseRParen())
+      return failure();
+    return success();
+  }
+  // Set control to "None" otherwise.
+  Builder builder = parser.getBuilder();
+  state.addAttribute(attrName, builder.getI32IntegerAttr(0));
+  return success();
+}
+
 /// Parses optional memory access attributes attached to a memory access
 /// operand/pointer. Specifically, parses the following syntax:
 ///     (`[` memory-access `]`)?
@@ -2082,12 +2102,8 @@ void spirv::LoopOp::build(OpBuilder &builder, OperationState &state) { } static ParseResult parseLoopOp(OpAsmParser &parser, OperationState &state) { - // TODO: support loop control properly - Builder builder = parser.getBuilder(); - state.addAttribute("loop_control", - builder.getI32IntegerAttr( - static_cast(spirv::LoopControl::None))); - + if (parseControlAttribute(parser, state)) + return failure(); return parser.parseRegion(*state.addRegion(), /*arguments=*/{}, /*argTypes=*/{}); } @@ -2096,6 +2112,9 @@ static void print(spirv::LoopOp loopOp, OpAsmPrinter &printer) { auto *op = loopOp.getOperation(); printer << spirv::LoopOp::getOperationName(); + auto control = loopOp.loop_control(); + if (control != spirv::LoopControl::None) + printer << " control(" << spirv::stringifyLoopControl(control) << ")"; printer.printRegion(op->getRegion(0), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/true); } @@ -2445,12 +2464,8 @@ static LogicalResult verify(spirv::SelectOp op) { static ParseResult parseSelectionOp(OpAsmParser &parser, OperationState &state) { - // TODO: support selection control properly - Builder builder = parser.getBuilder(); - state.addAttribute("selection_control", - builder.getI32IntegerAttr( - static_cast(spirv::SelectionControl::None))); - + if (parseControlAttribute(parser, state)) + return failure(); return parser.parseRegion(*state.addRegion(), /*arguments=*/{}, /*argTypes=*/{}); } @@ -2459,6 +2474,9 @@ static void print(spirv::SelectionOp selectionOp, OpAsmPrinter &printer) { auto *op = selectionOp.getOperation(); printer << spirv::SelectionOp::getOperationName(); + auto control = selectionOp.selection_control(); + if (control != spirv::SelectionControl::None) + printer << " control(" << spirv::stringifySelectionControl(control) << ")"; printer.printRegion(op->getRegion(0), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/true); } diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp index 4ba3f16feef07..eaa8f4d94833d 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp @@ -64,11 +64,14 @@ struct BlockMergeInfo { Block *mergeBlock; Block *continueBlock; // nullptr for spv.selection Location loc; - - BlockMergeInfo(Location location) - : mergeBlock(nullptr), continueBlock(nullptr), loc(location) {} - BlockMergeInfo(Location location, Block *m, Block *c = nullptr) - : mergeBlock(m), continueBlock(c), loc(location) {} + uint32_t control; + + BlockMergeInfo(Location location, uint32_t control) + : mergeBlock(nullptr), continueBlock(nullptr), loc(location), + control(control) {} + BlockMergeInfo(Location location, uint32_t control, Block *m, + Block *c = nullptr) + : mergeBlock(m), continueBlock(c), loc(location), control(control) {} }; /// A struct for containing OpLine instruction information. 
@@ -1681,16 +1684,12 @@ LogicalResult Deserializer::processSelectionMerge(ArrayRef operands) { "OpSelectionMerge must specify merge target and selection control"); } - if (static_cast(spirv::SelectionControl::None) != operands[1]) { - return emitError(unknownLoc, - "unimplmented OpSelectionMerge selection control: ") - << operands[2]; - } - auto *mergeBlock = getOrCreateBlock(operands[0]); auto loc = createFileLineColLoc(opBuilder); + auto selectionControl = operands[1]; - if (!blockMergeInfo.try_emplace(curBlock, loc, mergeBlock).second) { + if (!blockMergeInfo.try_emplace(curBlock, loc, selectionControl, mergeBlock) + .second) { return emitError( unknownLoc, "a block cannot have more than one OpSelectionMerge instruction"); @@ -1709,16 +1708,13 @@ LogicalResult Deserializer::processLoopMerge(ArrayRef operands) { "continue target and loop control"); } - if (static_cast(spirv::LoopControl::None) != operands[2]) { - return emitError(unknownLoc, "unimplmented OpLoopMerge loop control: ") - << operands[2]; - } - auto *mergeBlock = getOrCreateBlock(operands[0]); auto *continueBlock = getOrCreateBlock(operands[1]); auto loc = createFileLineColLoc(opBuilder); + uint32_t loopControl = operands[2]; - if (!blockMergeInfo.try_emplace(curBlock, loc, mergeBlock, continueBlock) + if (!blockMergeInfo + .try_emplace(curBlock, loc, loopControl, mergeBlock, continueBlock) .second) { return emitError( unknownLoc, @@ -1771,25 +1767,27 @@ class ControlFlowStructurizer { /// the `headerBlock` will be redirected to the `mergeBlock`. /// This method will also update `mergeInfo` by remapping all blocks inside to /// the newly cloned ones inside structured control flow op's regions. - static LogicalResult structurize(Location loc, BlockMergeInfoMap &mergeInfo, + static LogicalResult structurize(Location loc, uint32_t control, + BlockMergeInfoMap &mergeInfo, Block *headerBlock, Block *mergeBlock, Block *continueBlock) { - return ControlFlowStructurizer(loc, mergeInfo, headerBlock, mergeBlock, - continueBlock) + return ControlFlowStructurizer(loc, control, mergeInfo, headerBlock, + mergeBlock, continueBlock) .structurizeImpl(); } private: - ControlFlowStructurizer(Location loc, BlockMergeInfoMap &mergeInfo, - Block *header, Block *merge, Block *cont) - : location(loc), blockMergeInfo(mergeInfo), headerBlock(header), - mergeBlock(merge), continueBlock(cont) {} + ControlFlowStructurizer(Location loc, uint32_t control, + BlockMergeInfoMap &mergeInfo, Block *header, + Block *merge, Block *cont) + : location(loc), control(control), blockMergeInfo(mergeInfo), + headerBlock(header), mergeBlock(merge), continueBlock(cont) {} /// Creates a new spv.selection op at the beginning of the `mergeBlock`. - spirv::SelectionOp createSelectionOp(); + spirv::SelectionOp createSelectionOp(uint32_t selectionControl); /// Creates a new spv.loop op at the beginning of the `mergeBlock`. - spirv::LoopOp createLoopOp(); + spirv::LoopOp createLoopOp(uint32_t loopControl); /// Collects all blocks reachable from `headerBlock` except `mergeBlock`. 
  void collectBlocksInConstruct();
@@ -1797,6 +1795,7 @@ class ControlFlowStructurizer {
   LogicalResult structurizeImpl();
 
   Location location;
+  uint32_t control;
 
   BlockMergeInfoMap &blockMergeInfo;
 
@@ -1808,26 +1807,26 @@
 };
 } // namespace
 
-spirv::SelectionOp ControlFlowStructurizer::createSelectionOp() {
+spirv::SelectionOp
+ControlFlowStructurizer::createSelectionOp(uint32_t selectionControl) {
   // Create a builder and set the insertion point to the beginning of the
   // merge block so that the newly created SelectionOp will be inserted there.
   OpBuilder builder(&mergeBlock->front());
 
-  auto control = builder.getI32IntegerAttr(
-      static_cast<uint32_t>(spirv::SelectionControl::None));
+  auto control = builder.getI32IntegerAttr(selectionControl);
   auto selectionOp = builder.create<spirv::SelectionOp>(location, control);
   selectionOp.addMergeBlock();
 
   return selectionOp;
 }
 
-spirv::LoopOp ControlFlowStructurizer::createLoopOp() {
+spirv::LoopOp ControlFlowStructurizer::createLoopOp(uint32_t loopControl) {
   // Create a builder and set the insertion point to the beginning of the
   // merge block so that the newly created LoopOp will be inserted there.
   OpBuilder builder(&mergeBlock->front());
 
-  // TODO: handle loop control properly
-  auto loopOp = builder.create<spirv::LoopOp>(location);
+  auto control = builder.getI32IntegerAttr(loopControl);
+  auto loopOp = builder.create<spirv::LoopOp>(location, control);
   loopOp.addEntryAndMergeBlock();
 
   return loopOp;
@@ -1852,10 +1851,10 @@ LogicalResult ControlFlowStructurizer::structurizeImpl() {
   Operation *op = nullptr;
   bool isLoop = continueBlock != nullptr;
   if (isLoop) {
-    if (auto loopOp = createLoopOp())
+    if (auto loopOp = createLoopOp(control))
       op = loopOp.getOperation();
   } else {
-    if (auto selectionOp = createSelectionOp())
+    if (auto selectionOp = createSelectionOp(control))
       op = selectionOp.getOperation();
   }
   if (!op)
@@ -1992,7 +1991,11 @@ LogicalResult ControlFlowStructurizer::structurizeImpl() {
     // The iterator should be erased before adding a new entry into
     // blockMergeInfo to avoid iterator invalidation.
+    // Read the control mask before erasing: dereferencing `it` after
+    // erase() would be undefined behavior.
+    uint32_t newControl = it->second.control;
     blockMergeInfo.erase(it);
-    blockMergeInfo.try_emplace(newHeader, loc, newMerge, newContinue);
+    blockMergeInfo.try_emplace(newHeader, loc, newControl, newMerge,
+                               newContinue);
   }
 
   // The structured selection/loop's entry block does not have arguments.
@@ -2096,9 +2096,9 @@ LogicalResult Deserializer::structurizeControlFlow() {
 
     // Erase this case before calling into structurizer, who will update
     // blockMergeInfo.
blockMergeInfo.erase(blockMergeInfo.begin()); - if (failed(ControlFlowStructurizer::structurize(mergeInfo.loc, - blockMergeInfo, headerBlock, - mergeBlock, continueBlock))) + if (failed(ControlFlowStructurizer::structurize( + mergeInfo.loc, mergeInfo.control, blockMergeInfo, headerBlock, + mergeBlock, continueBlock))) return failure(); } diff --git a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp index 859ea556f39f2..223adf47ab2e0 100644 --- a/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp @@ -1573,10 +1573,9 @@ LogicalResult Serializer::processSelectionOp(spirv::SelectionOp selectionOp) { auto emitSelectionMerge = [&]() { emitDebugLine(functionBody, loc); lastProcessedWasMergeInst = true; - // TODO: properly support selection control here encodeInstructionInto( functionBody, spirv::Opcode::OpSelectionMerge, - {mergeID, static_cast(spirv::SelectionControl::None)}); + {mergeID, static_cast(selectionOp.selection_control())}); }; // For structured selection, we cannot have blocks in the selection construct // branching to the selection header block. Entering the selection (and @@ -1636,10 +1635,9 @@ LogicalResult Serializer::processLoopOp(spirv::LoopOp loopOp) { auto emitLoopMerge = [&]() { emitDebugLine(functionBody, loc); lastProcessedWasMergeInst = true; - // TODO: properly support loop control here encodeInstructionInto( functionBody, spirv::Opcode::OpLoopMerge, - {mergeID, continueID, static_cast(spirv::LoopControl::None)}); + {mergeID, continueID, static_cast(loopOp.loop_control())}); }; if (failed(processBlock(headerBlock, /*omitLabel=*/false, emitLoopMerge))) return failure(); diff --git a/mlir/test/Dialect/SPIRV/Serialization/loop.mlir b/mlir/test/Dialect/SPIRV/Serialization/loop.mlir index d6e2090f02bbb..8f0b35ef6fc82 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/loop.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/loop.mlir @@ -119,8 +119,8 @@ spv.module Logical GLSL450 requires #spv.vce { // CHECK: spv.Branch ^bb1 // CHECK-NEXT: ^bb1: -// CHECK-NEXT: spv.loop - spv.loop { +// CHECK-NEXT: spv.loop control(Unroll) + spv.loop control(Unroll) { // CHECK-NEXT: spv.Branch ^bb1 spv.Branch ^header @@ -140,8 +140,8 @@ spv.module Logical GLSL450 requires #spv.vce { spv.Store "Function" %jvar, %zero : i32 // CHECK-NEXT: spv.Branch ^bb3 // CHECK-NEXT: ^bb3: -// CHECK-NEXT: spv.loop - spv.loop { +// CHECK-NEXT: spv.loop control(DontUnroll) + spv.loop control(DontUnroll) { // CHECK-NEXT: spv.Branch ^bb1 spv.Branch ^header diff --git a/mlir/test/Dialect/SPIRV/Serialization/selection.mlir b/mlir/test/Dialect/SPIRV/Serialization/selection.mlir index e391bae5b4861..9e6ff5698f8a6 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/selection.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/selection.mlir @@ -11,10 +11,10 @@ spv.module Logical GLSL450 requires #spv.vce { %two = spv.constant 2: i32 %var = spv.Variable init(%zero) : !spv.ptr -// CHECK-NEXT: spv.selection { +// CHECK-NEXT: spv.selection control(Flatten) // CHECK-NEXT: spv.constant 0 // CHECK-NEXT: spv.Variable - spv.selection { + spv.selection control(Flatten) { // CHECK-NEXT: spv.BranchConditional %{{.*}} [5, 10], ^bb1, ^bb2 spv.BranchConditional %cond [5, 10], ^then, ^else diff --git a/mlir/test/Dialect/SPIRV/control-flow-ops.mlir b/mlir/test/Dialect/SPIRV/control-flow-ops.mlir index 97ee02d45f24b..267c45b080fa2 100644 --- a/mlir/test/Dialect/SPIRV/control-flow-ops.mlir +++ 
b/mlir/test/Dialect/SPIRV/control-flow-ops.mlir
@@ -317,6 +317,16 @@ func @empty_region() -> () {
 
 // -----
 
+// CHECK-LABEL: @loop_with_control
+func @loop_with_control() -> () {
+  // CHECK: spv.loop control(Unroll)
+  spv.loop control(Unroll) {
+  }
+  return
+}
+
+// -----
+
 func @wrong_merge_block() -> () {
   // expected-error @+1 {{last block must be the merge block with only one 'spv._merge' op}}
   spv.loop {
@@ -718,6 +728,16 @@ func @empty_region() -> () {
 
 // -----
 
+// CHECK-LABEL: @selection_with_control
+func @selection_with_control() -> () {
+  // CHECK: spv.selection control(Flatten)
+  spv.selection control(Flatten) {
+  }
+  return
+}
+
+// -----
+
 func @wrong_merge_block() -> () {
   // expected-error @+1 {{last block must be the merge block with only one 'spv._merge' op}}
   spv.selection {

From 4fdc4d892b988bb9f2e06c3440971d28d6361722 Mon Sep 17 00:00:00 2001
From: Djordje Todorovic
Date: Mon, 3 Aug 2020 09:01:37 +0200
Subject: [PATCH 150/600] [NFC] [MIR] Document the reg state flags

This patch adds documentation for the RegState enumeration.

Differential Revision: https://reviews.llvm.org/D84634
---
 .../llvm/CodeGen/MachineInstrBuilder.h        | 38 ++++++++++++-------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index cabb9f1c97c96..b31e9cdb0e903 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -40,20 +40,30 @@ class MDNode;
 
 namespace RegState {
 
-  enum {
-    Define = 0x2,
-    Implicit = 0x4,
-    Kill = 0x8,
-    Dead = 0x10,
-    Undef = 0x20,
-    EarlyClobber = 0x40,
-    Debug = 0x80,
-    InternalRead = 0x100,
-    Renamable = 0x200,
-    DefineNoRead = Define | Undef,
-    ImplicitDefine = Implicit | Define,
-    ImplicitKill = Implicit | Kill
-  };
+enum {
+  /// Register definition.
+  Define = 0x2,
+  /// Not emitted register (e.g. carry, or temporary result).
+  Implicit = 0x4,
+  /// The last use of a register.
+  Kill = 0x8,
+  /// Unused definition.
+  Dead = 0x10,
+  /// Value of the register doesn't matter.
+  Undef = 0x20,
+  /// Register definition happens before uses.
+  EarlyClobber = 0x40,
+  /// Register 'use' is for debugging purposes.
+  Debug = 0x80,
+  /// Register reads a value that is defined inside the same instruction or
+  /// bundle.
+  InternalRead = 0x100,
+  /// Register that may be renamed.
+  Renamable = 0x200,
+  DefineNoRead = Define | Undef,
+  ImplicitDefine = Implicit | Define,
+  ImplicitKill = Implicit | Kill
+};
 
 } // end namespace RegState

From 9c3f6fb68807c8100797b001c0621ae0c9a6d1fc Mon Sep 17 00:00:00 2001
From: Daniel Kiss
Date: Mon, 3 Aug 2020 09:40:35 +0200
Subject: [PATCH 151/600] [libunwind] Make the test depend on libunwind
 explicitly.

Before this patch, `ninja check-unwind` would not rebuild the unwind
library.
Reviewed By: jroelofs Differential Revision: https://reviews.llvm.org/D85004 --- libunwind/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 794a59f58f84a..2b945e6eff762 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -32,4 +32,4 @@ configure_lit_site_cfg( add_lit_testsuite(check-unwind "Running libunwind tests" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS ${LIBUNWIND_TEST_DEPS}) + DEPENDS unwind ${LIBUNWIND_TEST_DEPS}) From 4e10a18972a4569fe6b13e60becb44514b35c52e Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 3 Aug 2020 15:03:37 +0700 Subject: [PATCH 152/600] [DebugInfo] Make DIELocList::SizeOf() more explicit. NFCI. DIELocList is used with a limited number of DWARF forms, see the only place where it is instantiated, DwarfCompileUnit::addLocationList(). The patch marks the unexpected execution path in DIELocList::SizeOf() as unreachable, to reduce ambiguity. Differential Revision: https://reviews.llvm.org/D84092 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 713a15dd09391..03219637f216e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -809,13 +809,17 @@ void DIEBlock::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_loclistx) + switch (Form) { + case dwarf::DW_FORM_loclistx: return getULEB128Size(Index); - if (Form == dwarf::DW_FORM_data4) + case dwarf::DW_FORM_data4: return 4; - if (Form == dwarf::DW_FORM_sec_offset) + case dwarf::DW_FORM_sec_offset: + // FIXME: add support for DWARF64 return 4; - return AP->MAI->getCodePointerSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } /// EmitValue - Emit label value. From 8feff8d14f75aafe4af77a6295d59d28e7a72829 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 3 Aug 2020 15:04:00 +0700 Subject: [PATCH 153/600] [DebugInfo] Fix a comment and a variable name. NFC. DebugLocListIndex keeps the index of an entry list, not the offset. Differential Revision: https://reviews.llvm.org/D84093 --- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 6 +++--- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 2de6569767f69..871977df7a2ce 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -678,9 +678,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, // Add variable address. 
- unsigned Offset = DV.getDebugLocListIndex(); - if (Offset != ~0U) { - addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + unsigned Index = DV.getDebugLocListIndex(); + if (Index != ~0U) { + addLocationList(*VariableDie, dwarf::DW_AT_location, Index); auto TagOffset = DV.getDebugLocListTagOffset(); if (TagOffset) addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 2587ee61b0599..34364134d92ac 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -114,7 +114,7 @@ class DbgEntity { /// /// Variables that have been optimized out use none of these fields. class DbgVariable : public DbgEntity { - /// Offset in DebugLocs. + /// Index of the entry list in DebugLocs. unsigned DebugLocListIndex = ~0u; /// DW_OP_LLVM_tag_offset value from DebugLocs. Optional DebugLocListTagOffset; From f98e03a35ded30893095f71be933ffc754d71d37 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 3 Aug 2020 15:04:08 +0700 Subject: [PATCH 154/600] [DebugInfo] Fix misleading using of DWARF forms with DIELabel. NFCI. DIELabel can emit only 32- or 64-bit values, while it was created in some places with DW_FORM_udata, which implies emitting uleb128. Nevertheless, these places also expected to emit U32 or U64, but just used a misleading DWARF form. The patch updates those places to use more appropriate DWARF forms and restricts DIELabel::SizeOf() to accept only forms that are actually used in the LLVM codebase. Differential Revision: https://reviews.llvm.org/D84094 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 22 ++++++++++++------- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 4 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 03219637f216e..b041f94de075d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -495,19 +495,25 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } /// EmitValue - Emit label value. /// void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { - AP->emitLabelReference( - Label, SizeOf(AP, Form), - Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset || - Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4); + bool IsSectionRelative = Form != dwarf::DW_FORM_addr; + AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative); } /// SizeOf - Determine size of label value in bytes. 
/// unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { - if (Form == dwarf::DW_FORM_data4) return 4; - if (Form == dwarf::DW_FORM_sec_offset) return 4; - if (Form == dwarf::DW_FORM_strp) return 4; - return AP->MAI->getCodePointerSize(); + switch (Form) { + case dwarf::DW_FORM_data4: + return 4; + case dwarf::DW_FORM_sec_offset: + case dwarf::DW_FORM_strp: + // FIXME: add support for DWARF64 + return 4; + case dwarf::DW_FORM_addr: + return AP->MAI->getCodePointerSize(); + default: + llvm_unreachable("DIE Value form not supported yet"); + } } LLVM_DUMP_METHOD diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 871977df7a2ce..704fff246b6a4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -439,8 +439,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { true}); DIELoc *Loc = new (DIEValueAllocator) DIELoc; addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); - addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind); - addLabel(*Loc, dwarf::DW_FORM_udata, SPSym); + addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC); + addLabel(*Loc, dwarf::DW_FORM_data4, SPSym); DD->addArangeLabel(SymbolCU(this, SPSym)); addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 1ba2afe48bf4b..2b45e50869edf 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -335,7 +335,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { } addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, dwarf::DW_FORM_udata, Sym); + addLabel(Die, dwarf::DW_FORM_addr, Sym); } void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, From 414b9bec6deb542f7ca729585b1e592cac8ccb30 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Mon, 3 Aug 2020 15:04:15 +0700 Subject: [PATCH 155/600] [DebugInfo] Make DIEDelta::SizeOf() more explicit. NFCI. The patch restricts DIEDelta::SizeOf() to accept only DWARF forms that are actually used in the LLVM codebase. This should make the use of the class more explicit and help to avoid issues similar to fixed in D83958 and D84094. Differential Revision: https://reviews.llvm.org/D84095 --- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index b041f94de075d..0524d666810cb 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -549,10 +549,15 @@ void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const { /// SizeOf - Determine size of delta value in bytes. 
 ///
 unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
-  if (Form == dwarf::DW_FORM_data4) return 4;
-  if (Form == dwarf::DW_FORM_sec_offset) return 4;
-  if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->MAI->getCodePointerSize();
+  switch (Form) {
+  case dwarf::DW_FORM_data4:
+    return 4;
+  case dwarf::DW_FORM_sec_offset:
+    // FIXME: add support for DWARF64
+    return 4;
+  default:
+    llvm_unreachable("DIE Value form not supported yet");
+  }
 }
 
 LLVM_DUMP_METHOD

From 434cf2ded3836075daa34bb4bd6286ff571d24e1 Mon Sep 17 00:00:00 2001
From: Shinji Okumura
Date: Mon, 3 Aug 2020 17:02:49 +0900
Subject: [PATCH 156/600] [Attributor] Check nonnull attribute violation in
 AAUndefinedBehavior

This patch makes it possible to handle nonnull attribute violations at
callsites in AAUndefinedBehavior. If a null pointer is passed to a
callee at a callsite and the corresponding argument of the callee has
the nonnull attribute, the behavior of the callee is undefined. This
patch handles only violations of argument nonnull attributes; violations
of returned nonnull attributes can be handled in the same way and will
be implemented in a follow-up patch.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D84733
---
 .../Transforms/IPO/AttributorAttributes.cpp   |  56 ++++
 .../Attributor/undefined_behavior.ll          | 295 ++++++++++++++++++
 2 files changed, 351 insertions(+)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index aa6bc94a3668e..5cd0c711ddde1 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1983,6 +1983,61 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
       return true;
     };
 
+    auto InspectCallSiteForUB = [&](Instruction &I) {
+      // Check whether a callsite always causes UB or not.
+
+      // Skip instructions that are already saved.
+      if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
+        return true;
+
+      // Check nonnull and noundef argument attribute violation for each
+      // callsite.
+      CallBase &CB = cast<CallBase>(I);
+      Function *Callee = CB.getCalledFunction();
+      if (!Callee)
+        return true;
+      for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+        // If the current argument is known to be simplified to a null
+        // pointer and the corresponding argument position is known to have
+        // the nonnull attribute, the argument is poison. Furthermore, if the
+        // argument is poison and the position is known to have the noundef
+        // attribute, this callsite is considered UB.
+        // TODO: Check also the nopoison attribute if it is introduced.
+        if (idx >= Callee->arg_size())
+          break;
+        Value *ArgVal = CB.getArgOperand(idx);
+        if (!ArgVal)
+          continue;
+        IRPosition CalleeArgumentIRP =
+            IRPosition::argument(*Callee->getArg(idx));
+        if (!CalleeArgumentIRP.hasAttr({Attribute::NoUndef}))
+          continue;
+        auto &NonNullAA = A.getAAFor<AANonNull>(*this, CalleeArgumentIRP);
+        if (!NonNullAA.isKnownNonNull())
+          continue;
+        const auto &ValueSimplifyAA =
+            A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*ArgVal));
+        Optional<Value *> SimplifiedVal =
+            ValueSimplifyAA.getAssumedSimplifiedValue(A);
+
+        if (!ValueSimplifyAA.isKnown())
+          continue;
+        // Here, we handle three cases.
+        // (1) Not having a value means it is dead. (We can replace the value
+        //     with undef.)
+        // (2) Simplified to null pointer: the argument is a poison value and
+        //     violates the noundef attribute.
+        // (3) Simplified to undef: the argument violates the noundef
+        //     attribute.
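+        // In cases (2) and (3) a 'noundef' parameter receives a poison or
+        // undef value, so reaching this call site is immediate UB; a
+        // 'nonnull' violation alone would only make the argument poison.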
+ if (!SimplifiedVal.hasValue() || + isa(*SimplifiedVal.getValue()) || + isa(*SimplifiedVal.getValue())) { + KnownUBInsts.insert(&I); + return true; + } + } + return true; + }; + A.checkForAllInstructions(InspectMemAccessInstForUB, *this, {Instruction::Load, Instruction::Store, Instruction::AtomicCmpXchg, @@ -1990,6 +2045,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { /* CheckBBLivenessOnly */ true); A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br}, /* CheckBBLivenessOnly */ true); + A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this); if (NoUBPrevSize != AssumedNoUBInsts.size() || UBPrevSize != KnownUBInsts.size()) return ChangeStatus::CHANGED; diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index 52761da4b869d..b5bf4c4726564 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -579,3 +579,298 @@ define i32 @foo() { %X = call i32 @callee(i1 false, i32* null) ret i32 %X } + +; Tests for nonnull attribute violation. + +define void @arg_nonnull_1(i32* nonnull %a) { +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_1 +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__TUNIT____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_1 +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__CGSCC____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__CGSCC____-NEXT: ret void +; + store i32 0, i32* %a + ret void +} + +define void @arg_nonnull_1_noundef_1(i32* nonnull noundef %a) { +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 +; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__TUNIT____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) +; IS__CGSCC____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__CGSCC____-NEXT: ret void +; + store i32 0, i32* %a + ret void +} + +define void @arg_nonnull_12(i32* nonnull %a, i32* nonnull %b, i32* %c) { +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_12 +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) +; IS__TUNIT____-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null +; IS__TUNIT____-NEXT: br i1 [[D]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT____: t: +; IS__TUNIT____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__TUNIT____-NEXT: br label [[RET:%.*]] +; IS__TUNIT____: f: +; IS__TUNIT____-NEXT: store i32 1, i32* [[B]], align 4 +; IS__TUNIT____-NEXT: br label [[RET]] +; IS__TUNIT____: ret: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: 
Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_12 +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) +; IS__CGSCC____-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null +; IS__CGSCC____-NEXT: br i1 [[D]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC____: t: +; IS__CGSCC____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__CGSCC____-NEXT: br label [[RET:%.*]] +; IS__CGSCC____: f: +; IS__CGSCC____-NEXT: store i32 1, i32* [[B]], align 4 +; IS__CGSCC____-NEXT: br label [[RET]] +; IS__CGSCC____: ret: +; IS__CGSCC____-NEXT: ret void +; + %d = icmp eq i32* %c, null + br i1 %d, label %t, label %f +t: + store i32 0, i32* %a + br label %ret +f: + store i32 1, i32* %b + br label %ret +ret: + ret void +} + +define void @arg_nonnull_12_noundef_2(i32* nonnull %a, i32* noundef nonnull %b, i32* %c) { +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) +; IS__TUNIT____-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null +; IS__TUNIT____-NEXT: br i1 [[D]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT____: t: +; IS__TUNIT____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__TUNIT____-NEXT: br label [[RET:%.*]] +; IS__TUNIT____: f: +; IS__TUNIT____-NEXT: store i32 1, i32* [[B]], align 4 +; IS__TUNIT____-NEXT: br label [[RET]] +; IS__TUNIT____: ret: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) +; IS__CGSCC____-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null +; IS__CGSCC____-NEXT: br i1 [[D]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC____: t: +; IS__CGSCC____-NEXT: store i32 0, i32* [[A]], align 4 +; IS__CGSCC____-NEXT: br label [[RET:%.*]] +; IS__CGSCC____: f: +; IS__CGSCC____-NEXT: store i32 1, i32* [[B]], align 4 +; IS__CGSCC____-NEXT: br label [[RET]] +; IS__CGSCC____: ret: +; IS__CGSCC____-NEXT: ret void +; + %d = icmp eq i32* %c, null + br i1 %d, label %t, label %f +t: + store i32 0, i32* %a + br label %ret +f: + store i32 1, i32* %b + br label %ret +ret: + ret void +} + +; Pass null directly to argument with nonnull attribute +define void @arg_nonnull_violation1_1() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: ret void +; + call void @arg_nonnull_1(i32* null) + ret void +} + +define void @arg_nonnull_violation1_2() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; 
IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2() +; IS__TUNIT____-NEXT: unreachable +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2() +; IS__CGSCC____-NEXT: unreachable +; + call void @arg_nonnull_1_noundef_1(i32* null) + ret void +} + +; A case that depends on value simplification +define void @arg_nonnull_violation2_1(i1 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: ret void +; + %null = getelementptr i32, i32* null, i32 0 + %mustnull = select i1 %c, i32* null, i32* %null + call void @arg_nonnull_1(i32* %mustnull) + ret void +} + +define void @arg_nonnull_violation2_2(i1 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) +; IS__TUNIT____-NEXT: unreachable +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) +; IS__CGSCC____-NEXT: unreachable +; + %null = getelementptr i32, i32* null, i32 0 + %mustnull = select i1 %c, i32* null, i32* %null + call void @arg_nonnull_1_noundef_1(i32* %mustnull) + ret void +} + +; Cases for single and multiple violation at a callsite +define void @arg_nonnull_violation3_1(i1 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT____: t: +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: br label [[RET:%.*]] +; IS__TUNIT____: f: +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* 
noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: br label [[RET]] +; IS__TUNIT____: ret: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC____: t: +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: br label [[RET:%.*]] +; IS__CGSCC____: f: +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; 
IS__CGSCC____-NEXT: br label [[RET]] +; IS__CGSCC____: ret: +; IS__CGSCC____-NEXT: ret void +; + %ptr = alloca i32 + br i1 %c, label %t, label %f +t: + call void @arg_nonnull_12(i32* %ptr, i32* %ptr, i32* %ptr) + call void @arg_nonnull_12(i32* %ptr, i32* %ptr, i32* null) + call void @arg_nonnull_12(i32* %ptr, i32* null, i32* %ptr) + call void @arg_nonnull_12(i32* %ptr, i32* null, i32* null) + br label %ret +f: + call void @arg_nonnull_12(i32* null, i32* %ptr, i32* %ptr) + call void @arg_nonnull_12(i32* null, i32* %ptr, i32* null) + call void @arg_nonnull_12(i32* null, i32* null, i32* %ptr) + call void @arg_nonnull_12(i32* null, i32* null, i32* null) + br label %ret +ret: + ret void +} + +define void @arg_nonnull_violation3_2(i1 %c) { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) +; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT____: t: +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: unreachable +; IS__TUNIT____: f: +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: unreachable +; IS__TUNIT____: ret: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) +; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 +; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC____: t: +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: unreachable +; IS__CGSCC____: f: +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call 
void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: unreachable +; IS__CGSCC____: ret: +; IS__CGSCC____-NEXT: ret void +; + %ptr = alloca i32 + br i1 %c, label %t, label %f +t: + call void @arg_nonnull_12_noundef_2(i32* %ptr, i32* %ptr, i32* %ptr) + call void @arg_nonnull_12_noundef_2(i32* %ptr, i32* %ptr, i32* null) + call void @arg_nonnull_12_noundef_2(i32* %ptr, i32* null, i32* %ptr) + call void @arg_nonnull_12_noundef_2(i32* %ptr, i32* null, i32* null) + br label %ret +f: + call void @arg_nonnull_12_noundef_2(i32* null, i32* %ptr, i32* %ptr) + call void @arg_nonnull_12_noundef_2(i32* null, i32* %ptr, i32* null) + call void @arg_nonnull_12_noundef_2(i32* null, i32* null, i32* %ptr) + call void @arg_nonnull_12_noundef_2(i32* null, i32* null, i32* null) + br label %ret +ret: + ret void +} From ef005f204b5d30a2bccfe2ab5431729dd644548c Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Mon, 3 Aug 2020 16:14:47 +0800 Subject: [PATCH 157/600] [MachOYAML] Remove redundant variable initialization. NFC. The value of `is64Bit` is initialized in the constructor body. --- llvm/lib/ObjectYAML/MachOEmitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index 9b454c528a7e2..3b1421440cb99 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -29,7 +29,7 @@ namespace { class MachOWriter { public: - MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), is64Bit(true), fileStart(0) { + MachOWriter(MachOYAML::Object &Obj) : Obj(Obj), fileStart(0) { is64Bit = Obj.Header.magic == MachO::MH_MAGIC_64 || Obj.Header.magic == MachO::MH_CIGAM_64; memset(reinterpret_cast(&Header), 0, sizeof(MachO::mach_header_64)); From 11492be9d72d4215ac2f61626264da05fee35e78 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Mon, 3 Aug 2020 08:18:48 +0000 Subject: [PATCH 158/600] [MLIR][Shape] Lower `shape.broadcast` to `scf` Differential Revision: https://reviews.llvm.org/D85027 --- mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp | 94 ++++++++++++++++++- .../Conversion/ShapeToSCF/shape-to-scf.mlir | 50 ++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp index a6c667f5641c3..ae326c5c513e6 100644 --- a/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp +++ b/mlir/lib/Conversion/ShapeToSCF/ShapeToSCF.cpp @@ -19,6 +19,98 @@ using namespace mlir; using namespace mlir::shape; using namespace mlir::scf; +namespace { +struct BroadcastOpConverter : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(BroadcastOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; +} // namespace + +LogicalResult BroadcastOpConverter::matchAndRewrite( + BroadcastOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + // For now, this lowering is only defined on `tensor` operands, not + // on shapes. 
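+  // (A `!shape.shape` value may also represent an error and has no fixed
+  // extent-tensor form, so it cannot be lowered to loops directly.)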
+ if (op.getType().isa()) + return failure(); + + assert(!op.lhs().getType().isa() && + !op.rhs().getType().isa()); + auto loc = op.getLoc(); + BroadcastOp::Adaptor transformed(operands); + Value zero = rewriter.create(loc, 0); + Value one = rewriter.create(loc, 1); + + // Find smaller and greater rank and extent tensor. + Value lhsRank = rewriter.create(loc, transformed.lhs(), zero); + Value rhsRank = rewriter.create(loc, transformed.rhs(), zero); + Value lhsSmaller = + rewriter.create(loc, CmpIPredicate::ule, lhsRank, rhsRank); + Type indexTy = rewriter.getIndexType(); + Type extentTensorTy = op.getType(); + auto ifOp = rewriter.create( + loc, TypeRange{indexTy, extentTensorTy, indexTy, extentTensorTy}, + lhsSmaller, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{lhsRank, transformed.lhs(), + rhsRank, transformed.rhs()}); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, ValueRange{rhsRank, transformed.rhs(), + lhsRank, transformed.lhs()}); + }); + Value smallerRank = ifOp.getResult(0); + Value smallerOperand = ifOp.getResult(1); + Value greaterRank = ifOp.getResult(2); + Value greaterOperand = ifOp.getResult(3); + + // Allocate stack memory for the broadcasted extent tensor. + Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); + Value mem = rewriter.create(loc, memTy, ValueRange{greaterRank}); + + // Copy extents from greater operand that are not challenged. + Value rankDiff = + rewriter.create(loc, indexTy, greaterRank, smallerRank); + rewriter.create(loc, zero, rankDiff, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value extent = b.create( + loc, greaterOperand, ValueRange{iv}); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Determine remaining broadcasted extents. + rewriter.create( + loc, rankDiff, greaterRank, one, llvm::None, + [&](OpBuilder &b, Location loc, Value iv, ValueRange) { + Value greaterOperandExtent = + b.create(loc, greaterOperand, ValueRange{iv}); + Value greaterOperandExtentIsOne = + b.create(loc, CmpIPredicate::eq, greaterOperandExtent, one); + auto ifOp = b.create( + loc, TypeRange{indexTy}, greaterOperandExtentIsOne, + [&](OpBuilder &b, Location loc) { + Value ivShifted = b.create(loc, indexTy, iv, rankDiff); + Value smallerOperandExtent = b.create( + loc, smallerOperand, ValueRange{ivShifted}); + b.create(loc, smallerOperandExtent); + }, + [&](OpBuilder &b, Location loc) { + b.create(loc, greaterOperandExtent); + }); + Value extent = ifOp.getResult(0); + b.create(loc, extent, mem, ValueRange{iv}); + b.create(loc); + }); + + // Load broadcasted shape as an extent tensor. + rewriter.replaceOpWithNewOp(op, mem); + return success(); +} + namespace { /// Converts `shape.shape_eq` to an `scf.for` loop. For now, the lowering is /// only defined on `tensor` operands. The test for equality first @@ -223,7 +315,6 @@ void ConvertShapeToSCFPass::runOnFunction() { // Setup target legality. ConversionTarget target(getContext()); target.addLegalDialect(); - target.addLegalOp(); // Apply conversion. 
if (failed(applyPartialConversion(getFunction(), target, patterns))) @@ -234,6 +325,7 @@ void mlir::populateShapeToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *ctx) { // clang-format off patterns.insert< + BroadcastOpConverter, ShapeEqOpConverter, ReduceOpConverter, ShapeOfOpConverter>(ctx); diff --git a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir index 768a627208b8e..cc384496dff05 100644 --- a/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir +++ b/mlir/test/Conversion/ShapeToSCF/shape-to-scf.mlir @@ -80,3 +80,53 @@ func @shape_eq(%a : tensor, %b : tensor) -> i1 { %result = shape.shape_eq %a, %b : tensor, tensor return %result : i1 } + +// ----- + +// Don't lower `shape.broadcast` if a `shape.shape` type is involved. +// CHECK-LABEL: @broadcast +func @broadcast(%a : tensor, %b : !shape.shape) -> !shape.shape { + // CHECK: shape.broadcast + %c = shape.broadcast %a, %b : tensor, !shape.shape -> !shape.shape + return %c : !shape.shape +} + +// ----- + +// CHECK-LABEL: @broadcast +// CHECK-SAME: (%[[LHS:.*]]: tensor, %[[RHS:.*]]: tensor) +func @broadcast(%a : tensor, %b : tensor) { + // CHECK: %[[C0:.*]] = constant 0 : index + // CHECK: %[[C1:.*]] = constant 1 : index + // CHECK: %[[LHS_RANK:.*]] = dim %[[LHS]], %[[C0]] : tensor + // CHECK: %[[RHS_RANK:.*]] = dim %[[RHS]], %[[C0]] : tensor + // CHECK: %[[LHS_SMALLER:.*]] = cmpi "ule", %[[LHS_RANK]], %[[RHS_RANK]] + // CHECK: %[[ARG:.*]]:4 = scf.if %[[LHS_SMALLER]] -> (index, tensor, index, tensor) { + // CHECK: scf.yield %[[LHS_RANK]], %[[LHS]], %[[RHS_RANK]], %[[RHS]] : index, tensor, index, tensor + // CHECK: } else { + // CHECK: scf.yield %[[RHS_RANK]], %[[RHS]], %[[LHS_RANK]], %[[LHS]] : index, tensor, index, tensor + // CHECK: } + // CHECK: %[[MEM:.*]] = alloca(%[[ARG]]#2) : memref + // CHECK: %[[RANK_DIFF:.*]] = subi %[[ARG]]#2, %[[ARG]]#0 : index + // CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[RANK_DIFF]] step %[[C1]] { + // CHECK: %[[EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: scf.for %[[IV:.*]] = %[[RANK_DIFF]] to %[[ARG]]#2 step %[[C1]] { + // CHECK: %[[GREATER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#3[%[[IV]]] : tensor + // CHECK: %[[GREATER_OPERAND_EXTENT_IS_ONE:.*]] = cmpi "eq", %[[GREATER_OPERAND_EXTENT]], %[[C1]] : index + // CHECK: %[[EXTENT:.*]] = scf.if %[[GREATER_OPERAND_EXTENT_IS_ONE]] -> (index) { + // CHECK: %[[IV_SHIFTED:.*]] = subi %[[IV]], %[[RANK_DIFF]] : index + // CHECK: %[[SMALLER_OPERAND_EXTENT:.*]] = extract_element %[[ARG]]#1[%[[IV_SHIFTED]]] : tensor + // CHECK: scf.yield %[[SMALLER_OPERAND_EXTENT]] : index + // CHECK: } else { + // CHECK: scf.yield %[[GREATER_OPERAND_EXTENT]] : index + // CHECK: } + // CHECK: store %[[EXTENT]], %[[MEM]][%[[IV]]] : memref + // CHECK: } + // CHECK: %[[BROADCASTED:.*]] = tensor_load %[[MEM]] : memref + %0 = shape.broadcast %a, %b + : tensor, tensor -> tensor + return +} + From 8aeb212887024a615ca02437cd12fa055bd54b6f Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 3 Aug 2020 10:23:48 +0200 Subject: [PATCH 159/600] [debugserver] Fix that is_dot_app is producing unused warnings Some build configurations don't use this static function. 
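As an aside, here is a minimal sketch of the idiom this patch applies
(hypothetical names, not taken from the lldb sources): call the static
helper once unconditionally and explicitly void the result, so that
configurations whose preprocessor branches never read it trigger neither
-Wunused-function nor -Wunused-variable.

```
#include <cstdio>

// Stand-in for a static helper that only some configurations consume.
static bool is_special(const char *path) { return path && path[0] == '/'; }

int main(int argc, char **argv) {
  const bool special = is_special(argc > 1 ? argv[1] : nullptr);
  (void)special; // Keeps -Wunused-variable quiet when no branch reads it.
#if defined(USE_SPECIAL) // Hypothetical configuration macro.
  if (special)
    std::puts("special path");
#endif
  return 0;
}
```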
---
 lldb/tools/debugserver/source/debugserver.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lldb/tools/debugserver/source/debugserver.cpp b/lldb/tools/debugserver/source/debugserver.cpp
index 4e6aa39e52d37..04cbd2c8b503e 100644
--- a/lldb/tools/debugserver/source/debugserver.cpp
+++ b/lldb/tools/debugserver/source/debugserver.cpp
@@ -212,19 +212,21 @@ RNBRunLoopMode RNBRunLoopLaunchInferior(RNBRemote *remote,
 
   // Our default launch method is posix spawn
   launch_flavor = eLaunchFlavorPosixSpawn;
 
+  const bool dot_app = is_dot_app(inferior_argv[0]);
+  (void)dot_app;
 #if defined WITH_FBS
   // Check if we have an app bundle, if so launch using BackBoard Services.
-  if (is_dot_app(inferior_argv[0])) {
+  if (dot_app) {
     launch_flavor = eLaunchFlavorFBS;
   }
 #elif defined WITH_BKS
   // Check if we have an app bundle, if so launch using BackBoard Services.
-  if (is_dot_app(inferior_argv[0])) {
+  if (dot_app) {
     launch_flavor = eLaunchFlavorBKS;
   }
 #elif defined WITH_SPRINGBOARD
   // Check if we have an app bundle, if so launch using SpringBoard.
-  if (is_dot_app(inferior_argv[0])) {
+  if (dot_app) {
     launch_flavor = eLaunchFlavorSpringBoard;
   }
 #endif

From 35b65be041127db9fe23d3128a004c888893cbae Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 3 Aug 2020 04:39:18 -0400
Subject: [PATCH 160/600] [mlir][Vector] Add transformation + pattern to split
 vector.transfer_read into full and partial copies.

This revision adds a transformation and a pattern that rewrites a
"maybe masked" `vector.transfer_read %view[...], %pad` into a pattern
resembling:

```
%1:3 = scf.if (%inBounds) {
  scf.yield %view : memref<A...>, index, index
} else {
  %2 = vector.transfer_read %view[...], %pad : memref<A...>, vector<...>
  %3 = vector.type_cast %extra_alloc : memref<...> to memref<vector<...>>
  store %2, %3[] : memref<vector<...>>
  %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
  scf.yield %4 : memref<A...>, index, index
}
%res = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
```

where `extra_alloc` is a buffer of one vector, alloca'ed at the top of
the function.

This rewrite makes it possible to realize the "always full tile"
abstraction where vector.transfer_read operations are guaranteed to
read from a padded full buffer. The extra work only occurs on the
boundary tiles.

Differential Revision: https://reviews.llvm.org/D84631
---
 .../mlir/Dialect/Vector/VectorTransforms.h    |  64 +++++
 .../mlir/Interfaces/VectorInterfaces.td       |  13 +
 mlir/lib/Dialect/Vector/VectorTransforms.cpp  | 234 ++++++++++++++++++
 .../vector-transfer-full-partial-split.mlir   | 102 ++++++++
 .../lib/Transforms/TestVectorTransforms.cpp   |  16 ++
 5 files changed, 429 insertions(+)
 create mode 100644 mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir

diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
index 0d18c5aa782d1..835ad18a79ad2 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
@@ -17,6 +17,11 @@
 namespace mlir {
 class MLIRContext;
 class OwningRewritePatternList;
+class VectorTransferOpInterface;
+
+namespace scf {
+class IfOp;
+} // namespace scf
 
 /// Collect a set of patterns to convert from the Vector dialect to itself.
 /// Should be merged with populateVectorToSCFLoweringPattern.
@@ -104,6 +109,65 @@ struct UnrollVectorPattern : public OpRewritePattern<OpTy> {
   FilterConstraintType filter;
 };
 
+/// Split a vector.transfer operation into an unmasked fastpath
+/// vector.transfer and a slowpath masked vector.transfer. If `ifOp` is not
+/// null and the result is `success`, the `ifOp` points to the newly created
+/// conditional upon function return. To accommodate the fact that the
+/// original vector.transfer indexing may be arbitrary and the slow path
+/// indexes @[0...0] in the temporary buffer, the scf.if op returns a view
+/// and values of type index. At this time, only vector.transfer_read is
+/// implemented.
+///
+/// Example (a 2-D vector.transfer_read):
+/// ```
+///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///      scf.yield %0 : memref<A...>, index, index
+///    } else {
+///      %2 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+///      %3 = vector.type_cast %extra_alloc : memref<...> to
+///      memref<vector<...>>
+///      store %2, %3[] : memref<vector<...>>
+///      %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
+///      scf.yield %4 : memref<A...>, index, index
+///    }
+///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
+/// ```
+/// where `extra_alloc` is a buffer of one vector, alloca'ed at the top of
+/// the function.
+///
+/// Preconditions:
+///  1. `xferOp.permutation_map()` must be a minor identity map
+///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
+///     must be equal. This will be relaxed in the future but requires
+///     rank-reducing subviews.
+LogicalResult
+splitFullAndPartialTransferPrecondition(VectorTransferOpInterface xferOp);
+LogicalResult splitFullAndPartialTransfer(OpBuilder &b,
+                                          VectorTransferOpInterface xferOp,
+                                          scf::IfOp *ifOp = nullptr);
+
+/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This
+/// pattern may take an extra filter to perform selection at a finer
+/// granularity.
+struct VectorTransferFullPartialRewriter : public RewritePattern {
+  using FilterConstraintType =
+      std::function<LogicalResult(VectorTransferOpInterface op)>;
+
+  explicit VectorTransferFullPartialRewriter(
+      MLIRContext *context,
+      FilterConstraintType filter =
+          [](VectorTransferOpInterface op) { return success(); },
+      PatternBenefit benefit = 1)
+      : RewritePattern(benefit, MatchAnyOpTypeTag()), filter(filter) {}
+
+  /// Performs the rewrite.
+  LogicalResult matchAndRewrite(Operation *op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  FilterConstraintType filter;
+};
+
 } // namespace vector
 
 //===----------------------------------------------------------------------===//

diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td
index aefbb7d471172..218715318a867 100644
--- a/mlir/include/mlir/Interfaces/VectorInterfaces.td
+++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td
@@ -160,6 +160,19 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
       /*defaultImplementation=*/
       "return $_op.getMemRefType().getRank() - $_op.getTransferRank();"
     >,
+    InterfaceMethod<
+      /*desc=*/[{ Returns true if at least one of the dimensions is masked.}],
+      /*retTy=*/"bool",
+      /*methodName=*/"hasMaskedDim",
+      /*args=*/(ins),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        for (unsigned idx = 0, e = $_op.getTransferRank(); idx < e; ++idx)
+          if ($_op.isMaskedDim(idx))
+            return true;
+        return false;
+      }]
+    >,
     InterfaceMethod<
       /*desc=*/[{ Helper function to account for the fact that `permutationMap` results and

diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
index 197b1c62274b2..573b822503f3a 100644
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -12,9 +12,13 @@
 
 #include <type_traits>
 
+#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/SCF/EDSC/Intrinsics.h"
+#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/EDSC/Intrinsics.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -1985,6 +1989,236 @@ Value ContractionOpLowering::lowerReduction(vector::ContractionOp op,
 
 } // namespace mlir
 
+static Optional<int64_t> extractConstantIndex(Value v) {
+  if (auto cstOp = v.getDefiningOp<ConstantIndexOp>())
+    return cstOp.getValue();
+  if (auto affineApplyOp = v.getDefiningOp<AffineApplyOp>())
+    if (affineApplyOp.getAffineMap().isSingleConstant())
+      return affineApplyOp.getAffineMap().getSingleConstantResult();
+  return None;
+}
+
+// Missing foldings of scf.if make it necessary to perform poor man's folding
+// eagerly, especially in the case of unrolling. In the future, this should go
+// away once scf.if folds properly.
+static Value createScopedFoldedSLE(Value v, Value ub) {
+  using namespace edsc::op;
+  auto maybeCstV = extractConstantIndex(v);
+  auto maybeCstUb = extractConstantIndex(ub);
+  if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb)
+    return Value();
+  return sle(v, ub);
+}
+
+// Operates under a scoped context to build the condition to ensure that a
+// particular VectorTransferOpInterface is unmasked.
+static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
+  assert(xferOp.permutation_map().isMinorIdentity() &&
+         "Expected minor identity map");
+  Value inBoundsCond;
+  xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
+    // Zip over the resulting vector shape and memref indices.
+    // If the dimension is known to be unmasked, it does not participate in
+    // the construction of `inBoundsCond`.
+    if (!xferOp.isMaskedDim(resultIdx))
+      return;
+    int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
+    using namespace edsc::op;
+    using namespace edsc::intrinsics;
+    // Fold or create the check that `index + vector_size` <= `memref_size`.
+    Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize);
+    Value cond =
+        createScopedFoldedSLE(sum, std_dim(xferOp.memref(), indicesIdx));
+    if (!cond)
+      return;
+    // Conjunction over all dims for which we are in-bounds.
+    inBoundsCond = inBoundsCond ? inBoundsCond && cond : cond;
+  });
+  return inBoundsCond;
+}
+
+LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition(
+    VectorTransferOpInterface xferOp) {
+  // TODO: expand support to these 2 cases.
+  if (!xferOp.permutation_map().isMinorIdentity())
+    return failure();
+  // TODO: relax this precondition. This will require rank-reducing subviews.
+  if (xferOp.getMemRefType().getRank() != xferOp.getTransferRank())
+    return failure();
+  // Must have some masked dimension to be a candidate for splitting.
+  if (!xferOp.hasMaskedDim())
+    return failure();
+  // Don't split transfer operations under an IfOp; this avoids applying the
+  // pattern recursively.
+  // TODO: improve the condition to make it more applicable.
+  if (xferOp.getParentOfType<scf::IfOp>())
+    return failure();
+  return success();
+}
+
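+/// Given two MemRefTypes `aT` and `bT`, return a MemRefType to which both can
+/// be cast. Return a null type when the ranks differ or when the strides
+/// cannot be extracted. Otherwise, keep the sizes, strides and offset on which
+/// `aT` and `bT` agree and use dynamic values for those on which they
+/// disagree.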
+MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) {
+  if (MemRefCastOp::areCastCompatible(aT, bT))
+    return aT;
+  if (aT.getRank() != bT.getRank())
+    return MemRefType();
+  int64_t aOffset, bOffset;
+  SmallVector<int64_t, 4> aStrides, bStrides;
+  if (failed(getStridesAndOffset(aT, aStrides, aOffset)) ||
+      failed(getStridesAndOffset(bT, bStrides, bOffset)) ||
+      aStrides.size() != bStrides.size())
+    return MemRefType();
+
+  ArrayRef<int64_t> aShape = aT.getShape(), bShape = bT.getShape();
+  int64_t resOffset;
+  SmallVector<int64_t, 4> resShape(aT.getRank(), 0),
+      resStrides(bT.getRank(), 0);
+  for (int64_t idx = 0, e = aT.getRank(); idx < e; ++idx) {
+    resShape[idx] =
+        (aShape[idx] == bShape[idx]) ? aShape[idx] : MemRefType::kDynamicSize;
+    resStrides[idx] = (aStrides[idx] == bStrides[idx])
+                          ? aStrides[idx]
+                          : MemRefType::kDynamicStrideOrOffset;
+  }
+  resOffset =
+      (aOffset == bOffset) ? aOffset : MemRefType::kDynamicStrideOrOffset;
+  return MemRefType::get(
+      resShape, aT.getElementType(),
+      makeStridedLinearLayoutMap(resStrides, resOffset, aT.getContext()));
+}
+
+/// Split a vector.transfer operation into an unmasked fastpath vector.transfer
+/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result
+/// is `success`, the `ifOp` points to the newly created conditional upon
+/// function return. To accommodate the fact that the original
+/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0]
+/// in the temporary buffer, the scf.if op returns a view and values of type
+/// index. At this time, only vector.transfer_read is implemented.
+///
+/// Example (a 2-D vector.transfer_read):
+/// ```
+///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///       scf.yield %0 : memref<A...>, index, index
+///     } else {
+///       %2 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+///       %3 = vector.type_cast %extra_alloc : memref<...> to memref<vector<...>>
+///       store %2, %3[] : memref<vector<...>>
+///       %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
+///       scf.yield %4 : memref<A...>, index, index
+///     }
+///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
+/// ```
+/// where `extra_alloc` is a buffer of one vector, alloca'ed at the top of the
+/// function.
+///
+/// Preconditions:
+///  1. `xferOp.permutation_map()` must be a minor identity map
+///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
+///     must be equal. This will be relaxed in the future but requires
+///     rank-reducing subviews.
+LogicalResult mlir::vector::splitFullAndPartialTransfer(
+    OpBuilder &b, VectorTransferOpInterface xferOp, scf::IfOp *ifOp) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+
+  assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) &&
+         "Expected splitFullAndPartialTransferPrecondition to hold");
+  auto xferReadOp = dyn_cast<vector::TransferReadOp>(xferOp.getOperation());
+
+  // TODO: add support for write case.
+  if (!xferReadOp)
+    return failure();
+
+  OpBuilder::InsertionGuard guard(b);
+  if (xferOp.memref().getDefiningOp())
+    b.setInsertionPointAfter(xferOp.memref().getDefiningOp());
+  else
+    b.setInsertionPoint(xferOp);
+  ScopedContext scope(b, xferOp.getLoc());
+  Value inBoundsCond = createScopedInBoundsCond(
+      cast<VectorTransferOpInterface>(xferOp.getOperation()));
+  if (!inBoundsCond)
+    return failure();
+
+  // Top of the function `alloca` for transient storage.
+  Value alloc;
+  {
+    FuncOp funcOp = xferOp.getParentOfType<FuncOp>();
+    OpBuilder::InsertionGuard guard(b);
+    b.setInsertionPointToStart(&funcOp.getRegion().front());
+    auto shape = xferOp.getVectorType().getShape();
+    Type elementType = xferOp.getVectorType().getElementType();
+    alloc = std_alloca(MemRefType::get(shape, elementType), ValueRange{},
+                       b.getI64IntegerAttr(32));
+  }
+
+  Value memref = xferOp.memref();
+  SmallVector<bool, 4> bools(xferOp.getTransferRank(), false);
+  auto unmaskedAttr = b.getBoolArrayAttr(bools);
+
+  MemRefType compatibleMemRefType = getCastCompatibleMemRefType(
+      xferOp.getMemRefType(), alloc.getType().cast<MemRefType>());
+
+  // Read case: full fill + partial copy -> unmasked vector.xfer_read.
+  Value zero = std_constant_index(0);
+  SmallVector<Type, 4> returnTypes(1 + xferOp.getTransferRank(),
+                                   b.getIndexType());
+  returnTypes[0] = compatibleMemRefType;
+  scf::IfOp fullPartialIfOp;
+  conditionBuilder(
+      returnTypes, inBoundsCond,
+      [&]() -> scf::ValueVector {
+        Value res = memref;
+        if (compatibleMemRefType != xferOp.getMemRefType())
+          res = std_memref_cast(memref, compatibleMemRefType);
+        scf::ValueVector viewAndIndices{res};
+        viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(),
+                              xferOp.indices().end());
+        return viewAndIndices;
+      },
+      [&]() -> scf::ValueVector {
+        Operation *newXfer =
+            ScopedContext::getBuilderRef().clone(*xferOp.getOperation());
+        Value vector = cast<VectorTransferOpInterface>(newXfer).vector();
+        std_store(vector, vector_type_cast(
+                              MemRefType::get({}, vector.getType()), alloc));
+
+        Value casted = std_memref_cast(alloc, compatibleMemRefType);
+        scf::ValueVector viewAndIndices{casted};
+        viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(),
+                              zero);
+
+        return viewAndIndices;
+      },
+      &fullPartialIfOp);
+  if (ifOp)
+    *ifOp = fullPartialIfOp;
+
+  // Unmask the existing read op, it always reads from a full buffer.
+  for (unsigned i = 0, e = returnTypes.size(); i != e; ++i)
+    xferReadOp.setOperand(i, fullPartialIfOp.getResult(i));
+  xferOp.setAttr(vector::TransferReadOp::getMaskedAttrName(), unmaskedAttr);
+
+  return success();
+}
+
+LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite(
+    Operation *op, PatternRewriter &rewriter) const {
+  auto xferOp = dyn_cast<VectorTransferOpInterface>(op);
+  if (!xferOp || failed(splitFullAndPartialTransferPrecondition(xferOp)) ||
+      failed(filter(xferOp)))
+    return failure();
+  rewriter.startRootUpdate(xferOp);
+  if (succeeded(splitFullAndPartialTransfer(rewriter, xferOp))) {
+    rewriter.finalizeRootUpdate(xferOp);
+    return success();
+  }
+  rewriter.cancelRootUpdate(xferOp);
+  return failure();
+}
+
 // TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp).
 // TODO: Add this as DRR pattern.
 void mlir::vector::populateVectorToVectorTransformationPatterns(
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
new file mode 100644
index 0000000000000..ef76247ee9d4b
--- /dev/null
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -0,0 +1,102 @@
+// RUN: mlir-opt %s -test-vector-transfer-full-partial-split | FileCheck %s
+
+// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
+// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+
+// CHECK-LABEL: split_vector_transfer_read_2d(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
+// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
+// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index
+func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -> vector<4x8xf32> {
+  %c0 = constant 0 : index
+  %f0 = constant 0.0 : f32
+
+  // CHECK-DAG: %[[c0:.*]] = constant 0 : index
+  // CHECK-DAG: %[[c8:.*]] = constant 8 : index
+  // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
+  // alloca for boundary full tile
+  // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
+  // %i + 4 <= dim(%A, 0)
+  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
+  // CHECK: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref<?x8xf32>
+  // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[d0]] : index
+  // %j + 8 <= dim(%A, 1)
+  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
+  // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index
+  // are both conds true
+  // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1
+  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
+  // inBounds, just yield %A
+  // CHECK: scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
+  // CHECK: } else {
+  // slow path, fill tmp alloc and yield a memref_casted version of it
+  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
+  // CHECK-SAME: memref<?x8xf32>, vector<4x8xf32>
+  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
+  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
+  // CHECK: store %[[slow]], %[[cast_alloc]][] : memref<vector<4x8xf32>>
+  // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] :
+  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32>
+  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
+  // CHECK-SAME: memref<?x8xf32>, index, index
+  // CHECK: }
+  // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %[[cst]]
+  // CHECK-SAME: {masked = [false, false]} : memref<?x8xf32>, vector<4x8xf32>
+  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>
+
+  // CHECK: return %[[res]] : vector<4x8xf32>
+  return %1 : vector<4x8xf32>
+}
+
+// CHECK-LABEL: split_vector_transfer_read_strided_2d(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
+// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
+// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index
+func @split_vector_transfer_read_strided_2d(
+    %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
+    %i: index, %j: index) -> vector<4x8xf32> {
+  %c0 = constant 0 : index
+  %f0 = constant 0.0 : f32
+
+  // CHECK-DAG: %[[c0:.*]] = constant 0 : index
+  // CHECK-DAG: %[[c7:.*]] = constant 7 : index
+  // CHECK-DAG: %[[c8:.*]] = constant 8 : index
+  // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32
+  // alloca for boundary full tile
+  // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32>
+  // %i + 4 <= dim(%A, 0)
+  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
+  // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[c7]] : index
+  // %j + 8 <= dim(%A, 1)
+  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
+  // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index
+  // are both conds true
+  // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1
+  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index) {
+  // inBounds but not cast-compatible: yield a memref_casted form of %A
+  // CHECK: %[[casted:.*]] = memref_cast %arg0 :
+  // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x8xf32, #[[$map_2d_stride_1]]>
+  // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] :
+  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
+  // CHECK: } else {
+  // slow path, fill tmp alloc and yield a memref_casted version of it
+  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
+  // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
+  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
+  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
+  // CHECK: store %[[slow]], %[[cast_alloc]][] :
+  // CHECK-SAME: memref<vector<4x8xf32>>
+  // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] :
+  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32, #[[$map_2d_stride_1]]>
+  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
+  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
+  // CHECK: }
+  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {masked = [false, false]} :
+  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
+  %1 = vector.transfer_read %A[%i, %j], %f0 :
+    memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32>
+
+  // CHECK: return %[[res]] : vector<4x8xf32>
+  return %1 : vector<4x8xf32>
+}
diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp
index 2058706dcbdd3..0bba74e76385e 100644
--- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp
@@ -122,6 +122,17 @@ struct TestVectorUnrollingPatterns
   }
 };
 
+struct TestVectorTransferFullPartialSplitPatterns
+    : public PassWrapper<TestVectorTransferFullPartialSplitPatterns,
+                         FunctionPass> {
+  void runOnFunction() override {
+    MLIRContext *ctx = &getContext();
+    OwningRewritePatternList patterns;
+    patterns.insert<vector::VectorTransferFullPartialRewriter>(ctx);
+    applyPatternsAndFoldGreedily(getFunction(), patterns);
+  }
+};
+
 } // end anonymous namespace
 
 namespace mlir {
@@ -141,5 +152,10 @@ void registerTestVectorConversions() {
   PassRegistration<TestVectorUnrollingPatterns> contractionUnrollingPass(
       "test-vector-unrolling-patterns",
       "Test conversion patterns to unroll contract ops in the vector dialect");
+
+  PassRegistration<TestVectorTransferFullPartialSplitPatterns>
+      vectorTransformFullPartialPass("test-vector-transfer-full-partial-split",
+                                     "Test conversion patterns to split "
+                                     "transfer ops via scf.if + linalg ops");
 }
 } // namespace mlir

From 98db27711d86d4085db4a4a8ff68f8baa1b094ef Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Mon, 3 Aug 2020 09:47:16 +0100
Subject: [PATCH 161/600] [LV] Do not check widening decision for instrs
 outside of loop.

No widening decisions will be computed for instructions outside the
loop. Do not try to get a widening decision. The load/store will be just
a scalar load, so treating it as normal should be fine I think.

Fixes PR46950.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D85087
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  2 +-
 .../pr46950-load-cast-context-crash.ll        | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5fffcc8cf0f3a..33bd31f6b9833 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6463,7 +6463,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
          "Expected a load or a store!");
 
-  if (VF == 1)
+  if (VF == 1 || !TheLoop->contains(I))
     return TTI::CastContextHint::Normal;
 
   switch (getWideningDecision(I, VF)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
new file mode 100644
index 0000000000000..e357acca3fbf5
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt -loop-vectorize %s -mtriple=arm64-apple-iphoneos -S | FileCheck %s
+
+; CHECK-LABEL: define void @test(
+; CHECK: vector.body
+
+define void @test(i64* %dst, i32* %src) {
+entry:
+  %l = load i32, i32* %src
+  br label %loop.ph
+
+loop.ph:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
+  %l.cast = sext i32 %l to i64
+  %dst.idx = getelementptr i64, i64* %dst, i64 %iv
+  store i64 %l.cast, i64* %dst.idx
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp9.us = icmp ult i64 %iv.next, 20
+  br i1 %cmp9.us, label %loop, label %exit
+
+exit:
+  ret void
+}

From 18d4069503e729442158476960a797df963cf293 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?=
Date: Sun, 2 Aug 2020 17:44:24 +0200
Subject: [PATCH 162/600] Fix lldb test on lib64 systems

Differential revision: https://reviews.llvm.org/D85096
---
 lldb/unittests/Expression/ClangParserTest.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/unittests/Expression/ClangParserTest.cpp b/lldb/unittests/Expression/ClangParserTest.cpp
index 81f9ed839fcc1..4df557475314e 100644
--- a/lldb/unittests/Expression/ClangParserTest.cpp
+++ b/lldb/unittests/Expression/ClangParserTest.cpp
@@ -11,6 +11,7 @@
 #include "Plugins/ExpressionParser/Clang/ClangHost.h"
 #include "TestingSupport/SubsystemRAII.h"
 #include "TestingSupport/TestUtilities.h"
+#include "lldb/Host/Config.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/HostInfo.h"
 #include "lldb/Utility/FileSpec.h"
@@ -36,7 +37,7 @@ static std::string ComputeClangResourceDir(std::string lldb_shlib_path,
 TEST_F(ClangHostTest, ComputeClangResourceDirectory) {
 #if !defined(_WIN32)
   std::string path_to_liblldb = "/foo/bar/lib/";
-  std::string path_to_clang_dir = "/foo/bar/lib/clang/" CLANG_VERSION_STRING;
+  std::string path_to_clang_dir = "/foo/bar/lib" LLDB_LIBDIR_SUFFIX "/clang/" CLANG_VERSION_STRING;
 #else
   std::string path_to_liblldb =
"C:\\foo\\bar\\lib"; std::string path_to_clang_dir = "C:\\foo\\bar\\lib\\clang\\" CLANG_VERSION_STRING; From 6d47431d7eeed44ae46dd1e58cf5d04e9210c048 Mon Sep 17 00:00:00 2001 From: Julian Gross Date: Fri, 31 Jul 2020 11:42:31 +0200 Subject: [PATCH 163/600] [mlir] Extended Buffer Assignment to support AllocaOps. Added support for AllocaOps in Buffer Assignment. Differential Revision: https://reviews.llvm.org/D85017 --- mlir/lib/Transforms/BufferPlacement.cpp | 15 +- mlir/test/Transforms/buffer-placement.mlir | 192 +++++++++++++++++++++ 2 files changed, 201 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 0c24621c36668..66d175dc1f3e8 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -264,12 +264,15 @@ class BufferPlacement { opInterface.getEffects(effects); SmallVector allocateResultEffects; - llvm::copy_if(effects, std::back_inserter(allocateResultEffects), - [=](MemoryEffects::EffectInstance &it) { - Value value = it.getValue(); - return isa(it.getEffect()) && - value && value.isa(); - }); + llvm::copy_if( + effects, std::back_inserter(allocateResultEffects), + [=](MemoryEffects::EffectInstance &it) { + Value value = it.getValue(); + return isa(it.getEffect()) && value && + value.isa() && + it.getResource() != + SideEffects::AutomaticAllocationScopeResource::get(); + }); // If there is one result only, we will be able to move the allocation and // (possibly existing) deallocation ops. if (allocateResultEffects.size() != 1) diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index c3bce4ea54583..2ac212c7d681f 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -933,3 +933,195 @@ func @subview(%arg0 : index, %arg1 : index, %arg2 : memref) { // CHECK-NEXT: linalg.copy // CHECK-NEXT: dealloc %[[ALLOC]] // CHECK-NEXT: return + +// ----- + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @condBranchAlloca +func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + cond_br %arg0, ^bb1, ^bb2 +^bb1: + br ^bb3(%arg1 : memref<2xf32>) +^bb2: + %0 = alloca() : memref<2xf32> + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + br ^bb3(%0 : memref<2xf32>) +^bb3(%1: memref<2xf32>): + "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK-NEXT: cond_br +// CHECK: %[[ALLOCA:.*]] = alloca() +// CHECK: br ^bb3(%[[ALLOCA:.*]]) +// CHECK-NEXT: ^bb3 +// CHECK-NEXT: linalg.copy +// CHECK-NEXT: return + +// ----- + +#map0 = affine_map<(d0) -> (d0)> + +// CHECK-LABEL: func @ifElseAlloca +func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + linalg.generic { + args_in = 1 : i64, + args_out = 1 : i64, + indexing_maps = [#map0, #map0], + iterator_types = ["parallel"]} %arg1, %0 { + ^bb0(%gen1_arg0: f32, %gen1_arg1: f32): + %tmp1 = exp %gen1_arg0 : f32 + linalg.yield %tmp1 : f32 + }: memref<2xf32>, memref<2xf32> + cond_br %arg0, + ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>), + ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>) +^bb1(%1: memref<2xf32>, %2: memref<2xf32>): + br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>) +^bb2(%3: memref<2xf32>, %4: 
+  br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
+  %7 = alloca() : memref<2xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %5, %7 {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
+// CHECK-NEXT: linalg.generic
+//      CHECK: %[[ALLOCA:.*]] = alloca()
+// CHECK-NEXT: linalg.generic
+//      CHECK: dealloc %[[ALLOC]]
+//      CHECK: linalg.copy
+// CHECK-NEXT: return
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @ifElseNestedAlloca
+func @ifElseNestedAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+  %0 = alloca() : memref<2xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %arg1, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  cond_br %arg0,
+    ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+    ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
+  br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
+  cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+^bb3(%5: memref<2xf32>):
+  br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+^bb4(%6: memref<2xf32>):
+  br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
+  %9 = alloc() : memref<2xf32>
+  linalg.generic {
+    args_in = 1 : i64,
+    args_out = 1 : i64,
+    indexing_maps = [#map0, #map0],
+    iterator_types = ["parallel"]} %7, %9 {
+  ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+    %tmp2 = exp %gen2_arg0 : f32
+    linalg.yield %tmp2 : f32
+  }: memref<2xf32>, memref<2xf32>
+  "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK-NEXT: linalg.generic
+//      CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK-NEXT: linalg.generic
+//      CHECK: linalg.copy
+// CHECK-NEXT: dealloc %[[ALLOC]]
+// CHECK-NEXT: return
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
+func @nestedRegionsAndCondBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+  cond_br %arg0, ^bb1, ^bb2
+^bb1:
+  br ^bb3(%arg1 : memref<2xf32>)
+^bb2:
+  %0 = alloc() : memref<2xf32>
+  linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %0 {
+  ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+    %1 = alloca() : memref<2xf32>
+    linalg.generic {args_in = 1 : i64, args_out = 1 : i64, indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} %arg1, %1 {
+    ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+      %tmp2 = exp %gen2_arg0 : f32
+      linalg.yield %tmp2 : f32
+    }: memref<2xf32>, memref<2xf32>
+    %tmp1 = exp %gen1_arg0 : f32
+    linalg.yield %tmp1 : f32
+  }: memref<2xf32>, memref<2xf32>
+  br ^bb3(%0 : memref<2xf32>)
+^bb3(%1: memref<2xf32>):
+  "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+  return
+}
+//      CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
+// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
+//      CHECK: ^[[BB2]]:
+// CHECK-NEXT: linalg.generic {{{.*}}} %[[ARG1]], %[[ALLOC]]
+//      CHECK: %[[ALLOCA:.*]] = alloca()
+// CHECK-NEXT: linalg.generic {{{.*}}} %[[ARG1]], %[[ALLOCA]]
+//      CHECK: %{{.*}} = exp
+//      CHECK: ^[[BB3:.*]]({{.*}}):
+//      CHECK: linalg.copy
+// CHECK-NEXT: dealloc %[[ALLOC]]
+
+// -----
+
+// CHECK-LABEL: func @nestedRegionControlFlowAlloca
+func @nestedRegionControlFlowAlloca(
+  %arg0 : index,
+  %arg1 : index) -> memref<?x?xf32> {
+  %0 = cmpi "eq", %arg0, %arg1 : index
+  %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
+  %2 = scf.if %0 -> (memref<?x?xf32>) {
+    scf.yield %1 : memref<?x?xf32>
+  } else {
+    %3 = alloca(%arg0, %arg1) : memref<?x?xf32>
+    scf.yield %1 : memref<?x?xf32>
+  }
+  return %2 : memref<?x?xf32>
+}
+
+//      CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0)
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
+//      CHECK: scf.yield %[[ALLOC0]]
+//      CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1)
+// CHECK-NEXT: scf.yield %[[ALLOC0]]
+//      CHECK: return %[[ALLOC1]]

From d919ae9df8721a56c8457fd5f9cfd50a71c87262 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Thu, 23 Jul 2020 15:26:23 +0300
Subject: [PATCH 164/600] [yaml2obj] - Add support for the "<none>" value for
 all optional fields.

It implements an approach suggested in the D84398 thread. With it the
following:

```
Sections:
  - Name:   .bar
    Type:   SHT_PROGBITS
    Offset: [[MACRO=<none>]]
```

works just as if the `Offset` key was not specified.

It is useful for tests that want to have a default value for a field
and to have a way to override it at the same time.
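For example (hypothetical invocation), running `yaml2obj -D MACRO=0x123` on
the document above sets `Offset` to 0x123, while running it without
`-D MACRO=...` behaves as if the `Offset` key was omitted.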
Differential revision: https://reviews.llvm.org/D84526
---
 llvm/include/llvm/Support/YAMLTraits.h       | 53 +++++++++++++-------
 llvm/test/tools/yaml2obj/ELF/none-value.yaml | 45 +++++++++++++++++
 2 files changed, 80 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/tools/yaml2obj/ELF/none-value.yaml

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 44e34a4a09b46..e52bf7892d711 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -902,24 +902,7 @@ class IO {
   template <typename T, typename Context>
   void processKeyWithDefault(const char *Key, Optional<T> &Val,
                              const Optional<T> &DefaultValue, bool Required,
-                             Context &Ctx) {
-    assert(DefaultValue.hasValue() == false &&
-           "Optional<T> shouldn't have a value!");
-    void *SaveInfo;
-    bool UseDefault = true;
-    const bool sameAsDefault = outputting() && !Val.hasValue();
-    if (!outputting() && !Val.hasValue())
-      Val = T();
-    if (Val.hasValue() &&
-        this->preflightKey(Key, Required, sameAsDefault, UseDefault,
-                           SaveInfo)) {
-      yamlize(*this, Val.getValue(), Required, Ctx);
-      this->postflightKey(SaveInfo);
-    } else {
-      if (UseDefault)
-        Val = DefaultValue;
-    }
-  }
+                             Context &Ctx);
 
   template <typename T, typename Context>
   void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
@@ -1625,6 +1608,40 @@ class Output : public IO {
   StringRef PaddingBeforeContainer;
 };
 
+template <typename T, typename Context>
+void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
+                               const Optional<T> &DefaultValue, bool Required,
+                               Context &Ctx) {
+  assert(DefaultValue.hasValue() == false &&
+         "Optional<T> shouldn't have a value!");
+  void *SaveInfo;
+  bool UseDefault = true;
+  const bool sameAsDefault = outputting() && !Val.hasValue();
+  if (!outputting() && !Val.hasValue())
+    Val = T();
+  if (Val.hasValue() &&
+      this->preflightKey(Key, Required, sameAsDefault, UseDefault, SaveInfo)) {
+
+    // When reading an Optional<T> key from a YAML description, we allow the
+    // special "<none>" value, which can be used to specify that no value was
+    // requested, i.e. the DefaultValue will be assigned. The DefaultValue is
+    // usually None.
+    bool IsNone = false;
+    if (!outputting())
+      if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
+        IsNone = Node->getRawValue() == "<none>";
+
+    if (IsNone)
+      Val = DefaultValue;
+    else
+      yamlize(*this, Val.getValue(), Required, Ctx);
+    this->postflightKey(SaveInfo);
+  } else {
+    if (UseDefault)
+      Val = DefaultValue;
+  }
+}
+
 /// YAML I/O does conversion based on types. But often native data types
 /// are just a typedef of built-in integral types (e.g. int). But the C++
 /// type matching system sees through the typedef and all the typedefed types
diff --git a/llvm/test/tools/yaml2obj/ELF/none-value.yaml b/llvm/test/tools/yaml2obj/ELF/none-value.yaml
new file mode 100644
index 0000000000000..786a9b53aba78
--- /dev/null
+++ b/llvm/test/tools/yaml2obj/ELF/none-value.yaml
@@ -0,0 +1,45 @@
+## We have a special "<none>" value for all keys that are implemented
+## as Optional<> in the code. Setting a key to "<none>" means no-op and
+## works in the same way as when a field was not specified at all.
+
+## Test a few keys for which the "<none>" value is supported.
+## We do not test all possible keys, because it would be too verbose.
+## It is reasonable to test all keys for a section, because normally many
+## of them would conflict or intersect when specified together.
+# RUN: yaml2obj %s --docnum=1 -o %t-none
+# RUN: yaml2obj %s --docnum=2 -o %t-base
+# RUN: cmp %t-none %t-base
+
+## We do not use the TEST macro. It exists to
+## demonstrate the expected use case for the <none> word.
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:         .bar
+    Type:         SHT_PROGBITS
+    Offset:       [[TEST=<none>]]
+    Address:      [[TEST=<none>]]
+    Content:      [[TEST=<none>]]
+    Size:         [[TEST=<none>]]
+    ContentArray: [[TEST=<none>]]
+    Info:         [[TEST=<none>]]
+    EntSize:      [[TEST=<none>]]
+    ShName:       [[TEST=<none>]]
+    ShOffset:     [[TEST=<none>]]
+    ShSize:       [[TEST=<none>]]
+    ShFlags:      [[TEST=<none>]]
+
+## The same document, but all fields that were set to <none> are removed.
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .bar
+    Type: SHT_PROGBITS

From d76057c1fe6a368e9e422cf586e09fba827c96e1 Mon Sep 17 00:00:00 2001
From: Ilya Golovenko
Date: Mon, 3 Aug 2020 11:34:14 +0200
Subject: [PATCH 165/600] Add document outline symbols from unnamed contexts,
 e.g. extern "C".
It is necessary to traverse children of unnamed declaration contexts to
get symbols which are currently missing in document outline, e.g.:

  extern "C" {
  void foo();
  }

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D84839
---
 clang-tools-extra/clangd/FindSymbols.cpp      | 31 ++++++++++++-----
 .../clangd/unittests/FindSymbolsTests.cpp     | 34 +++++++++++++++++++
 2 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index f5d6a95aa713d..2471656988250 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -188,7 +188,7 @@ class DocumentOutline {
   }
 
 private:
-  enum class VisitKind { No, OnlyDecl, DeclAndChildren };
+  enum class VisitKind { No, OnlyDecl, OnlyChildren, DeclAndChildren };
 
   void traverseDecl(Decl *D, std::vector<DocumentSymbol> &Results) {
     if (auto *Templ = llvm::dyn_cast<TemplateDecl>(D)) {
@@ -196,18 +196,25 @@ class DocumentOutline {
       if (auto *TD = Templ->getTemplatedDecl())
         D = TD;
     }
-    auto *ND = llvm::dyn_cast<NamedDecl>(D);
-    if (!ND)
-      return;
-    VisitKind Visit = shouldVisit(ND);
+
+    VisitKind Visit = shouldVisit(D);
     if (Visit == VisitKind::No)
       return;
-    llvm::Optional<DocumentSymbol> Sym = declToSym(AST.getASTContext(), *ND);
+
+    if (Visit == VisitKind::OnlyChildren)
+      return traverseChildren(D, Results);
+
+    auto *ND = llvm::cast<NamedDecl>(D);
+    auto Sym = declToSym(AST.getASTContext(), *ND);
     if (!Sym)
      return;
-    if (Visit == VisitKind::DeclAndChildren)
-      traverseChildren(D, Sym->children);
     Results.push_back(std::move(*Sym));
+
+    if (Visit == VisitKind::OnlyDecl)
+      return;
+
+    assert(Visit == VisitKind::DeclAndChildren && "Unexpected VisitKind");
+    traverseChildren(ND, Results.back().children);
   }
 
   void traverseChildren(Decl *D, std::vector<DocumentSymbol> &Results) {
@@ -218,10 +225,16 @@ class DocumentOutline {
       traverseDecl(C, Results);
   }
 
-  VisitKind shouldVisit(NamedDecl *D) {
+  VisitKind shouldVisit(Decl *D) {
     if (D->isImplicit())
       return VisitKind::No;
 
+    if (llvm::isa<LinkageSpecDecl>(D) || llvm::isa<ExportDecl>(D))
+      return VisitKind::OnlyChildren;
+
+    if (!llvm::isa<NamedDecl>(D))
+      return VisitKind::No;
+
     if (auto Func = llvm::dyn_cast<FunctionDecl>(D)) {
       // Some functions are implicit template instantiations, those should be
       // ignored.
diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
index 07c42fcf20304..8576e11a5f21a 100644
--- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
@@ -429,6 +429,40 @@ TEST(DocumentSymbols, ExternSymbol) {
   EXPECT_THAT(getSymbols(TU.build()), IsEmpty());
 }
 
+TEST(DocumentSymbols, ExternContext) {
+  TestTU TU;
+  TU.Code = R"cpp(
+      extern "C" {
+      void foo();
+      class Foo {};
+      }
+      namespace ns {
+      extern "C" {
+      void bar();
+      class Bar {};
+      }
+      })cpp";
+
+  EXPECT_THAT(getSymbols(TU.build()),
+              ElementsAre(WithName("foo"), WithName("Foo"),
+                          AllOf(WithName("ns"),
+                                Children(WithName("bar"), WithName("Bar")))));
+}
+
+TEST(DocumentSymbols, ExportContext) {
+  TestTU TU;
+  TU.ExtraArgs = {"-std=c++20"};
+  TU.Code = R"cpp(
+      export module test;
+      export {
+      void foo();
+      class Foo {};
+      })cpp";
+
+  EXPECT_THAT(getSymbols(TU.build()),
+              ElementsAre(WithName("foo"), WithName("Foo")));
+}
+
 TEST(DocumentSymbols, NoLocals) {
   TestTU TU;
   TU.Code = R"cpp(

From 7e32797552c69155676e45ec7d39f948779daa92 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Mon, 27 Jul 2020 16:03:03 +0300
Subject: [PATCH 166/600] [llvm-readobj] - Don't call `unwrapOrErr` in
 `findSectionByName`.

We have a `findSectionByName` helper that tries to find a section by its
name. It is used in a few places, but never tested.

I'd like to reuse this helper in a different place. For this, I've
changed it to return Expected<> and now it doesn't use `unwrapOrErr`
anymore. It is also now a member of the Dumper class and might report
warnings.
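Callers now check the result explicitly; a minimal sketch of the new usage
(mirroring the MIPS GOT code in this patch):

```
Expected<const Elf_Shdr *> GotOrErr = Dumper.findSectionByName(".got");
if (!GotOrErr)
  return GotOrErr.takeError();
// *GotOrErr may still be null when no section with that name exists.
```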
Differential revision: https://reviews.llvm.org/D84651
---
 .../tools/llvm-readobj/ELF/mips-abiflags.test |  28 ++++-
 .../test/tools/llvm-readobj/ELF/mips-got.test |  32 ++++++
 .../llvm-readobj/ELF/mips-options-sec.test    |  36 +++++++
 .../tools/llvm-readobj/ELF/mips-reginfo.test  |  35 ++++++
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 101 +++++++++++-------
 5 files changed, 193 insertions(+), 39 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-abiflags.test b/llvm/test/tools/llvm-readobj/ELF/mips-abiflags.test
index f4432efb6aeba..791e3e9cc6e69 100644
--- a/llvm/test/tools/llvm-readobj/ELF/mips-abiflags.test
+++ b/llvm/test/tools/llvm-readobj/ELF/mips-abiflags.test
@@ -338,7 +338,7 @@ Sections: []
 # RUN: llvm-readelf -A %t.err1 2>&1 | FileCheck %s -DFILE=%t.err1 --check-prefix=CONTENT-ERR
 # RUN: llvm-readobj -A %t.err1 2>&1 | FileCheck %s -DFILE=%t.err1 --check-prefix=CONTENT-ERR
 
-# CONTENT-ERR: warning: '[[FILE]]': unable to read the .MIPS.abiflags section: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x18) that is greater than the file size (0x240)
+# CONTENT-ERR: warning: '[[FILE]]': unable to read the .MIPS.abiflags section: section [index 2] has a sh_offset (0xffffffff) + sh_size (0x18) that is greater than the file size (0x2c0)
 # CONTENT-ERR-NEXT: There is no .MIPS.options section in the file.
 # CONTENT-ERR-NEXT: There is no .reginfo section in the file.
 
@@ -349,12 +349,17 @@ FileHeader:
   Type:    ET_REL
   Machine: EM_MIPS
 Sections:
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
   - Name:   .MIPS.abiflags
     Type:   SHT_MIPS_ABIFLAGS
    ISA:     MIPS32
    Offset:  0x100
    ShOffset: [[SHOFFSET=0x100]]
    ShSize:  [[SHSIZE=24]]
+    ShName: [[ABIFLAGSNAME=<none>]]
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
 
 ## Check we report a warning when the .MIPS.abiflags section has an unexpected size.
 # RUN: yaml2obj --docnum=3 -DSHSIZE=23 %s -o %t.err2
@@ -364,3 +369,24 @@ Sections:
 # SIZE-ERR: warning: '[[FILE]]': unable to read the .MIPS.abiflags section: it has a wrong size (23)
 # SIZE-ERR-NEXT: There is no .MIPS.options section in the file.
 # SIZE-ERR-NEXT: There is no .reginfo section in the file.
+
+## Check that we try to dump the .MIPS.abiflags section when we are able to locate it by name.
+# RUN: yaml2obj --docnum=3 -DNAME=0xffff %s -o %t.err3
+# RUN: llvm-readelf -A %t.err3 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err3 --check-prefixes=NAME-ERR-FOUND,NAME-ERR-FOUND-GNU --implicit-check-not=warning:
+# RUN: llvm-readobj -A %t.err3 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err3 --check-prefixes=NAME-ERR-FOUND,NAME-ERR-FOUND-LLVM --implicit-check-not=warning:
+
+# NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 1: a section [index 1] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND-GNU-NEXT: MIPS ABI Flags Version: 0
+# NAME-ERR-FOUND-LLVM-NEXT: MIPS ABI Flags {
+# NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 3: a section [index 3] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+
+## Check we report a warning when we are unable to find the .MIPS.abiflags section due to an error.
+# RUN: yaml2obj --docnum=3 -DABIFLAGSNAME=0xffff %s -o %t.err4
+# RUN: llvm-readelf -A %t.err4 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err4 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS ABI Flags"
+# RUN: llvm-readobj -A %t.err4 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err4 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS ABI Flags"
+
+# NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_MIPS_ABIFLAGS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got.test b/llvm/test/tools/llvm-readobj/ELF/mips-got.test
index cfbf1c4f37a3e..7475a6d57d578 100644
--- a/llvm/test/tools/llvm-readobj/ELF/mips-got.test
+++ b/llvm/test/tools/llvm-readobj/ELF/mips-got.test
@@ -547,3 +547,35 @@ Sections:
     - Tag:   DT_PLTGOT
       Value: [[VAL2=0]]
 DynamicSymbols: []
+
+## Check that we do not report a warning about the .got section when we are able to locate it by name.
+# RUN: yaml2obj --docnum=3 -DNAME=0xffff %s -o %t.err6.o
+# RUN: llvm-readobj -A %t.err6.o 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err6.o -check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+# RUN: llvm-readelf -A %t.err6.o 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err6.o -check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+
+# NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 1: a section [index 1] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND-NEXT: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 3: a section [index 3] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+  - Name:   .got
+    Type:   SHT_PROGBITS
+    ShName: [[GOTNAME=<none>]]
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+
+## Check we report a warning when we are unable to find the .got section due to an error.
+# RUN: yaml2obj --docnum=3 -DGOTNAME=0xffff %s -o %t.err7.o
+# RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning:
+# RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning:
+
+# NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test b/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test
index f53f04c55e9bf..ef5ece472821e 100644
--- a/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test
+++ b/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test
@@ -12,3 +12,39 @@
 # CHECK-NEXT:     Co-Proc Mask3: 0x0
 # CHECK-NEXT:   }
 # CHECK-NEXT: }
+
+## Check that we try to dump the .MIPS.options section when we are able to locate it by name.
+# RUN: yaml2obj --docnum=1 -DNAME=0xffff %s -o %t.err1
+# RUN: llvm-readelf -A %t.err1 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err1 --check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+# RUN: llvm-readobj -A %t.err1 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err1 --check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+
+# NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 1: a section [index 1] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND-NEXT: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 3: a section [index 3] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND: MIPS Options {
+# NAME-ERR-FOUND-NEXT: }
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+  - Name:   .MIPS.options
+    Type:   SHT_MIPS_OPTIONS
+    ShName: [[OPTNAME=<none>]]
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+
+## Check we report a warning when we are unable to find the .MIPS.options section due to an error.
+# RUN: yaml2obj --docnum=1 -DOPTNAME=0xffff %s -o %t.err2
+# RUN: llvm-readelf -A %t.err2 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS Options"
+# RUN: llvm-readobj -A %t.err2 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS Options"
+
+# NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_MIPS_OPTIONS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-reginfo.test b/llvm/test/tools/llvm-readobj/ELF/mips-reginfo.test
index 0074631843cd0..9f200e9d95259 100644
--- a/llvm/test/tools/llvm-readobj/ELF/mips-reginfo.test
+++ b/llvm/test/tools/llvm-readobj/ELF/mips-reginfo.test
@@ -10,3 +10,38 @@
 # CHECK-NEXT:   Co-Proc Mask2: 0x0
 # CHECK-NEXT:   Co-Proc Mask3: 0x0
 # CHECK-NEXT: }
+
+## Check that we try to dump the .reginfo section when we are able to locate it by name.
+# RUN: yaml2obj --docnum=1 -DNAME=0xffff %s -o %t.err1
+# RUN: llvm-readelf -A %t.err1 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err1 --check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+# RUN: llvm-readobj -A %t.err1 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err1 --check-prefix=NAME-ERR-FOUND --implicit-check-not=warning:
+
+# NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 1: a section [index 1] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND-NEXT: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 3: a section [index 3] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+# NAME-ERR-FOUND: The .reginfo section has a wrong size.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+  - Name:   .reginfo
+    Type:   SHT_MIPS_REGINFO
+    ShName: [[REGINFONAME=<none>]]
+  - Type:   SHT_PROGBITS
+    ShName: [[NAME=<none>]]
+
+## Check we report a warning when we are unable to find the .reginfo section due to an error.
+# RUN: yaml2obj --docnum=1 -DREGINFONAME=0xffff %s -o %t.err2
+# RUN: llvm-readelf -A %t.err2 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning:
+# RUN: llvm-readobj -A %t.err2 2>&1 | \
+# RUN:   FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning:
+
+# NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_MIPS_REGINFO section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 0e9551edfb1b7..35b5e2637b4d4 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -353,6 +353,8 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
   void printSymbolsHelper(bool IsDynamic) const;
   std::string getDynamicEntry(uint64_t Type, uint64_t Value) const;
 
+  Expected<const Elf_Shdr *> findSectionByName(StringRef Name) const;
+
   const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; }
   const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; }
   const Elf_Shdr *getDotAddrsigSec() const { return DotAddrsigSec; }
@@ -1286,15 +1288,6 @@ findNotEmptySectionByAddress(const ELFO *Obj, StringRef FileName,
   return nullptr;
 }
 
-template <class ELFO>
-static const typename ELFO::Elf_Shdr *
-findSectionByName(const ELFO &Obj, StringRef FileName, StringRef Name) {
-  for (const typename ELFO::Elf_Shdr &Shdr : cantFail(Obj.sections()))
-    if (Name == unwrapOrError(FileName, Obj.getSectionName(&Shdr)))
-      return &Shdr;
-  return nullptr;
-}
-
 static const EnumEntry<unsigned> ElfClass[] = {
     {"None",   "none",  ELF::ELFCLASSNONE},
     {"32-bit", "ELF32", ELF::ELFCLASS32},
@@ -2457,6 +2450,23 @@ void printFlags(T Value, ArrayRef<EnumEntry<T>> Flags, raw_ostream &OS) {
   }
 }
 
+template <typename ELFT>
+Expected<const typename ELFT::Shdr *>
+ELFDumper<ELFT>::findSectionByName(StringRef Name) const {
+  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
+  for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) {
+    if (Expected<StringRef> NameOrErr = Obj->getSectionName(&Shdr)) {
+      if (*NameOrErr == Name)
+        return &Shdr;
+    } else {
+      reportUniqueWarning(createError("unable to read the name of " +
+                                      describe(Shdr) + ": " +
+                                      toString(NameOrErr.takeError())));
+    }
+  }
+  return nullptr;
+}
+
 template <typename ELFT>
 std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
                                              uint64_t Value) const {
@@ -2864,9 +2874,7 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
     ELFDumperStyle->printMipsABIFlags(ObjF);
     printMipsOptions();
     printMipsReginfo();
-
-    MipsGOTParser<ELFT> Parser(Obj, ObjF->getFileName(), dynamic_table(),
-                               dynamic_symbols());
+    MipsGOTParser<ELFT> Parser(*this);
     if (Error E = Parser.findGOT(dynamic_table(), dynamic_symbols()))
       reportError(std::move(E), ObjF->getFileName());
     else if (!Parser.isGotEmpty())
@@ -2933,9 +2941,9 @@ template <class ELFT> class MipsGOTParser {
 
   const bool IsStatic;
   const ELFO * const Obj;
+  const ELFDumper<ELFT> &Dumper;
 
-  MipsGOTParser(const ELFO *Obj, StringRef FileName, Elf_Dyn_Range DynTable,
-                Elf_Sym_Range DynSyms);
+  MipsGOTParser(const ELFDumper<ELFT> &D);
 
   Error findGOT(Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms);
   Error findPLT(Elf_Dyn_Range DynTable);
@@ -2983,12 +2991,11 @@ template <class ELFT> class MipsGOTParser {
 } // end anonymous namespace
 
 template <class ELFT>
-MipsGOTParser<ELFT>::MipsGOTParser(const ELFO *Obj, StringRef FileName,
-                                   Elf_Dyn_Range DynTable,
-                                   Elf_Sym_Range DynSyms)
-    : IsStatic(DynTable.empty()), Obj(Obj), GotSec(nullptr), LocalNum(0),
-      GlobalNum(0), PltSec(nullptr), PltRelSec(nullptr), PltSymTable(nullptr),
-      FileName(FileName) {}
+MipsGOTParser<ELFT>::MipsGOTParser(const ELFDumper<ELFT> &D)
+    : IsStatic(D.dynamic_table().empty()), Obj(D.getElfObject()->getELFFile()),
+      Dumper(D), GotSec(nullptr), LocalNum(0), GlobalNum(0), PltSec(nullptr),
+      PltRelSec(nullptr), PltSymTable(nullptr),
+      FileName(D.getElfObject()->getFileName()) {}
 
 template <class ELFT>
 Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
@@ -2999,7 +3006,12 @@ Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
 
   // Find static GOT section.
   if (IsStatic) {
-    GotSec = findSectionByName(*Obj, FileName, ".got");
+    Expected<const Elf_Shdr *> GotOrErr = Dumper.findSectionByName(".got");
+    if (!GotOrErr)
+      return GotOrErr.takeError();
+    else
+      GotSec = *GotOrErr;
+
     if (!GotSec)
       return Error::success();
 
@@ -3318,13 +3330,19 @@ static void printMipsReginfoData(ScopedPrinter &W,
 
 template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
   const ELFFile<ELFT> *Obj = ObjF->getELFFile();
-  const Elf_Shdr *Shdr = findSectionByName(*Obj, ObjF->getFileName(), ".reginfo");
-  if (!Shdr) {
+  Expected<const Elf_Shdr *> RegInfoOrErr = findSectionByName(".reginfo");
+  if (!RegInfoOrErr) {
+    reportUniqueWarning(RegInfoOrErr.takeError());
+    return;
+  }
+
+  if ((*RegInfoOrErr) == nullptr) {
     W.startLine() << "There is no .reginfo section in the file.\n";
     return;
   }
-  ArrayRef<uint8_t> Sec =
-      unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
+
+  ArrayRef<uint8_t> Sec = unwrapOrError(ObjF->getFileName(),
+                                        Obj->getSectionContents(*RegInfoOrErr));
   if (Sec.size() != sizeof(Elf_Mips_RegInfo<ELFT>)) {
     W.startLine() << "The .reginfo section has a wrong size.\n";
     return;
@@ -3337,17 +3355,21 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
 
 template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
   const ELFFile<ELFT> *Obj = ObjF->getELFFile();
-  const Elf_Shdr *Shdr =
-      findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.options");
-  if (!Shdr) {
+  Expected<const Elf_Shdr *> MipsOptOrErr = findSectionByName(".MIPS.options");
+  if (!MipsOptOrErr) {
+    reportUniqueWarning(MipsOptOrErr.takeError());
+    return;
+  }
+
+  if ((*MipsOptOrErr) == nullptr) {
     W.startLine() << "There is no .MIPS.options section in the file.\n";
     return;
   }
 
   DictScope GS(W, "MIPS Options");
 
-  ArrayRef<uint8_t> Sec =
-      unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(Shdr));
+  ArrayRef<uint8_t> Sec = unwrapOrError(ObjF->getFileName(),
+                                        Obj->getSectionContents(*MipsOptOrErr));
   while (!Sec.empty()) {
     if (Sec.size() < sizeof(Elf_Mips_Options<ELFT>)) {
       W.startLine() << "The .MIPS.options section has a wrong size.\n";
@@ -5959,15 +5981,18 @@ void GNUStyle<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
 
 template <class ELFT>
 Expected<const Elf_Mips_ABIFlags<ELFT> *>
-getMipsAbiFlagsSection(const ELFObjectFile<ELFT> *ObjF) {
-  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
-  const typename ELFT::Shdr *Shdr =
-      findSectionByName(*Obj, ObjF->getFileName(), ".MIPS.abiflags");
-  if (!Shdr)
+getMipsAbiFlagsSection(const ELFObjectFile<ELFT> *ObjF,
+                       const ELFDumper<ELFT> &Dumper) {
+  Expected<const typename ELFT::Shdr *> SecOrErr =
+      Dumper.findSectionByName(".MIPS.abiflags");
+  if (!SecOrErr)
+    return SecOrErr.takeError();
+  if (*SecOrErr == nullptr)
     return nullptr;
 
+  const ELFFile<ELFT> *Obj = ObjF->getELFFile();
   constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: ";
-  Expected<ArrayRef<uint8_t>> DataOrErr = Obj->getSectionContents(Shdr);
+  Expected<ArrayRef<uint8_t>> DataOrErr = Obj->getSectionContents(*SecOrErr);
   if (!DataOrErr)
     return createError(ErrPrefix + toString(DataOrErr.takeError()));
 
@@ -5981,7 +6006,7 @@ template <class ELFT>
 void GNUStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
   const Elf_Mips_ABIFlags<ELFT> *Flags = nullptr;
   if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
-          getMipsAbiFlagsSection(ObjF))
+          getMipsAbiFlagsSection(ObjF, *this->dumper()))
     Flags = *SecOrErr;
   else
     this->reportUniqueWarning(SecOrErr.takeError());
@@ -6926,7 +6951,7 @@ template <class ELFT>
 void LLVMStyle<ELFT>::printMipsABIFlags(const ELFObjectFile<ELFT> *ObjF) {
   const Elf_Mips_ABIFlags<ELFT> *Flags;
   if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
-          getMipsAbiFlagsSection(ObjF)) {
+          getMipsAbiFlagsSection(ObjF, *this->dumper())) {
     Flags = *SecOrErr;
     if (!Flags) {
       W.startLine() << "There is no .MIPS.abiflags section in the file.\n";

From 87de54dbb6efa0fc5e304f94b350a39066bc2759 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya
Date: Sat, 1 Aug 2020 18:55:30 +0200
Subject: [PATCH 167/600] [clang][Tooling] Fix addTargetAndModeForProgramName
 to use correct flag names

The logic was using incorrect flag spellings. For example:
- `-target=` can't be a prefix, it must be `--target=`.
- `--driver-mode` can't appear on its own, the value must be attached to it.

While fixing those, this also changes the append logic to make use of the
new `--target=X` format instead of the legacy `-target X` version.

In addition, it makes use of the OptTable instead of hardcoded strings to
make sure the helper also gets updated if clang's options are modified.
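For example, a tool invoked through a symlink named `aarch64-linux-gnu-g++`
now gets `--target=aarch64-linux-gnu` and `--driver-mode=g++` prepended to its
command line (illustrative triple; see the updated unit tests below).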
Differential Revision: https://reviews.llvm.org/D85076
---
 clang/lib/Tooling/Tooling.cpp           | 52 +++++++++++++++----------
 clang/unittests/Tooling/ToolingTest.cpp | 16 +++-----
 2 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp
index 40b6cff0d627a..0593f0cc1d195 100644
--- a/clang/lib/Tooling/Tooling.cpp
+++ b/clang/lib/Tooling/Tooling.cpp
@@ -245,27 +245,37 @@ std::string getAbsolutePath(StringRef File) {
 
 void addTargetAndModeForProgramName(std::vector<std::string> &CommandLine,
                                     StringRef InvokedAs) {
-  if (!CommandLine.empty() && !InvokedAs.empty()) {
-    bool AlreadyHasTarget = false;
-    bool AlreadyHasMode = false;
-    // Skip CommandLine[0].
-    for (auto Token = ++CommandLine.begin(); Token != CommandLine.end();
-         ++Token) {
-      StringRef TokenRef(*Token);
-      AlreadyHasTarget |=
-          (TokenRef == "-target" || TokenRef.startswith("-target="));
-      AlreadyHasMode |= (TokenRef == "--driver-mode" ||
-                         TokenRef.startswith("--driver-mode="));
-    }
-    auto TargetMode =
-        driver::ToolChain::getTargetAndModeFromProgramName(InvokedAs);
-    if (!AlreadyHasMode && TargetMode.DriverMode) {
-      CommandLine.insert(++CommandLine.begin(), TargetMode.DriverMode);
-    }
-    if (!AlreadyHasTarget && TargetMode.TargetIsValid) {
-      CommandLine.insert(++CommandLine.begin(), {"-target",
-                                                 TargetMode.TargetPrefix});
-    }
+  if (CommandLine.empty() || InvokedAs.empty())
+    return;
+  const auto &Table = driver::getDriverOptTable();
+  // --target=X
+  const std::string TargetOPT =
+      Table.getOption(driver::options::OPT_target).getPrefixedName();
+  // -target X
+  const std::string TargetOPTLegacy =
+      Table.getOption(driver::options::OPT_target_legacy_spelling)
+          .getPrefixedName();
+  // --driver-mode=X
+  const std::string DriverModeOPT =
+      Table.getOption(driver::options::OPT_driver_mode).getPrefixedName();
+  bool AlreadyHasTarget = false;
+  bool AlreadyHasMode = false;
+  // Skip CommandLine[0].
+  for (auto Token = ++CommandLine.begin(); Token != CommandLine.end();
+       ++Token) {
+    StringRef TokenRef(*Token);
+    AlreadyHasTarget |=
+        TokenRef.startswith(TargetOPT) || TokenRef.equals(TargetOPTLegacy);
+    AlreadyHasMode |= TokenRef.startswith(DriverModeOPT);
+  }
+  auto TargetMode =
+      driver::ToolChain::getTargetAndModeFromProgramName(InvokedAs);
+  if (!AlreadyHasMode && TargetMode.DriverMode) {
+    CommandLine.insert(++CommandLine.begin(), TargetMode.DriverMode);
+  }
+  if (!AlreadyHasTarget && TargetMode.TargetIsValid) {
+    CommandLine.insert(++CommandLine.begin(),
+                       TargetOPT + TargetMode.TargetPrefix);
   }
 }
 
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index 5bd2864b5ba1e..cc6f453284d71 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -621,7 +621,7 @@ TEST(addTargetAndModeForProgramName, AddsTargetAndMode) {
   addTargetAndModeForProgramName(Args, "");
   EXPECT_EQ((std::vector<std::string>{"clang", "-foo"}), Args);
   addTargetAndModeForProgramName(Args, Target + "-g++");
-  EXPECT_EQ((std::vector<std::string>{"clang", "-target", Target,
+  EXPECT_EQ((std::vector<std::string>{"clang", "--target=" + Target,
                                       "--driver-mode=g++", "-foo"}),
             Args);
 }
@@ -635,7 +635,7 @@ TEST(addTargetAndModeForProgramName, PathIgnored) {
   std::vector<std::string> Args = {"clang", "-foo"};
   addTargetAndModeForProgramName(Args, ToolPath);
 
-  EXPECT_EQ((std::vector<std::string>{"clang", "-target", Target,
+  EXPECT_EQ((std::vector<std::string>{"clang", "--target=" + Target,
                                       "--driver-mode=g++", "-foo"}),
             Args);
 }
@@ -650,10 +650,10 @@ TEST(addTargetAndModeForProgramName, IgnoresExistingTarget) {
                                       "-target", "something"}),
             Args);
 
-  std::vector<std::string> ArgsAlt = {"clang", "-foo", "-target=something"};
+  std::vector<std::string> ArgsAlt = {"clang", "-foo", "--target=something"};
   addTargetAndModeForProgramName(ArgsAlt, Target + "-g++");
   EXPECT_EQ((std::vector<std::string>{"clang", "--driver-mode=g++", "-foo",
-                                      "-target=something"}),
+                                      "--target=something"}),
             ArgsAlt);
 }
 
@@ -663,15 +663,9 @@ TEST(addTargetAndModeForProgramName, IgnoresExistingMode) {
 
   std::vector<std::string> Args = {"clang", "-foo", "--driver-mode=abc"};
   addTargetAndModeForProgramName(Args, Target + "-g++");
-  EXPECT_EQ((std::vector<std::string>{"clang", "-target", Target, "-foo",
+  EXPECT_EQ((std::vector<std::string>{"clang", "--target=" + Target, "-foo",
                                       "--driver-mode=abc"}),
             Args);
-
-  std::vector<std::string> ArgsAlt = {"clang", "-foo", "--driver-mode", "abc"};
-  addTargetAndModeForProgramName(ArgsAlt, Target + "-g++");
-  EXPECT_EQ((std::vector<std::string>{"clang", "-target", Target, "-foo",
-                                      "--driver-mode", "abc"}),
-            ArgsAlt);
 }
 
 #ifndef _WIN32

From 895878f4568d3c5fe470bd811c2dfdbbe285e5aa Mon Sep 17 00:00:00 2001
From: Alex Richardson
Date: Mon, 3 Aug 2020 10:04:01 +0100
Subject: [PATCH 168/600] [asan][tsan] Mark tests failing with debug checks as
 XFAIL

See https://llvm.org/PR46862. This does not fix the underlying issue but
at least it allows me to run check-all again without having to disable
building compiler-rt.
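(The `compiler-rt-optimized` lit feature used by these XFAIL lines is
presumably provided by the common compiler-rt lit configuration when the
runtime is built with optimizations, i.e. without expensive debug checks;
its definition is not part of this patch.)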
Reviewed By: #sanitizers, vitalybuka Differential Revision: https://reviews.llvm.org/D84650 --- compiler-rt/test/asan/TestCases/Linux/activation-options.cpp | 2 ++ compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp | 2 ++ compiler-rt/test/asan/TestCases/Linux/odr-vtable.cpp | 3 +++ compiler-rt/test/asan/TestCases/Posix/start-deactivated.cpp | 2 ++ compiler-rt/test/asan/TestCases/handle_noreturn_bug.cpp | 2 ++ .../test/sanitizer_common/TestCases/Linux/allow_user_segv.cpp | 2 ++ .../test/sanitizer_common/TestCases/Linux/signal_line.cpp | 2 ++ .../TestCases/Posix/dedup_token_length_test.cpp | 2 ++ .../sanitizer_common/TestCases/Posix/illegal_read_test.cpp | 2 ++ .../sanitizer_common/TestCases/Posix/illegal_write_test.cpp | 2 ++ .../TestCases/Posix/sanitizer_set_report_fd_test.cpp | 2 ++ 11 files changed, 23 insertions(+) diff --git a/compiler-rt/test/asan/TestCases/Linux/activation-options.cpp b/compiler-rt/test/asan/TestCases/Linux/activation-options.cpp index 0074a6699d39e..59c4d5c46e771 100644 --- a/compiler-rt/test/asan/TestCases/Linux/activation-options.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/activation-options.cpp @@ -1,6 +1,8 @@ // Test for ASAN_OPTIONS=start_deactivated=1 mode. // Main executable is uninstrumented, but linked to ASan runtime. The shared // library is instrumented. +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46862 +// XFAIL: !compiler-rt-optimized // RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %t-so.so // RUN: %clangxx -O0 %s -c -o %t.o diff --git a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp index 9cdafca91b1f3..a14b59b67f114 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp @@ -1,5 +1,7 @@ // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316 // XFAIL: android +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46862 +// XFAIL: !compiler-rt-optimized // // We use fast_unwind_on_malloc=0 to have full unwinding even w/o frame // pointers. This setting is not on by default because it's too expensive. diff --git a/compiler-rt/test/asan/TestCases/Linux/odr-vtable.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-vtable.cpp index fdbab4bb1fd82..eb0805784df1a 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr-vtable.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr-vtable.cpp @@ -1,3 +1,6 @@ +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46862 +// XFAIL: !compiler-rt-optimized + // RUN: %clangxx_asan -fno-rtti -DBUILD_SO1 -fPIC -shared %s -o %dynamiclib1 // RUN: %clangxx_asan -fno-rtti -DBUILD_SO2 -fPIC -shared %s -o %dynamiclib2 // RUN: %clangxx_asan -fno-rtti %s %ld_flags_rpath_exe1 %ld_flags_rpath_exe2 -o %t diff --git a/compiler-rt/test/asan/TestCases/Posix/start-deactivated.cpp b/compiler-rt/test/asan/TestCases/Posix/start-deactivated.cpp index 9c674ac57e46a..c4f5f8c74a024 100644 --- a/compiler-rt/test/asan/TestCases/Posix/start-deactivated.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/start-deactivated.cpp @@ -1,6 +1,8 @@ // Test for ASAN_OPTIONS=start_deactivated=1 mode. // Main executable is uninstrumented, but linked to ASan runtime. The shared // library is instrumented. Memory errors before dlopen are not detected. 
+// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46862 +// XFAIL: !compiler-rt-optimized // RUN: %clangxx_asan -O0 -DSHARED_LIB %s -std=c++11 -fPIC -shared -o %t-so.so // RUN: %clangxx -O0 %s -std=c++11 -c -o %t.o diff --git a/compiler-rt/test/asan/TestCases/handle_noreturn_bug.cpp b/compiler-rt/test/asan/TestCases/handle_noreturn_bug.cpp index 8c3c66a423d50..1639ad7d0859b 100644 --- a/compiler-rt/test/asan/TestCases/handle_noreturn_bug.cpp +++ b/compiler-rt/test/asan/TestCases/handle_noreturn_bug.cpp @@ -1,4 +1,6 @@ // Regression test: __asan_handle_no_return should unpoison stack even with poison_heap=0. +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46862 +// XFAIL: !compiler-rt-optimized // RUN: %clangxx_asan -O0 %s -o %t && \ // RUN: %env_asan_opts=poison_heap=1 %run %t && \ // RUN: %env_asan_opts=poison_heap=0 %run %t diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/allow_user_segv.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/allow_user_segv.cpp index bd58f4bd92654..03ddf06ae2d41 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/allow_user_segv.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/allow_user_segv.cpp @@ -1,5 +1,7 @@ // Regression test for // https://code.google.com/p/address-sanitizer/issues/detail?id=180 +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx -O0 %s -o %t diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp index 1c9ea982b95b2..208ece3e05af4 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_line.cpp @@ -1,4 +1,6 @@ // Test line numbers in signal handlers +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx %s -o %t -O0 // RUN: %env_tool_opts=handle_segv=1:print_stacktrace=1 not %run %t 1 2>&1 | FileCheck --check-prefixes=CHECK1,CHECK %s diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cpp index 94c50be169b48..1630003e519bf 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/dedup_token_length_test.cpp @@ -1,4 +1,6 @@ // Test dedup_token_length +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx -O0 %s -o %t // RUN: env %tool_options='abort_on_error=0' not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0 --match-full-lines // RUN: env %tool_options='abort_on_error=0, dedup_token_length=0' not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0 --match-full-lines diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_read_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_read_test.cpp index 9615d7132da5e..2addb9bd47508 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_read_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_read_test.cpp @@ -1,4 +1,6 @@ // Test that there was an illegal READ memory access. 
+// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s // REQUIRES: stable-runtime diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_write_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_write_test.cpp index 13d1c6a06905a..531b4d294b8fb 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_write_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/illegal_write_test.cpp @@ -1,4 +1,6 @@ // Test that there was an illegal WRITE memory access. +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s // REQUIRES: stable-runtime diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp index cc7de193f0a41..6ba7025bf7578 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/sanitizer_set_report_fd_test.cpp @@ -1,4 +1,6 @@ // Test __sanitizer_set_report_fd: +// Fails with debug checks: https://bugs.llvm.org/show_bug.cgi?id=46860 +// XFAIL: !compiler-rt-optimized && tsan // RUN: %clangxx -O2 %s -o %t // RUN: not %run %t 2>&1 | FileCheck %s // RUN: not %run %t stdout | FileCheck %s From 03affa8099cba3876aca209daa213dcb880bc34a Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 3 Aug 2020 10:31:31 +0100 Subject: [PATCH 169/600] [msan] Compile the libatomic.c test with a C compiler Otherwise we end up compiling in C++ mode and on FreeBSD /usr/include/stdatomic.h is not compatible with C++ since it uses _Bool. 
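A minimal sketch of the underlying incompatibility (illustrative only; it
merely mirrors the statement above that FreeBSD's header is defined in terms
of _Bool):

  /* atomic_demo.c -- builds with a C compiler: cc -std=c11 -c atomic_demo.c */
  #include <stdatomic.h>

  atomic_bool done; /* on FreeBSD this expands to a type involving _Bool */

  void finish(void) {
    atomic_store(&done, 1); /* fine in C, where _Bool is a keyword */
  }

  /* Compiling the same file as C++ fails on FreeBSD because _Bool is not a
     C++ keyword, hence the switch from %clangxx_msan to %clang_msan below. */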
Reviewed By: guiand, eugenis, vitalybuka, emaste Differential Revision: https://reviews.llvm.org/D84510 --- compiler-rt/test/msan/libatomic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/msan/libatomic.c b/compiler-rt/test/msan/libatomic.c index 5d8aa9ab6a60f..a8c030b7dbb26 100644 --- a/compiler-rt/test/msan/libatomic.c +++ b/compiler-rt/test/msan/libatomic.c @@ -1,6 +1,6 @@ -// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_STORE -O0 %s -o %t && %run %t 2>&1 -// RUN: %clangxx_msan -fsanitize-memory-track-origins=0 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK -// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SHADOW +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_STORE -O0 %s -o %t && %run %t 2>&1 +// RUN: %clang_msan -fsanitize-memory-track-origins=0 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %clang_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SHADOW // PPC has no libatomic // UNSUPPORTED: powerpc64-target-arch From d23b15cc160775dcc5b6e2c2d7706a67be1225f1 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 3 Aug 2020 10:51:14 +0100 Subject: [PATCH 170/600] Execute llvm-lit with the python found by CMake by default The check-* targets run ${Python3_EXECUTABLE} $BUILD/bin/llvm-lit, but running `./bin/llvm-lit $ARGS` from the build directory currently always uses "python" to run llvm-lit. On most systems this will be python2.7 even if we found python3 at CMake time. 
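Concretely, after CMake configuration the generated $BUILD/bin/llvm-lit now
starts with the interpreter CMake discovered rather than whatever `python`
resolves to at run time (the concrete path below is illustrative):

  #!/usr/bin/python3
  # -*- coding: utf-8 -*-
  import os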
Reviewed By: compnerd Differential Revision: https://reviews.llvm.org/D84625 --- llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn | 1 + llvm/utils/llvm-lit/llvm-lit.in | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn index 526b93b4f0b99..648646e4188d6 100644 --- a/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/utils/llvm-lit/BUILD.gn @@ -86,6 +86,7 @@ write_cmake_config("llvm-lit") { values = [ "LLVM_SOURCE_DIR=" + rebase_path("//llvm", dir), + "Python3_EXECUTABLE=$python_path", "BUILD_MODE=.", "LLVM_LIT_CONFIG_MAP=" + config_map, ] diff --git a/llvm/utils/llvm-lit/llvm-lit.in b/llvm/utils/llvm-lit/llvm-lit.in index bfa55c6abfac1..33ec8017cf05f 100755 --- a/llvm/utils/llvm-lit/llvm-lit.in +++ b/llvm/utils/llvm-lit/llvm-lit.in @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!@Python3_EXECUTABLE@ # -*- coding: utf-8 -*- import os From c3339e3e921c2e064936c30f5eacba4e41c432da Mon Sep 17 00:00:00 2001 From: Tatyana Krasnukha Date: Fri, 17 Jul 2020 15:58:28 +0300 Subject: [PATCH 171/600] [cmake] Make MSVC generate appropriate __cplusplus macro definition Differential Revision: https://reviews.llvm.org/D84023 --- llvm/cmake/modules/AddLLVM.cmake | 7 ------- llvm/cmake/modules/HandleLLVMOptions.cmake | 6 ++++++ llvm/utils/unittest/CMakeLists.txt | 5 ----- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index a25f3e87af61c..5b5be953767b5 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1405,13 +1405,6 @@ function(add_unittest test_suite test_name) set(EXCLUDE_FROM_ALL ON) endif() - # Our current version of gtest does not properly recognize C++11 support - # with MSVC, so it falls back to tr1 / experimental classes. Since LLVM - # itself requires C++11, we can safely force it on unconditionally so that - # we don't have to fight with the buggy gtest check. - add_definitions(-DGTEST_LANG_CXX11=1) - add_definitions(-DGTEST_HAS_TR1_TUPLE=0) - include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) if (NOT LLVM_ENABLE_THREADS) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 0df9334363b43..4feb4b7a7f300 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -421,6 +421,12 @@ if( MSVC ) append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Some projects use the __cplusplus preprocessor macro to check support for + # a particular version of the C++ standard. When this option is not specified + # explicitly, macro's value is "199711L" that implies C++98 Standard. + # https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ + append("/Zc:__cplusplus" CMAKE_CXX_FLAGS) + # Allow users to request PDBs in release mode. CMake offeres the # RelWithDebInfo configuration, but it uses different optimization settings # (/Ob1 vs /Ob2 or -O2 vs -O3). 
LLVM provides this flag so that users can get

diff --git a/llvm/utils/unittest/CMakeLists.txt b/llvm/utils/unittest/CMakeLists.txt
index bcae36fa150d1..9127ebce7b4c3 100644
--- a/llvm/utils/unittest/CMakeLists.txt
+++ b/llvm/utils/unittest/CMakeLists.txt
@@ -19,11 +19,6 @@ include_directories(
   googlemock
   )
 
-# LLVM requires C++11 but gtest doesn't correctly detect the availability
-# of C++11 on MSVC, so we force it on.
-add_definitions(-DGTEST_LANG_CXX11=1)
-add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
-
 if(WIN32)
   add_definitions(-DGTEST_OS_WINDOWS=1)
 endif()

From e97c693bb0ece2d9a2b0db75034927405fe3bfdf Mon Sep 17 00:00:00 2001
From: Tatyana Krasnukha
Date: Thu, 30 Jul 2020 20:13:23 +0300
Subject: [PATCH 172/600] [lldb/Process/Windows] Attempting to kill an
 exited/detached process is not an error

The lldb test suite on Windows reports a 'CLEANUP ERROR' when attempting
to kill an exited/detached process. This change makes ProcessWindows
consistent with the other process plugins, which only log the error.
After this change, a number of 'CLEANUP ERROR' messages are no longer
emitted.

Differential Revision: https://reviews.llvm.org/D84957
---
 .../Windows/Common/ProcessDebugger.cpp        | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
index 8a85c8ba6f4e4..07a81cdf69ccd 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
@@ -227,22 +227,20 @@ Status ProcessDebugger::DestroyProcess(const lldb::StateType state) {
     debugger_thread = m_session_data->m_debugger;
   }
 
-  Status error;
-  if (state != eStateExited && state != eStateDetached) {
-    LLDB_LOG(
-        log, "Shutting down process {0}.",
-        debugger_thread->GetProcess().GetNativeProcess().GetSystemHandle());
-    error = debugger_thread->StopDebugging(true);
-
-    // By the time StopDebugging returns, there is no more debugger thread, so
-    // we can be assured that no other thread will race for the session data.
-    m_session_data.reset();
-  } else {
-    error.SetErrorStringWithFormat("cannot destroy process %" PRIx64
-                                   " while state = %d",
-                                   GetDebuggedProcessId(), state);
-    LLDB_LOG(log, "error: {0}", error);
+  if (state == eStateExited || state == eStateDetached) {
+    LLDB_LOG(log, "warning: cannot destroy process {0} while state = {1}.",
+             GetDebuggedProcessId(), state);
+    return Status();
   }
+
+  LLDB_LOG(log, "Shutting down process {0}.",
+           debugger_thread->GetProcess().GetNativeProcess().GetSystemHandle());
+  auto error = debugger_thread->StopDebugging(true);
+
+  // By the time StopDebugging returns, there is no more debugger thread, so
+  // we can be assured that no other thread will race for the session data.
+  m_session_data.reset();
+  return error;
 }

From d1007478f19d3ff19a2ecd5ecb04b467933041e6 Mon Sep 17 00:00:00 2001
From: Alex Richardson
Date: Mon, 3 Aug 2020 11:18:01 +0100
Subject: [PATCH 173/600] Fix update_cc_test_checks.py --llvm-bin after D78478

Not passing --clang would result in a Python exception after D78478
(TypeError: expected str, bytes or os.PathLike object, not NoneType)
because the --clang argument default was only being populated in the
initial argument parsing pass, but not in later ones. Fix this by adding
an argparse callback that sets the default values.
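Reduced to a standalone sketch, the pattern looks like this (hypothetical
option set; the real helper added below is infer_dependent_args, which
common.py invokes again on every re-parse triggered by in-test UTC_ARGS
lines):

  import argparse
  import os

  def infer_defaults(args):
      # --clang defaults to a path derived from --llvm-bin, so it cannot be
      # a static argparse default; recompute it after every parse_args().
      if args.clang is None:
          args.clang = ('clang' if args.llvm_bin is None
                        else os.path.join(args.llvm_bin, 'clang'))

  parser = argparse.ArgumentParser()
  parser.add_argument('--llvm-bin')
  parser.add_argument('--clang')

  args = parser.parse_args(['--llvm-bin', '/opt/llvm/bin'])
  infer_defaults(args)  # run the callback after each parse to stay in sync
  assert args.clang == '/opt/llvm/bin/clang'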
Reviewed By: vitalybuka, MaskRay Differential Revision: https://reviews.llvm.org/D84511 --- llvm/utils/UpdateTestChecks/common.py | 17 +++++++++++------ llvm/utils/update_cc_test_checks.py | 27 ++++++++++++++++----------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 17f738601f61b..35b7ba648d36d 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -44,8 +44,9 @@ def __init__(self, line, line_number, args, argv): class TestInfo(object): def __init__(self, test, parser, script_name, input_lines, args, argv, - comment_prefix): + comment_prefix, argparse_callback): self.parser = parser + self.argparse_callback = argparse_callback self.path = test self.args = args self.argv = argv @@ -68,14 +69,14 @@ def iterlines(self, output_lines): if input_line.startswith(self.autogenerated_note_prefix): continue self.args, self.argv = check_for_command(input_line, self.parser, - self.args, self.argv) + self.args, self.argv, self.argparse_callback) if not self.args.enabled: output_lines.append(input_line) continue yield InputLineInfo(input_line, line_num, self.args, self.argv) -def itertests(test_patterns, parser, script_name, comment_prefix=None): +def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None): for pattern in test_patterns: # On Windows we must expand the patterns ourselves. tests_list = glob.glob(pattern) @@ -86,19 +87,21 @@ def itertests(test_patterns, parser, script_name, comment_prefix=None): with open(test) as f: input_lines = [l.rstrip() for l in f] args = parser.parse_args() + if argparse_callback is not None: + argparse_callback(args) argv = sys.argv[:] first_line = input_lines[0] if input_lines else "" if UTC_ADVERT in first_line: if script_name not in first_line and not args.force_update: warn("Skipping test which wasn't autogenerated by " + script_name, test) continue - args, argv = check_for_command(first_line, parser, args, argv) + args, argv = check_for_command(first_line, parser, args, argv, argparse_callback) elif args.update_only: assert UTC_ADVERT not in first_line warn("Skipping test which isn't autogenerated: " + test) continue yield TestInfo(test, parser, script_name, input_lines, args, argv, - comment_prefix) + comment_prefix, argparse_callback) def should_add_line_to_output(input_line, prefix_set): @@ -510,10 +513,12 @@ def get_autogennote_suffix(parser, args): return autogenerated_note_args -def check_for_command(line, parser, args, argv): +def check_for_command(line, parser, args, argv, argparse_callback): cmd_m = UTC_ARGS_CMD.match(line) if cmd_m: cmd = cmd_m.group('cmd').strip().split(' ') argv = argv + cmd args = parser.parse_args(filter(lambda arg: arg not in args.tests, argv)) + if argparse_callback is not None: + argparse_callback(args) return args, argv diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index fa7b4fa73b98c..ba8a68b8669f9 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -112,6 +112,20 @@ def str_to_commandline(value): return [] return shlex.split(value) + +def infer_dependent_args(args): + if not args.clang: + if not args.llvm_bin: + args.clang = 'clang' + else: + args.clang = os.path.join(args.llvm_bin, 'clang') + if not args.opt: + if not args.llvm_bin: + args.opt = 'opt' + else: + args.opt = os.path.join(args.llvm_bin, 'opt') + + def config(): parser = argparse.ArgumentParser( 
      description=__doc__,
@@ -135,12 +149,8 @@ def config():
       help='Check "Function Attributes" for functions')
   parser.add_argument('tests', nargs='+')
   args = common.parse_commandline_args(parser)
+  infer_dependent_args(args)
 
-  if args.clang is None:
-    if args.llvm_bin is None:
-      args.clang = 'clang'
-    else:
-      args.clang = os.path.join(args.llvm_bin, 'clang')
   if not distutils.spawn.find_executable(args.clang):
     print('Please specify --llvm-bin or --clang', file=sys.stderr)
     sys.exit(1)
@@ -157,11 +167,6 @@ def config():
     common.warn('Could not determine clang builtins directory, some tests '
                 'might not update correctly.')
 
-  if args.opt is None:
-    if args.llvm_bin is None:
-      args.opt = 'opt'
-    else:
-      args.opt = os.path.join(args.llvm_bin, 'opt')
   if not distutils.spawn.find_executable(args.opt):
     # Many uses of this tool will not need an opt binary, because it's only
     # needed for updating a test that runs clang | opt | FileCheck. So we
@@ -203,7 +208,7 @@ def main():
   script_name = os.path.basename(__file__)
 
   for ti in common.itertests(initial_args.tests, parser, 'utils/' + script_name,
-                             comment_prefix='//'):
+                             comment_prefix='//', argparse_callback=infer_dependent_args):
     # Build a list of clang command lines and check prefixes from RUN lines.
     run_list = []
     line2spell_and_mangled_list = collections.defaultdict(list)

From 2bca784ab840a9fe0f4e508cece9cbcce95838bb Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Thu, 30 Jul 2020 17:55:47 +0300
Subject: [PATCH 174/600] [llvm-readobj] - Massive test case cleanup.

This patch does the following:
1) Starts using YAML macros to reduce the number of YAML documents in tests
   (see the sketch below).
2) Adds `#` before `RUN`/`CHECK` lines in a few tests where it is missing.
3) Removes unused YAML keys.
4) Starts using `ENTSIZE=` to simplify tests (see D84526).
5) Removes trailing whitespace in a few places.
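A sketch of the YAML macro pattern from item 1 (the document below is
illustrative; `-DNAME=value` on the yaml2obj command line substitutes
`[[NAME]]`, and a `[[NAME=default]]` placeholder falls back to the default
when no -D option is given):

  # RUN: yaml2obj %s -DABIVERSION=52 -o %t
  --- !ELF
  FileHeader:
    Class:      ELFCLASS64
    Data:       ELFDATA2LSB
    Type:       ET_REL
    Machine:    EM_X86_64
    ABIVersion: [[ABIVERSION]]

One parameterized document like this replaces several near-identical YAML
documents that differ only in a single field.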
Differential revision: https://reviews.llvm.org/D85013 --- .../ELF/amdgpu-elf-definitions.test | 20 +- .../ELF/broken-dynamic-reloc.test | 246 ++---------------- .../llvm-readobj/ELF/broken-dynsym-link.test | 18 +- .../llvm-readobj/ELF/call-graph-profile.test | 3 +- .../llvm-readobj/ELF/dependent-libraries.test | 25 +- .../tools/llvm-readobj/ELF/dyn-symbols.test | 51 ++-- .../ELF/dynamic-reloc-no-section-headers.test | 6 +- .../llvm-readobj/ELF/dynamic-table-dtnull.s | 50 ++-- .../tools/llvm-readobj/ELF/dynamic-tags.test | 204 ++------------- .../ELF/file-header-abi-version.test | 38 +-- .../llvm-readobj/ELF/file-header-os-abi.test | 184 ++----------- .../tools/llvm-readobj/ELF/file-types.test | 76 +----- .../tools/llvm-readobj/ELF/gnu-notes.test | 4 +- .../tools/llvm-readobj/ELF/gnu-phdrs.test | 5 +- .../ELF/gnu-section-mapping-no-phdrs.test | 2 +- .../llvm-readobj/ELF/gnu-section-mapping.test | 2 +- .../tools/llvm-readobj/ELF/gnu-sections.test | 93 +++---- .../tools/llvm-readobj/ELF/hidden-versym.test | 52 ++-- llvm/test/tools/llvm-readobj/ELF/merged.test | 22 +- .../llvm-readobj/ELF/mips-got-overlapped.test | 76 +++--- .../tools/llvm-readobj/ELF/mips-options.test | 40 +-- .../llvm-readobj/ELF/mips-rld-map-rel.test | 44 ++-- .../tools/llvm-readobj/ELF/needed-libs.test | 2 +- .../tools/llvm-readobj/ELF/no-action.test | 22 +- .../tools/llvm-readobj/ELF/packed-relocs.test | 90 +++---- .../llvm-readobj/ELF/pt-gnu-property.test | 2 + .../ELF/reloc-symbol-with-versioning.test | 149 +++++------ .../ELF/reloc-types-elf-aarch64.test | 10 +- .../llvm-readobj/ELF/reloc-types-elf-arm.test | 11 +- .../ELF/reloc-types-elf-i386.test | 23 +- .../ELF/reloc-types-elf-lanai.test | 12 +- .../ELF/reloc-types-elf-mips.test | 11 +- .../ELF/reloc-types-elf-mips64.test | 11 +- .../llvm-readobj/ELF/reloc-types-elf-x64.test | 14 +- .../test/tools/llvm-readobj/ELF/sections.test | 25 +- llvm/test/tools/llvm-readobj/ELF/types.test | 68 +---- .../llvm-readobj/ELF/verdef-invalid.test | 35 +-- .../llvm-readobj/ELF/versym-invalid.test | 34 +-- 38 files changed, 463 insertions(+), 1317 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-definitions.test b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-definitions.test index cb0e6213cafd1..19627378094e3 100644 --- a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-definitions.test +++ b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-definitions.test @@ -1,11 +1,11 @@ -RUN: llvm-readobj --file-headers -l -S --symbols %p/Inputs/trivial.obj.elf-amdhsa-gfx803 | FileCheck %s +# RUN: llvm-readobj --file-headers -l -S --symbols %p/Inputs/trivial.obj.elf-amdhsa-gfx803 | FileCheck %s -CHECK: Format: elf64-amdgpu -CHECK: Arch: unknown -CHECK: ElfHeader { -CHECK: Ident { -CHECK: OS/ABI: AMDGPU_HSA (0x40) -CHECK: ABIVersion: 0 -CHECK: } -CHECK: Machine: EM_AMDGPU (0xE0) -CHECK: } +# CHECK: Format: elf64-amdgpu +# CHECK: Arch: unknown +# CHECK: ElfHeader { +# CHECK: Ident { +# CHECK: OS/ABI: AMDGPU_HSA (0x40) +# CHECK: ABIVersion: 0 +# CHECK: } +# CHECK: Machine: EM_AMDGPU (0xE0) +# CHECK: } diff --git a/llvm/test/tools/llvm-readobj/ELF/broken-dynamic-reloc.test b/llvm/test/tools/llvm-readobj/ELF/broken-dynamic-reloc.test index 9142fc65a025f..4a079c29b83ec 100644 --- a/llvm/test/tools/llvm-readobj/ELF/broken-dynamic-reloc.test +++ b/llvm/test/tools/llvm-readobj/ELF/broken-dynamic-reloc.test @@ -50,7 +50,7 @@ ProgramHeaders: - Section: .dynamic ## Show we print a warning for an invalid relocation table size stored in a DT_RELASZ entry. 
-# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELA -DTAG1=DT_RELASZ -DTAG1VAL=0xFF -DTAG2=DT_RELAENT %s -o %t2 # RUN: llvm-readobj --dyn-relocations %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=INVALID-DT-RELASZ # RUN: llvm-readelf --dyn-relocations %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=INVALID-DT-RELASZ @@ -63,277 +63,73 @@ FileHeader: Type: ET_DYN Machine: EM_X86_64 Sections: - - Name: .rela.dyn - Type: SHT_RELA - Relocations: - - Type: R_X86_64_NONE + - Name: .relx.dyn + Type: SHT_[[RELTYPE]] - Name: .dynamic Type: SHT_DYNAMIC Entries: - - Tag: DT_RELA + - Tag: DT_[[RELTYPE]] Value: 0x0 - - Tag: DT_RELASZ - Value: 0xFF - - Tag: DT_RELAENT - Value: 0x18 + - Tag: [[TAG1]] + Value: [[TAG1VAL=0x18]] + - Tag: [[TAG2]] + Value: [[TAG2VAL=0x18]] - Tag: DT_NULL Value: 0x0 DynamicSymbols: [] ProgramHeaders: - Type: PT_LOAD Sections: - - Section: .rela.dyn + - Section: .relx.dyn - Section: .dynamic ## Show we print a warning for an invalid relocation table entry size stored in a DT_RELAENT entry. -# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELA -DTAG1=DT_RELASZ -DTAG2=DT_RELAENT -DTAG2VAL=0xFF %s -o %t3 # RUN: llvm-readobj --dyn-relocations %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=INVALID-DT-RELAENT # RUN: llvm-readelf --dyn-relocations %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=INVALID-DT-RELAENT ## INVALID-DT-RELAENT: warning: '[[FILE]]': invalid DT_RELASZ value (0x18) or DT_RELAENT value (0xff) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .rela.dyn - Type: SHT_RELA - Relocations: - - Type: R_X86_64_NONE - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_RELA - Value: 0x0 - - Tag: DT_RELASZ - Value: 0x18 - - Tag: DT_RELAENT - Value: 0xFF - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .rela.dyn - - Section: .dynamic - ## Show we print a warning for an invalid relocation table size stored in a DT_RELSZ entry. -# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: yaml2obj --docnum=2 -DRELTYPE=REL -DTAG1=DT_RELSZ -DTAG1VAL=0xFF -DTAG2=DT_RELENT %s -o %t4 # RUN: llvm-readobj --dyn-relocations %t4 2>&1 | FileCheck %s -DFILE=%t4 --check-prefix=INVALID-DT-RELSZ # RUN: llvm-readelf --dyn-relocations %t4 2>&1 | FileCheck %s -DFILE=%t4 --check-prefix=INVALID-DT-RELSZ ## INVALID-DT-RELSZ: warning: '[[FILE]]': invalid DT_RELSZ value (0xff) or DT_RELENT value (0x18) ---- !ELF -FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_386 -Sections: - - Name: .rela.dyn - Type: SHT_REL - Relocations: - - Type: R_386_NONE - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_REL - Value: 0x0 - - Tag: DT_RELSZ - Value: 0xFF - - Tag: DT_RELENT - Value: 0x18 - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .rela.dyn - - Section: .dynamic - ## Show we print a warning for an invalid relocation table entry size stored in a DT_RELENT entry. 
-# RUN: yaml2obj --docnum=5 %s -o %t5 +# RUN: yaml2obj --docnum=2 -DRELTYPE=REL -DTAG1=DT_RELSZ -DTAG2=DT_RELENT -DTAG2VAL=0xFF %s -o %t5 # RUN: llvm-readobj --dyn-relocations %t5 2>&1 | FileCheck %s -DFILE=%t5 --check-prefix=INVALID-DT-RELENT # RUN: llvm-readelf --dyn-relocations %t5 2>&1 | FileCheck %s -DFILE=%t5 --check-prefix=INVALID-DT-RELENT ## INVALID-DT-RELENT: warning: '[[FILE]]': invalid DT_RELSZ value (0x18) or DT_RELENT value (0xff) ---- !ELF -FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_386 -Sections: - - Name: .rela.dyn - Type: SHT_REL - Relocations: - - Type: R_386_NONE - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_REL - Value: 0x0 - - Tag: DT_RELSZ - Value: 0x18 - - Tag: DT_RELENT - Value: 0xFF - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .rela.dyn - - Section: .dynamic - ## Show we print a warning for an invalid relocation table size stored in a DT_RELRSZ/DT_ANDROID_RELRSZ entry. -# RUN: yaml2obj --docnum=6 %s -o %t6 +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELR -DTAG1=DT_RELRSZ -DTAG1VAL=0xFF -DTAG2=DT_RELRENT %s -o %t6 # RUN: llvm-readobj --dyn-relocations %t6 2>&1 | FileCheck %s -DFILE=%t6 --check-prefix=INVALID-DT-RELRSZ # RUN: llvm-readelf --dyn-relocations %t6 2>&1 | FileCheck %s -DFILE=%t6 --check-prefix=INVALID-DT-RELRSZ -# RUN: yaml2obj --docnum=7 %s -o %t7 + +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELR -DTAG1=DT_ANDROID_RELRSZ -DTAG1VAL=0xFF -DTAG2=DT_ANDROID_RELRENT %s -o %t7 # RUN: llvm-readobj --dyn-relocations %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=INVALID-DT-ANDROID-RELRSZ # RUN: llvm-readelf --dyn-relocations %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=INVALID-DT-ANDROID-RELRSZ ## INVALID-DT-RELRSZ: warning: '[[FILE]]': invalid DT_RELRSZ value (0xff) or DT_RELRENT value (0x18) ## INVALID-DT-ANDROID-RELRSZ: warning: '[[FILE]]': invalid DT_ANDROID_RELRSZ value (0xff) or DT_ANDROID_RELRENT value (0x18) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .relr.dyn - Type: SHT_RELR - Flags: [ SHF_ALLOC ] - Content: "" - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_RELR - Value: 0x0 - - Tag: DT_RELRSZ - Value: 0xFF - - Tag: DT_RELRENT - Value: 0x18 - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .relr.dyn - - Section: .dynamic - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .relr.dyn - Type: SHT_RELR - Flags: [ SHF_ALLOC ] - Content: "" - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_RELR - Value: 0x0 - - Tag: DT_ANDROID_RELRSZ - Value: 0xFF - - Tag: DT_ANDROID_RELRENT - Value: 0x18 - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .relr.dyn - - Section: .dynamic - ## Show we print a warning for an invalid relocation table entry size stored in a DT_RELRENT/DT_ANDROID_RELRENT entry. 
-# RUN: yaml2obj --docnum=8 %s -o %t8 +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELR -DTAG1=DT_RELRSZ -DTAG2=DT_RELRENT -DTAG2VAL=0xFF %s -o %t8 # RUN: llvm-readobj --dyn-relocations %t8 2>&1 | FileCheck %s -DFILE=%t8 --check-prefix=INVALID-DT-RELRENT # RUN: llvm-readelf --dyn-relocations %t8 2>&1 | FileCheck %s -DFILE=%t8 --check-prefix=INVALID-DT-RELRENT -# RUN: yaml2obj --docnum=9 %s -o %t9 +# RUN: yaml2obj --docnum=2 -DRELTYPE=RELR -DTAG1=DT_ANDROID_RELRSZ -DTAG2=DT_ANDROID_RELRENT -DTAG2VAL=0xFF %s -o %t9 # RUN: llvm-readobj --dyn-relocations %t9 2>&1 | FileCheck %s -DFILE=%t9 --check-prefix=INVALID-DT-ANDROID-RELRENT # RUN: llvm-readelf --dyn-relocations %t9 2>&1 | FileCheck %s -DFILE=%t9 --check-prefix=INVALID-DT-ANDROID-RELRENT ## INVALID-DT-RELRENT: invalid DT_RELRSZ value (0x18) or DT_RELRENT value (0xff) ## INVALID-DT-ANDROID-RELRENT: invalid DT_ANDROID_RELRSZ value (0x18) or DT_ANDROID_RELRENT value (0xff) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .relr.dyn - Type: SHT_RELR - Flags: [ SHF_ALLOC ] - Content: "" - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_RELR - Value: 0x0 - - Tag: DT_RELRSZ - Value: 0x18 - - Tag: DT_RELRENT - Value: 0xFF - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .relr.dyn - - Section: .dynamic - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .relr.dyn - Type: SHT_RELR - Flags: [ SHF_ALLOC ] - Content: "" - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_RELR - Value: 0x0 - - Tag: DT_ANDROID_RELRSZ - Value: 0x18 - - Tag: DT_ANDROID_RELRENT - Value: 0xFF - - Tag: DT_NULL - Value: 0x0 -DynamicSymbols: [] -ProgramHeaders: - - Type: PT_LOAD - Sections: - - Section: .relr.dyn - - Section: .dynamic - ## Show we print a warning for an invalid value of DT_PLTRELSZ, which describes the total size ## of the relocation entries associated with the procedure linkage table. -# RUN: yaml2obj --docnum=10 %s -o %t10 +# RUN: yaml2obj --docnum=3 %s -o %t10 # RUN: llvm-readobj --dyn-relocations %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=INVALID-DT-PLTRELSZ # RUN: llvm-readelf --dyn-relocations %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=INVALID-DT-PLTRELSZ @@ -369,7 +165,7 @@ ProgramHeaders: - Section: .dynamic ## Show we print a warning when dumping dynamic relocations if there is no dynamic symbol table. -# RUN: yaml2obj --docnum=11 %s -o %t11 +# RUN: yaml2obj --docnum=4 %s -o %t11 # RUN: llvm-readobj --dyn-relocations %t11 2>&1 | FileCheck %s -DFILE=%t11 --check-prefix=LLVM-NO-DYNSYM # RUN: llvm-readelf --dyn-relocations %t11 2>&1 | FileCheck %s -DFILE=%t11 --check-prefix=GNU-NO-DYNSYM @@ -419,7 +215,7 @@ ProgramHeaders: ## Show we print a warning when the symbol index of a dynamic relocation is too ## large (goes past the end of the dynamic symbol table). -# RUN: yaml2obj --docnum=12 %s -o %t12 +# RUN: yaml2obj --docnum=5 %s -o %t12 # RUN: llvm-readobj --dyn-relocations %t12 2>&1 | FileCheck %s -DFILE=%t12 --check-prefix=LLVM-INVALID-DYNSYM # RUN: llvm-readelf --dyn-relocations %t12 2>&1 | FileCheck %s -DFILE=%t12 --check-prefix=GNU-INVALID-DYNSYM @@ -465,7 +261,7 @@ ProgramHeaders: - Section: .dynamic ## Show that when we have both REL and RELA relocations, we dump both sets. 
-# RUN: yaml2obj --docnum=13 %s -o %t13 +# RUN: yaml2obj --docnum=6 %s -o %t13 # RUN: llvm-readobj --dyn-relocations %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=BOTH-RELA-REL-LLVM # RUN: llvm-readelf --dyn-relocations %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=BOTH-RELA-REL-GNU diff --git a/llvm/test/tools/llvm-readobj/ELF/broken-dynsym-link.test b/llvm/test/tools/llvm-readobj/ELF/broken-dynsym-link.test index d64b1dfb11a87..a685c7eccc1f4 100644 --- a/llvm/test/tools/llvm-readobj/ELF/broken-dynsym-link.test +++ b/llvm/test/tools/llvm-readobj/ELF/broken-dynsym-link.test @@ -2,7 +2,7 @@ ## .dynsym section's sh_link field is broken. ## Case 1: sh_link is set to 0. -# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: yaml2obj --docnum=1 -DLINK=0 %s -o %t1 # RUN: llvm-readobj -S %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefixes=LLVM,ERR # RUN: llvm-readelf -S %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefixes=GNU,ERR @@ -32,25 +32,15 @@ FileHeader: Sections: - Name: .dynsym Type: SHT_DYNSYM + Link: [[LINK]] ## Case 2: sh_link is set to 255, which is larger than the number of the sections. -# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: yaml2obj --docnum=1 -DLINK=255 %s -o %t2 # RUN: llvm-readobj -S %t2 2>&1 | FileCheck -DFILE=%t2 %s --check-prefixes=LLVM2,ERR2 # RUN: llvm-readelf -S %t2 2>&1 | FileCheck -DFILE=%t2 %s --check-prefixes=GNU2,ERR2 -# ERR2: warning: '[[FILE]]': invalid section index - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .dynsym - Type: SHT_DYNSYM - Link: 255 +# ERR2: warning: '[[FILE]]': invalid section index: 255 # LLVM2: Name: .dynsym # LLVM2-NEXT: Type: SHT_DYNSYM diff --git a/llvm/test/tools/llvm-readobj/ELF/call-graph-profile.test b/llvm/test/tools/llvm-readobj/ELF/call-graph-profile.test index 8ccc93cf426f0..65709cb5f6969 100644 --- a/llvm/test/tools/llvm-readobj/ELF/call-graph-profile.test +++ b/llvm/test/tools/llvm-readobj/ELF/call-graph-profile.test @@ -37,8 +37,7 @@ Sections: - From: bar To: foo Weight: 98 -## 0x10 is the normal entry size for the SHT_LLVM_CALL_GRAPH_PROFILE section. - EntSize: [[ENTSIZE=0x10]] + EntSize: [[ENTSIZE=]] Symbols: - Name: foo - Name: bar diff --git a/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test b/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test index 97f8a75781398..440217f3253d3 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test +++ b/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test @@ -30,8 +30,18 @@ FileHeader: Sections: - Name: .deplibs Type: SHT_LLVM_DEPENDENT_LIBRARIES + ShType: [[TYPE=]] Libraries: [ foo, bar, foo ] +## Show the output when there are no dependent library sections. Check that we are +## locating dependent library sections by type. To do this we change the type to an arbitrary one. +# RUN: yaml2obj --docnum=1 -DTYPE=SHT_PROGBITS %s -o %t3 +# RUN: llvm-readobj --dependent-libraries %t3 2>&1 | FileCheck %s --check-prefix=NONE +# RUN: llvm-readelf --dependent-libraries %t3 2>&1 | FileCheck %s --allow-empty --implicit-check-not={{.}} + +# NONE: DependentLibs [ +# NONE-NEXT: ] + ## Now, check how we dump a mix of valid, empty and invalid SHT_LLVM_DEPENDENT_LIBRARIES sections. # RUN: yaml2obj --docnum=2 %s -o %t2 @@ -98,18 +108,3 @@ Sections: - ShName: 0x10000 Type: SHT_LLVM_DEPENDENT_LIBRARIES Libraries: [ baz ] - -## Show the output when there are no dependent library sections. 
-# RUN: yaml2obj --docnum=3 %s -o %t3 -# RUN: llvm-readobj --dependent-libraries %t3 2>&1 | FileCheck %s --check-prefix=NONE -# RUN: llvm-readelf --dependent-libraries %t3 2>&1 | FileCheck %s --allow-empty --implicit-check-not={{.}} - -# NONE: DependentLibs [ -# NONE-NEXT: ] - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 diff --git a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test index 6169871034d5f..4a9daa2641295 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test +++ b/llvm/test/tools/llvm-readobj/ELF/dyn-symbols.test @@ -134,41 +134,24 @@ Sections: Value: 0xffff1234 - Tag: DT_NULL Value: 0 + - Name: .dynsym + Type: SHT_DYNSYM + ShName: [[DYNSYMNAME=]] DynamicSymbols: - Name: foo ## Case 3.2: the same as 3.1, but the sh_name field of the SHT_DYNSYM section is invalid. ## Check we are still able to dump symbols. -# RUN: yaml2obj --docnum=3 %s -o %t2.broken.name +# RUN: yaml2obj --docnum=2 -DDYNSYMNAME=0xffffffff %s -o %t2.broken.name # RUN: llvm-readobj %t2.broken.name --dyn-symbols 2>&1 | \ # RUN: FileCheck %s -DFILE=%t2.broken.name --check-prefix=NOPHDRS-LLVM --implicit-check-not=warning: # RUN: llvm-readelf %t2.broken.name --dyn-symbols 2>&1 | \ # RUN: FileCheck %s -DFILE=%t2.broken.name -DNAME="" \ # RUN: --check-prefixes=NOPHDRS-GNU,NOPHDRS-NAMEWARN --implicit-check-not=warning: ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_SYMTAB - Value: 0xffff1234 - - Tag: DT_NULL - Value: 0 - - Name: .dynsym - Type: SHT_DYNSYM - ShName: 0xffffffff -DynamicSymbols: - - Name: foo - ## Case 4: Check we report a warning when there is no SHT_DYNSYM section and we can't map the DT_SYMTAB value ## to an address because of the absence of a corresponding PT_LOAD program header. -# RUN: yaml2obj --docnum=4 %s -o %t3.so +# RUN: yaml2obj --docnum=3 %s -o %t3.so # RUN: llvm-readobj %t3.so --dyn-symbols 2>&1 | FileCheck %s -DFILE=%t3.so --check-prefixes=NOSHT-DYNSYM,NOSHT-DYNSYM-LLVM # RUN: llvm-readelf %t3.so --dyn-symbols 2>&1 | FileCheck %s -DFILE=%t3.so --check-prefix=NOSHT-DYNSYM @@ -197,7 +180,7 @@ DynamicSymbols: ## Case 5: Check that when we can't map the value of the DT_SYMTAB tag to an address, we report a warning and ## use the information in the section header table to locate the dynamic symbol table. -# RUN: yaml2obj --docnum=5 %s -o %t4.so +# RUN: yaml2obj --docnum=4 %s -o %t4.so # RUN: llvm-readobj %t4.so --dyn-symbols 2>&1 | FileCheck -DFILE=%t4.so %s --check-prefixes=BROKEN-DTSYMTAB,BROKEN-DTSYMTAB-LLVM # RUN: llvm-readelf %t4.so --dyn-symbols 2>&1 | FileCheck -DFILE=%t4.so %s --check-prefixes=BROKEN-DTSYMTAB,BROKEN-DTSYMTAB-GNU @@ -229,7 +212,7 @@ ProgramHeaders: ## Case 6: Check that if we can get the location of the dynamic symbol table using both the DT_SYMTAB value ## and the section headers table then we prefer the former and report a warning. -# RUN: yaml2obj --docnum=6 %s -o %t5.so +# RUN: yaml2obj --docnum=5 %s -o %t5.so # RUN: llvm-readobj %t5.so --dyn-symbols 2>&1 | FileCheck -DFILE=%t5.so %s --check-prefixes=PREFER-DTSYMTAB,PREFER-DTSYMTAB-LLVM # RUN: llvm-readelf %t5.so --dyn-symbols 2>&1 | FileCheck -DFILE=%t5.so %s --check-prefixes=PREFER-DTSYMTAB,PREFER-DTSYMTAB-GNU @@ -267,7 +250,7 @@ ProgramHeaders: ## Case 7: Check how we dump versioned symbols. Use both -V and --dyn-symbols ## to check that printed version is consistent. 
-# RUN: yaml2obj %s --docnum=7 -o %t6 +# RUN: yaml2obj %s --docnum=6 -o %t6 # RUN: llvm-readobj -V --dyn-symbols %t6 | FileCheck %s --check-prefix=VERSIONED-LLVM # RUN: llvm-readelf -V --dyn-symbols %t6 | FileCheck %s --check-prefix=VERSIONED-GNU @@ -350,15 +333,15 @@ DynamicSymbols: ## Case 8: Check what we print when: ## a) The dynamic symbol table does not exist. -# RUN: yaml2obj %s --docnum=8 -o %t7 +# RUN: yaml2obj %s --docnum=7 -o %t7 # RUN: llvm-readobj --dyn-symbols %t7 | FileCheck %s --check-prefix=NO-DYNSYM-LLVM # RUN: llvm-readelf --dyn-symbols %t7 | count 0 ## b) The dynamic symbol table has a size of 0. -# RUN: yaml2obj %s --docnum=9 -o %t8 +# RUN: yaml2obj %s --docnum=8 -o %t8 # RUN: llvm-readobj --dyn-symbols %t8 | FileCheck %s --check-prefix=NO-DYNSYM-LLVM # RUN: llvm-readelf --dyn-symbols %t8 | count 0 ## c) The dynamic symbol table only contains the null symbol. -# RUN: yaml2obj %s --docnum=10 -o %t9 +# RUN: yaml2obj %s --docnum=9 -o %t9 # RUN: llvm-readobj --dyn-symbols %t9 | FileCheck %s --check-prefix=DYNSYM-EMPTY-LLVM # RUN: llvm-readelf --dyn-symbols %t9 | FileCheck %s --check-prefix=DYNSYM-EMPTY-GNU @@ -409,7 +392,7 @@ DynamicSymbols: [] ## Case 9: Check what we print when: ## a) The size of the dynamic symbol table is not a multiple of its entry size. -# RUN: yaml2obj %s --docnum=11 -o %t10 +# RUN: yaml2obj %s --docnum=10 -o %t10 # RUN: llvm-readobj --dyn-symbols %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=DYNSYM-SIZE-INVALID1 # RUN: llvm-readelf --dyn-symbols %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=DYNSYM-SIZE-INVALID1 @@ -419,7 +402,7 @@ DynamicSymbols: [] ## information about a location and an entity size of the dynamic symbol table from the section header. ## The code uses sizeof(Elf_Sym) for an entity size, so it can't be incorrect and ## the message printed is a bit shorter. -# RUN: yaml2obj %s --docnum=12 -o %t11 +# RUN: yaml2obj %s --docnum=11 -o %t11 # RUN: llvm-readobj --dyn-symbols %t11 2>&1 | FileCheck %s -DFILE=%t11 --check-prefix=DYNSYM-SIZE-INVALID2 # RUN: llvm-readelf --dyn-symbols %t11 2>&1 | FileCheck %s -DFILE=%t11 --check-prefix=DYNSYM-SIZE-INVALID2 @@ -427,10 +410,10 @@ DynamicSymbols: [] ## c) In the case when the DT_SYMENT tag is present, we report when it's value does not match the # value of the symbol size for the platform. -# RUN: yaml2obj %s -D BITS=32 --docnum=13 -o %t12 +# RUN: yaml2obj %s -D BITS=32 --docnum=12 -o %t12 # RUN: llvm-readobj --dyn-symbols %t12 2>&1 | FileCheck %s -DFILE=%t12 --check-prefix=DYNSYM-SIZE-INVALID3 # RUN: llvm-readelf --dyn-symbols %t12 2>&1 | FileCheck %s -DFILE=%t12 --check-prefix=DYNSYM-SIZE-INVALID3 -# RUN: yaml2obj %s -D BITS=64 --docnum=13 -o %t13 +# RUN: yaml2obj %s -D BITS=64 --docnum=12 -o %t13 # RUN: llvm-readobj --dyn-symbols %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=DYNSYM-SIZE-INVALID4 # RUN: llvm-readelf --dyn-symbols %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=DYNSYM-SIZE-INVALID4 @@ -490,7 +473,7 @@ Sections: ## Check we report a warning when the DT_STRSZ value is broken so that the dynamic string ## table goes past the end of the file. Document we stop dumping symbols and report an error. 
-# RUN: yaml2obj %s --docnum=14 -o %t14 +# RUN: yaml2obj %s --docnum=13 -o %t14 # RUN: llvm-readobj --dyn-symbols %t14 2>&1 | \ # RUN: FileCheck %s -DFILE=%t14 --check-prefix=DYNSTR-INVALID-LLVM # RUN: llvm-readelf --dyn-symbols %t14 2>&1 | \ @@ -570,7 +553,7 @@ ProgramHeaders: - Section: .dynamic ## Check we report a warning when the entry size of the dynamic symbol table is zero. -# RUN: yaml2obj %s --docnum=15 -o %t15 +# RUN: yaml2obj %s --docnum=14 -o %t15 # RUN: llvm-readobj --dyn-symbols %t15 2>&1 | FileCheck %s -DFILE=%t15 --check-prefix=DYNSYM-ZERO-ENTSIZE-LLVM # RUN: llvm-readelf --dyn-symbols %t15 2>&1 | \ # RUN: FileCheck %s -DFILE=%t15 --check-prefix=DYNSYM-ZERO-ENTSIZE-GNU --implicit-check-not="Symbol table" diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc-no-section-headers.test b/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc-no-section-headers.test index 1ec81578b9cc9..b6c6f102c9c1b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc-no-section-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc-no-section-headers.test @@ -21,10 +21,6 @@ FileHeader: Data: ELFDATA2LSB Type: ET_DYN Machine: EM_X86_64 -## We simulate no section header table by -## overriding the ELF header properties. - EShOff: 0x0 - EShNum: 0x0 Sections: - Name: .rela.dyn Type: SHT_RELA @@ -66,3 +62,5 @@ ProgramHeaders: - Type: PT_DYNAMIC Sections: - Section: .dynamic +SectionHeaderTable: + NoHeaders: true diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-table-dtnull.s b/llvm/test/tools/llvm-readobj/ELF/dynamic-table-dtnull.s index 5524bdde154c5..b199bc428f2f9 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dynamic-table-dtnull.s +++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-table-dtnull.s @@ -15,26 +15,21 @@ --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 Sections: - - Name: .dynamic - Type: SHT_DYNAMIC - Address: 0x0000000000001010 - AddressAlign: 0x0000000000000010 - EntSize: 0x0000000000000010 + - Name: .dynamic + Type: SHT_DYNAMIC Entries: - - Tag: DT_DEBUG - Value: 0x0000000000000000 + - Tag: DT_DEBUG + Value: 0x0000000000000000 ProgramHeaders: - Type: PT_LOAD - VAddr: 0x1000 Sections: - Section: .dynamic - Type: PT_DYNAMIC - VAddr: 0x1010 Sections: - Section: .dynamic @@ -59,29 +54,24 @@ ProgramHeaders: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 Sections: - - Name: .dynamic - Type: SHT_DYNAMIC - Address: 0x0000000000001010 - AddressAlign: 0x0000000000000010 - EntSize: 0x0000000000000010 + - Name: .dynamic + Type: SHT_DYNAMIC Entries: - - Tag: DT_DEBUG - Value: 0x0000000000000000 - - Tag: DT_NULL - Value: 0x0000000000000000 - - Tag: DT_NULL - Value: 0x0000000000000000 + - Tag: DT_DEBUG + Value: 0x0000000000000000 + - Tag: DT_NULL + Value: 0x0000000000000000 + - Tag: DT_NULL + Value: 0x0000000000000000 ProgramHeaders: - Type: PT_LOAD - VAddr: 0x1000 Sections: - Section: .dynamic - Type: PT_DYNAMIC - VAddr: 0x1010 Sections: - Section: .dynamic diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test b/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test index 5f8b709cef5b6..24cc439bdbdbb 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test +++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test @@ -140,11 +140,11 @@ --- !ELF FileHeader: - Class: ELFCLASS64 + Class: ELFCLASS[[BITS=64]] Data: 
ELFDATA2LSB Type: ET_EXEC - Machine: EM_X86_64 - EPhEntSize: [[PHENTSIZE=56]] + Machine: EM_NONE + EPhEntSize: [[PHENTSIZE=]] Sections: - Name: .dynstr Type: SHT_STRTAB @@ -186,7 +186,7 @@ Sections: - Tag: DT_RPATH Value: 0x5 - Tag: DT_SYMBOLIC - Value: 0x1234567890abcdef + Value: [[SYMBOLIC=0x1234567890abcdef]] - Tag: DT_REL Value: 0x1000 - Tag: DT_RELSZ @@ -196,13 +196,13 @@ Sections: - Tag: DT_PLTREL Value: 0x7 - Tag: DT_DEBUG - Value: 0xfedcba0987654321 + Value: [[DEBUG=0xfedcba0987654321]] - Tag: DT_TEXTREL - Value: 0x1122334455667788 + Value: [[TEXTREL=0x1122334455667788]] - Tag: DT_JMPREL Value: 0x1000 - Tag: DT_BIND_NOW - Value: 0x8877665544332211 + Value: [[BINDNOW=0x8877665544332211]] - Tag: DT_INIT_ARRAY Value: 0x1000 - Tag: DT_FINI_ARRAY @@ -214,7 +214,7 @@ Sections: - Tag: DT_RUNPATH Value: 0x7 - Tag: DT_FLAGS - Value: 0xffffffffffffffff + Value: [[FLAGS=0xffffffffffffffff]] - Tag: DT_PREINIT_ARRAY Value: 0x1000 - Tag: DT_PREINIT_ARRAYSZ @@ -252,7 +252,7 @@ Sections: - Tag: DT_RELCOUNT Value: 0x0 - Tag: DT_FLAGS_1 - Value: 0xffffffffffffffff + Value: [[FLAGS=0xffffffffffffffff]] - Tag: DT_VERSYM Value: 0x1000 - Tag: DT_VERDEF @@ -272,13 +272,13 @@ Sections: ## Show behaviour for unknown values in special and un-marked ranges. ## An arbitrary unknown value outside of the special ranges. - Tag: 0x12345678 - Value: 0x8765432187654321 + Value: [[UNKNOWN=0x8765432187654321]] ## An OS specific tag. - Tag: 0x6abcdef0 - Value: 0x9988776655443322 + Value: [[OSSPEC=0x9988776655443322]] ## A processor specific tags. - Tag: 0x76543210 - Value: 0x5555666677778888 + Value: [[PROCSPEC=0x5555666677778888]] - Tag: DT_NULL Value: 0x0 ProgramHeaders: @@ -292,7 +292,9 @@ ProgramHeaders: Sections: - Section: .dynamic -# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: yaml2obj %s --docnum=1 -DBITS=32 -DSYMBOLIC=0x12345678 -DDEBUG=0xfedcba09 \ +# RUN: -DTEXTREL=0x11223344 -DBINDNOW=0x88776655 -DFLAGS=0xffffffff \ +# RUN: -DUNKNOWN=0x87654321 -DOSSPEC=0x99887766 -DPROCSPEC=0x55556666 -o %t2 # RUN: llvm-readobj --dynamic-table %t2 \ # RUN: | FileCheck %s --check-prefix=LLVM32 --strict-whitespace --match-full-lines # RUN: llvm-readobj -d %t2 | FileCheck %s --check-prefix=LLVM32 --strict-whitespace --match-full-lines @@ -429,169 +431,16 @@ ProgramHeaders: # GNU32-NEXT: 0x76543210 (0x76543210) 0x55556666 # GNU32-NEXT: 0x00000000 (NULL) 0x0 ---- !ELF -FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_386 -Sections: - - Name: .dynstr - Type: SHT_STRTAB - Address: 0x1000 - Size: 0x10 - Content: "004400550066007700" - - Name: .dynamic - Type: SHT_DYNAMIC - Address: 0x1010 - Entries: - - Tag: DT_NEEDED - Value: 0x1 - - Tag: DT_PLTRELSZ - Value: 0x10 - - Tag: DT_PLTGOT - Value: 0x1000 - - Tag: DT_HASH - Value: 0x1000 - - Tag: DT_STRTAB - Value: 0x1000 - - Tag: DT_SYMTAB - Value: 0x1000 - - Tag: DT_RELA - Value: 0x1000 - - Tag: DT_RELASZ - Value: 0x10 - - Tag: DT_RELAENT - Value: 0x789 - - Tag: DT_STRSZ - Value: 0x10 - - Tag: DT_SYMENT - Value: 0x987 - - Tag: DT_INIT - Value: 0x1000 - - Tag: DT_FINI - Value: 0x1000 - - Tag: DT_SONAME - Value: 0x3 - - Tag: DT_RPATH - Value: 0x5 - - Tag: DT_SYMBOLIC - Value: 0x12345678 - - Tag: DT_REL - Value: 0x1000 - - Tag: DT_RELSZ - Value: 0x10 - - Tag: DT_RELENT - Value: 0x123 - - Tag: DT_PLTREL - Value: 0x7 - - Tag: DT_DEBUG - Value: 0xfedcba09 - - Tag: DT_TEXTREL - Value: 0x11223344 - - Tag: DT_JMPREL - Value: 0x1000 - - Tag: DT_BIND_NOW - Value: 0x88776655 - - Tag: DT_INIT_ARRAY - Value: 0x1000 - - Tag: DT_FINI_ARRAY - Value: 0x1000 - - Tag: 
DT_INIT_ARRAYSZ - Value: 0x10 - - Tag: DT_FINI_ARRAYSZ - Value: 0x10 - - Tag: DT_RUNPATH - Value: 0x7 - - Tag: DT_FLAGS - Value: 0xffffffff - - Tag: DT_PREINIT_ARRAY - Value: 0x1000 - - Tag: DT_PREINIT_ARRAYSZ - Value: 0x10 - - Tag: DT_SYMTAB_SHNDX - Value: 0x1000 - - Tag: DT_RELRSZ - Value: 0x10 - - Tag: DT_RELR - Value: 0x1000 - - Tag: DT_RELRENT - Value: 0x4321 - - Tag: DT_ANDROID_REL - Value: 0x1000 - - Tag: DT_ANDROID_RELSZ - Value: 0x10 - - Tag: DT_ANDROID_RELA - Value: 0x1000 - - Tag: DT_ANDROID_RELASZ - Value: 0x10 - - Tag: DT_ANDROID_RELR - Value: 0x1000 - - Tag: DT_ANDROID_RELRSZ - Value: 0x10 - - Tag: DT_ANDROID_RELRENT - Value: 0x1234 - - Tag: DT_GNU_HASH - Value: 0x1000 - - Tag: DT_TLSDESC_PLT - Value: 0x1000 - - Tag: DT_TLSDESC_GOT - Value: 0x1000 - - Tag: DT_RELACOUNT - Value: 0x0 - - Tag: DT_RELCOUNT - Value: 0x0 - - Tag: DT_FLAGS_1 - Value: 0xffffffff - - Tag: DT_VERSYM - Value: 0x1000 - - Tag: DT_VERDEF - Value: 0x1000 - - Tag: DT_VERDEFNUM - Value: 0x0 - - Tag: DT_VERNEED - Value: 0x1000 - - Tag: DT_VERNEEDNUM - Value: 0x0 - - Tag: DT_AUXILIARY - Value: 0x1 - - Tag: DT_USED - Value: 0x3 - - Tag: DT_FILTER - Value: 0x3 -## Show behaviour for unknown values in special and un-marked ranges. -## An arbitrary unknown value outside of the special ranges. - - Tag: 0x12345678 - Value: 0x87654321 -## An OS specific tag. - - Tag: 0x6abcdef0 - Value: 0x99887766 -## A processor specific tags. - - Tag: 0x76543210 - Value: 0x55556666 - - Tag: DT_NULL - Value: 0x0 -ProgramHeaders: - - Type: PT_LOAD - VAddr: 0x1000 - Sections: - - Section: .dynstr - - Section: .dynamic - - Type: PT_DYNAMIC - VAddr: 0x1010 - Sections: - - Section: .dynamic - ## When printing the "Name/Value" column we want to have the minimal possible indentation. ## Use an arbitrary dynamic tag to demonstrate this. -# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: yaml2obj %s --docnum=2 -o %t3 # RUN: llvm-readobj --dynamic-table %t3 \ # RUN: | FileCheck %s --check-prefix=INDENT-LLVM-64 --strict-whitespace # RUN: llvm-readelf --dynamic-table %t3 \ # RUN: | FileCheck %s --check-prefix=INDENT-GNU-64 --strict-whitespace -# RUN: yaml2obj %s --docnum=4 -o %t4 +# RUN: yaml2obj %s --docnum=2 -DBITS=32 -o %t4 # RUN: llvm-readobj --dynamic-table %t4 \ # RUN: | FileCheck %s --check-prefix=INDENT-LLVM-32 --strict-whitespace # RUN: llvm-readelf --dynamic-table %t4 \ @@ -611,25 +460,10 @@ ProgramHeaders: --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 -Sections: - - Name: .dynamic - Type: SHT_DYNAMIC - Entries: - - Tag: DT_NEEDED - Value: 0x1 - - Tag: DT_NULL - Value: 0x0 - ---- !ELF -FileHeader: - Class: ELFCLASS32 + Class: ELFCLASS[[BITS=64]] Data: ELFDATA2LSB Type: ET_EXEC - Machine: EM_386 + Machine: EM_NONE Sections: - Name: .dynamic Type: SHT_DYNAMIC diff --git a/llvm/test/tools/llvm-readobj/ELF/file-header-abi-version.test b/llvm/test/tools/llvm-readobj/ELF/file-header-abi-version.test index 95e9e2636f8b9..61c02ee412e85 100644 --- a/llvm/test/tools/llvm-readobj/ELF/file-header-abi-version.test +++ b/llvm/test/tools/llvm-readobj/ELF/file-header-abi-version.test @@ -1,24 +1,26 @@ ## This is a test to test how the ABI version field (EI_ABIVERSION) of an ELF file header is dumped. ## EI_ABIVERSION is set to zero. 
-# RUN: yaml2obj %s --docnum=1 -o %t.abiver.zero +# RUN: yaml2obj %s -DABIVERSION=0 -o %t.abiver.zero # RUN: llvm-readobj --file-headers %t.abiver.zero | FileCheck %s --match-full-lines --check-prefix=ABIVER-ZERO-LLVM # RUN: llvm-readelf --file-headers %t.abiver.zero | FileCheck %s --match-full-lines --check-prefix=ABIVER-ZERO-GNU -## EI_ABIVERSION is set to an arbitrary number. -# RUN: yaml2obj %s --docnum=2 -o %t.abiver.any -# RUN: llvm-readobj --file-headers %t.abiver.any | FileCheck %s --match-full-lines --check-prefix=ABIVER-ANY-LLVM -# RUN: llvm-readelf --file-headers %t.abiver.any | FileCheck %s --match-full-lines --check-prefix=ABIVER-ANY-GNU -## EI_ABIVERSION is set to the maximum possible value. -# RUN: yaml2obj %s --docnum=3 -o %t.abiver.max -# RUN: llvm-readobj --file-headers %t.abiver.max | FileCheck %s --match-full-lines --check-prefix=ABIVER-MAX-LLVM -# RUN: llvm-readelf --file-headers %t.abiver.max | FileCheck %s --match-full-lines --check-prefix=ABIVER-MAX-GNU # ABIVER-ZERO-LLVM: ABIVersion: 0 # ABIVER-ZERO-GNU: ABI Version: 0 +## EI_ABIVERSION is set to an arbitrary number. +# RUN: yaml2obj %s -DABIVERSION=52 -o %t.abiver.any +# RUN: llvm-readobj --file-headers %t.abiver.any | FileCheck %s --match-full-lines --check-prefix=ABIVER-ANY-LLVM +# RUN: llvm-readelf --file-headers %t.abiver.any | FileCheck %s --match-full-lines --check-prefix=ABIVER-ANY-GNU + # ABIVER-ANY-LLVM: ABIVersion: 52 # ABIVER-ANY-GNU: ABI Version: 52 +## EI_ABIVERSION is set to the maximum possible value. +# RUN: yaml2obj %s -DABIVERSION=255 -o %t.abiver.max +# RUN: llvm-readobj --file-headers %t.abiver.max | FileCheck %s --match-full-lines --check-prefix=ABIVER-MAX-LLVM +# RUN: llvm-readelf --file-headers %t.abiver.max | FileCheck %s --match-full-lines --check-prefix=ABIVER-MAX-GNU + # ABIVER-MAX-LLVM: ABIVersion: 255 # ABIVER-MAX-GNU: ABI Version: 255 @@ -28,20 +30,4 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_X86_64 - ABIVersion: 0x0 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - ABIVersion: 0x34 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - ABIVersion: 0xFF + ABIVersion: [[ABIVERSION]] diff --git a/llvm/test/tools/llvm-readobj/ELF/file-header-os-abi.test b/llvm/test/tools/llvm-readobj/ELF/file-header-os-abi.test index c169d29ff5baa..642976775808a 100644 --- a/llvm/test/tools/llvm-readobj/ELF/file-header-os-abi.test +++ b/llvm/test/tools/llvm-readobj/ELF/file-header-os-abi.test @@ -1,7 +1,7 @@ ## This is a test to test how the OS/ABI identification field (EI_OSABI) of an ELF file header is dumped. ## EI_OSABI is set to ELFOSABI_NONE. -# RUN: yaml2obj %s --docnum=1 -o %t.osabi.none +# RUN: yaml2obj %s -DOSABI=ELFOSABI_NONE -o %t.osabi.none # RUN: llvm-readobj --file-headers %t.osabi.none | FileCheck %s --match-full-lines --check-prefix=OSABI-NONE-LLVM # RUN: llvm-readelf --file-headers %t.osabi.none | FileCheck %s --match-full-lines --check-prefix=OSABI-NONE-GNU @@ -14,292 +14,148 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_X86_64 - OSABI: ELFOSABI_NONE + OSABI: [[OSABI]] ## EI_OSABI is set to ELFOSABI_HPUX. 
-# RUN: yaml2obj %s --docnum=2 -o %t.osabi.hpux +# RUN: yaml2obj %s -DOSABI=ELFOSABI_HPUX -o %t.osabi.hpux # RUN: llvm-readobj --file-headers %t.osabi.hpux | FileCheck %s --match-full-lines --check-prefix=OSABI-HPUX-LLVM # RUN: llvm-readelf --file-headers %t.osabi.hpux | FileCheck %s --match-full-lines --check-prefix=OSABI-HPUX-GNU # OSABI-HPUX-LLVM: OS/ABI: HPUX (0x1) # OSABI-HPUX-GNU: OS/ABI: UNIX - HP-UX ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_HPUX - ## EI_OSABI is set to ELFOSABI_NETBSD. -# RUN: yaml2obj %s --docnum=3 -o %t.osabi.netbsd +# RUN: yaml2obj %s -DOSABI=ELFOSABI_NETBSD -o %t.osabi.netbsd # RUN: llvm-readobj --file-headers %t.osabi.netbsd | FileCheck %s --match-full-lines --check-prefix=OSABI-NETBSD-LLVM # RUN: llvm-readelf --file-headers %t.osabi.netbsd | FileCheck %s --match-full-lines --check-prefix=OSABI-NETBSD-GNU # OSABI-NETBSD-LLVM: OS/ABI: NetBSD (0x2) # OSABI-NETBSD-GNU: OS/ABI: UNIX - NetBSD ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_NETBSD - ## EI_OSABI is set to ELFOSABI_LINUX. -# RUN: yaml2obj %s --docnum=4 -o %t.osabi.linux +# RUN: yaml2obj %s -DOSABI=ELFOSABI_LINUX -o %t.osabi.linux # RUN: llvm-readobj --file-headers %t.osabi.linux | FileCheck %s --match-full-lines --check-prefix=OSABI-LINUX-LLVM # RUN: llvm-readelf --file-headers %t.osabi.linux | FileCheck %s --match-full-lines --check-prefix=OSABI-LINUX-GNU # OSABI-LINUX-LLVM: OS/ABI: GNU/Linux (0x3) # OSABI-LINUX-GNU: OS/ABI: UNIX - GNU ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_LINUX - ## EI_OSABI is set to ELFOSABI_HURD. -# RUN: yaml2obj %s --docnum=5 -o %t.osabi.hurd +# RUN: yaml2obj %s -DOSABI=ELFOSABI_HURD -o %t.osabi.hurd # RUN: llvm-readobj --file-headers %t.osabi.hurd | FileCheck %s --match-full-lines --check-prefix=OSABI-HURD-LLVM # RUN: llvm-readelf --file-headers %t.osabi.hurd | FileCheck %s --match-full-lines --check-prefix=OSABI-HURD-GNU # OSABI-HURD-LLVM: OS/ABI: GNU/Hurd (0x4) # OSABI-HURD-GNU: OS/ABI: GNU/Hurd ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_HURD - ## EI_OSABI is set to ELFOSABI_SOLARIS. -# RUN: yaml2obj %s --docnum=6 -o %t.osabi.solaris +# RUN: yaml2obj %s -DOSABI=ELFOSABI_SOLARIS -o %t.osabi.solaris # RUN: llvm-readobj --file-headers %t.osabi.solaris | FileCheck %s --match-full-lines --check-prefix=OSABI-SOLARIS-LLVM # RUN: llvm-readelf --file-headers %t.osabi.solaris | FileCheck %s --match-full-lines --check-prefix=OSABI-SOLARIS-GNU # OSABI-SOLARIS-LLVM: OS/ABI: Solaris (0x6) # OSABI-SOLARIS-GNU: OS/ABI: UNIX - Solaris ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_SOLARIS - ## EI_OSABI is set to ELFOSABI_AIX. -# RUN: yaml2obj %s --docnum=7 -o %t.osabi.aix +# RUN: yaml2obj %s -DOSABI=ELFOSABI_AIX -o %t.osabi.aix # RUN: llvm-readobj --file-headers %t.osabi.aix | FileCheck %s --match-full-lines --check-prefix=OSABI-AIX-LLVM # RUN: llvm-readelf --file-headers %t.osabi.aix | FileCheck %s --match-full-lines --check-prefix=OSABI-AIX-GNU # OSABI-AIX-LLVM: OS/ABI: AIX (0x7) # OSABI-AIX-GNU: OS/ABI: UNIX - AIX ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_AIX - ## EI_OSABI is set to ELFOSABI_IRIX. 
-# RUN: yaml2obj %s --docnum=8 -o %t.osabi.irix +# RUN: yaml2obj %s -DOSABI=ELFOSABI_IRIX -o %t.osabi.irix # RUN: llvm-readobj --file-headers %t.osabi.irix | FileCheck %s --match-full-lines --check-prefix=OSABI-IRIX-LLVM # RUN: llvm-readelf --file-headers %t.osabi.irix | FileCheck %s --match-full-lines --check-prefix=OSABI-IRIX-GNU # OSABI-IRIX-LLVM: OS/ABI: IRIX (0x8) # OSABI-IRIX-GNU: OS/ABI: UNIX - IRIX ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_IRIX - ## EI_OSABI is set to ELFOSABI_FREEBSD. -# RUN: yaml2obj %s --docnum=9 -o %t.osabi.freebsd +# RUN: yaml2obj %s -DOSABI=ELFOSABI_FREEBSD -o %t.osabi.freebsd # RUN: llvm-readobj --file-headers %t.osabi.freebsd | FileCheck %s --match-full-lines --check-prefix=OSABI-FREEBSD-LLVM # RUN: llvm-readelf --file-headers %t.osabi.freebsd | FileCheck %s --match-full-lines --check-prefix=OSABI-FREEBSD-GNU # OSABI-FREEBSD-LLVM: OS/ABI: FreeBSD (0x9) # OSABI-FREEBSD-GNU: OS/ABI: UNIX - FreeBSD ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_FREEBSD - ## EI_OSABI is set to ELFOSABI_TRU64. -# RUN: yaml2obj %s --docnum=10 -o %t.osabi.tru64 +# RUN: yaml2obj %s -DOSABI=ELFOSABI_TRU64 -o %t.osabi.tru64 # RUN: llvm-readobj --file-headers %t.osabi.tru64 | FileCheck %s --match-full-lines --check-prefix=OSABI-TRU64-LLVM # RUN: llvm-readelf --file-headers %t.osabi.tru64 | FileCheck %s --match-full-lines --check-prefix=OSABI-TRU64-GNU # OSABI-TRU64-LLVM: OS/ABI: TRU64 (0xA) # OSABI-TRU64-GNU: OS/ABI: UNIX - TRU64 ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_TRU64 - ## EI_OSABI is set to ELFOSABI_MODESTO. -# RUN: yaml2obj %s --docnum=11 -o %t.osabi.modesto +# RUN: yaml2obj %s -DOSABI=ELFOSABI_MODESTO -o %t.osabi.modesto # RUN: llvm-readobj --file-headers %t.osabi.modesto | FileCheck %s --match-full-lines --check-prefix=OSABI-MODESTO-LLVM # RUN: llvm-readelf --file-headers %t.osabi.modesto | FileCheck %s --match-full-lines --check-prefix=OSABI-MODESTO-GNU # OSABI-MODESTO-LLVM: OS/ABI: Modesto (0xB) # OSABI-MODESTO-GNU: OS/ABI: Novell - Modesto ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_MODESTO - ## EI_OSABI is set to ELFOSABI_OPENBSD. -# RUN: yaml2obj %s --docnum=12 -o %t.osabi.openbsd +# RUN: yaml2obj %s -DOSABI=ELFOSABI_OPENBSD -o %t.osabi.openbsd # RUN: llvm-readobj --file-headers %t.osabi.openbsd | FileCheck %s --match-full-lines --check-prefix=OSABI-OPENBSD-LLVM # RUN: llvm-readelf --file-headers %t.osabi.openbsd | FileCheck %s --match-full-lines --check-prefix=OSABI-OPENBSD-GNU # OSABI-OPENBSD-LLVM: OS/ABI: OpenBSD (0xC) # OSABI-OPENBSD-GNU: OS/ABI: UNIX - OpenBSD ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_OPENBSD - ## EI_OSABI is set to ELFOSABI_OPENVMS. 
-# RUN: yaml2obj %s --docnum=13 -o %t.osabi.openvms +# RUN: yaml2obj %s -DOSABI=ELFOSABI_OPENVMS -o %t.osabi.openvms # RUN: llvm-readobj --file-headers %t.osabi.openvms | FileCheck %s --match-full-lines --check-prefix=OSABI-OPENVMS-LLVM # RUN: llvm-readelf --file-headers %t.osabi.openvms | FileCheck %s --match-full-lines --check-prefix=OSABI-OPENVMS-GNU # OSABI-OPENVMS-LLVM: OS/ABI: OpenVMS (0xD) # OSABI-OPENVMS-GNU: OS/ABI: VMS - OpenVMS ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_OPENVMS - ## EI_OSABI is set to ELFOSABI_NSK. -# RUN: yaml2obj %s --docnum=14 -o %t.osabi.nsk +# RUN: yaml2obj %s -DOSABI=ELFOSABI_NSK -o %t.osabi.nsk # RUN: llvm-readobj --file-headers %t.osabi.nsk | FileCheck %s --match-full-lines --check-prefix=OSABI-NSK-LLVM # RUN: llvm-readelf --file-headers %t.osabi.nsk | FileCheck %s --match-full-lines --check-prefix=OSABI-NSK-GNU # OSABI-NSK-LLVM: OS/ABI: NSK (0xE) # OSABI-NSK-GNU: OS/ABI: HP - Non-Stop Kernel ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_NSK - ## EI_OSABI is set to ELFOSABI_AROS. -# RUN: yaml2obj %s --docnum=15 -o %t.osabi.aros +# RUN: yaml2obj %s -DOSABI=ELFOSABI_AROS -o %t.osabi.aros # RUN: llvm-readobj --file-headers %t.osabi.aros | FileCheck %s --match-full-lines --check-prefix=OSABI-AROS-LLVM # RUN: llvm-readelf --file-headers %t.osabi.aros | FileCheck %s --match-full-lines --check-prefix=OSABI-AROS-GNU # OSABI-AROS-LLVM: OS/ABI: AROS (0xF) # OSABI-AROS-GNU: OS/ABI: AROS ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_AROS - ## EI_OSABI is set to ELFOSABI_FENIXOS. -# RUN: yaml2obj %s --docnum=16 -o %t.osabi.fenixos +# RUN: yaml2obj %s -DOSABI=ELFOSABI_FENIXOS -o %t.osabi.fenixos # RUN: llvm-readobj --file-headers %t.osabi.fenixos | FileCheck %s --match-full-lines --check-prefix=OSABI-FENIXOS-LLVM # RUN: llvm-readelf --file-headers %t.osabi.fenixos | FileCheck %s --match-full-lines --check-prefix=OSABI-FENIXOS-GNU # OSABI-FENIXOS-LLVM: OS/ABI: FenixOS (0x10) # OSABI-FENIXOS-GNU: OS/ABI: FenixOS ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_FENIXOS - ## EI_OSABI is set to ELFOSABI_CLOUDABI. -# RUN: yaml2obj %s --docnum=17 -o %t.osabi.cloudabi +# RUN: yaml2obj %s -DOSABI=ELFOSABI_CLOUDABI -o %t.osabi.cloudabi # RUN: llvm-readobj --file-headers %t.osabi.cloudabi | FileCheck %s --match-full-lines --check-prefix=OSABI-CLOUDABI-LLVM # RUN: llvm-readelf --file-headers %t.osabi.cloudabi | FileCheck %s --match-full-lines --check-prefix=OSABI-CLOUDABI-GNU # OSABI-CLOUDABI-LLVM: OS/ABI: CloudABI (0x11) # OSABI-CLOUDABI-GNU: OS/ABI: CloudABI ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_CLOUDABI - ## EI_OSABI is set to ELFOSABI_STANDALONE. 
-# RUN: yaml2obj %s --docnum=18 -o %t.osabi.standalone +# RUN: yaml2obj %s -DOSABI=ELFOSABI_STANDALONE -o %t.osabi.standalone # RUN: llvm-readobj --file-headers %t.osabi.standalone | FileCheck %s --match-full-lines --check-prefix=OSABI-STANDALONE-LLVM # RUN: llvm-readelf --file-headers %t.osabi.standalone | FileCheck %s --match-full-lines --check-prefix=OSABI-STANDALONE-GNU # OSABI-STANDALONE-LLVM: OS/ABI: Standalone (0xFF) # OSABI-STANDALONE-GNU: OS/ABI: Standalone App ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: ELFOSABI_STANDALONE - ## EI_OSABI is set to an arbitrary value which is not supported by llvm-readobj/llvm-readelf. -# RUN: yaml2obj %s --docnum=19 -o %t.osabi.unknown +# RUN: yaml2obj %s -DOSABI=0xFE -o %t.osabi.unknown # RUN: llvm-readobj --file-headers %t.osabi.unknown | FileCheck %s --match-full-lines --check-prefix=OSABI-UNKNOWN-LLVM # RUN: llvm-readelf --file-headers %t.osabi.unknown | FileCheck %s --match-full-lines --check-prefix=OSABI-UNKNOWN-GNU # OSABI-UNKNOWN-LLVM: OS/ABI: 0xFE # OSABI-UNKNOWN-GNU: OS/ABI: fe - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - OSABI: 0xFE diff --git a/llvm/test/tools/llvm-readobj/ELF/file-types.test b/llvm/test/tools/llvm-readobj/ELF/file-types.test index 0765cff1040c5..0a8e6050df818 100644 --- a/llvm/test/tools/llvm-readobj/ELF/file-types.test +++ b/llvm/test/tools/llvm-readobj/ELF/file-types.test @@ -1,6 +1,6 @@ ## Check llvm-readobj and llvm-readelf can dump files of the different ELF types. -# RUN: yaml2obj %s --docnum=1 -o %t1 +# RUN: yaml2obj %s -DTYPE=ET_NONE -o %t1 # RUN: llvm-readobj -h %t1 | FileCheck %s --match-full-lines --check-prefix LLVM-NONE # RUN: llvm-readelf -h %t1 | FileCheck %s --match-full-lines --check-prefix GNU-NONE @@ -14,10 +14,10 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - Type: ET_NONE + Type: [[TYPE]] Machine: EM_X86_64 -# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: yaml2obj %s -DTYPE=ET_REL -o %t2 # RUN: llvm-readobj -h %t2 | FileCheck %s --match-full-lines --check-prefix LLVM-REL # RUN: llvm-readelf -h %t2 | FileCheck %s --match-full-lines --check-prefix GNU-REL @@ -27,14 +27,7 @@ FileHeader: # GNU-REL: ELF Header: # GNU-REL: Type: REL (Relocatable file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: yaml2obj %s -DTYPE=ET_EXEC -o %t3 # RUN: llvm-readobj -h %t3 | FileCheck %s --match-full-lines --check-prefix LLVM-EXEC # RUN: llvm-readelf -h %t3 | FileCheck %s --match-full-lines --check-prefix GNU-EXEC @@ -44,14 +37,7 @@ FileHeader: # GNU-EXEC: ELF Header: # GNU-EXEC: Type: EXEC (Executable file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=4 -o %t4 +# RUN: yaml2obj %s -DTYPE=ET_DYN -o %t4 # RUN: llvm-readobj -h %t4 | FileCheck %s --match-full-lines --check-prefix LLVM-DYN # RUN: llvm-readelf -h %t4 | FileCheck %s --match-full-lines --check-prefix GNU-DYN @@ -61,14 +47,7 @@ FileHeader: # GNU-DYN: ELF Header: # GNU-DYN: Type: DYN (Shared object file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=5 -o %t5 +# RUN: yaml2obj %s -DTYPE=ET_CORE -o %t5 # RUN: llvm-readobj -h %t5 | FileCheck %s --match-full-lines --check-prefix LLVM-CORE # RUN: llvm-readelf -h %t5 | FileCheck %s --match-full-lines --check-prefix GNU-CORE @@ 
-78,14 +57,7 @@ FileHeader: # GNU-CORE: ELF Header: # GNU-CORE: Type: CORE (Core file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_CORE - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=6 -o %t6 +# RUN: yaml2obj %s -DTYPE=0xfe00 -o %t6 # RUN: llvm-readobj -h %t6 | FileCheck %s --match-full-lines --check-prefix LLVM-LOOS # RUN: llvm-readelf -h %t6 | FileCheck %s --match-full-lines --check-prefix GNU-LOOS @@ -95,14 +67,7 @@ FileHeader: # GNU-LOOS: ELF Header: # GNU-LOOS: Type: fe00 ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xfe00 - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=7 -o %t7 +# RUN: yaml2obj %s -DTYPE=0xfeff -o %t7 # RUN: llvm-readobj -h %t7 | FileCheck %s --match-full-lines --check-prefix LLVM-HIOS # RUN: llvm-readelf -h %t7 | FileCheck %s --match-full-lines --check-prefix GNU-HIOS @@ -112,14 +77,7 @@ FileHeader: # GNU-HIOS: ELF Header: # GNU-HIOS: Type: feff ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xfeff - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: yaml2obj %s -DTYPE=0xff00 -o %t8 # RUN: llvm-readobj -h %t8 | FileCheck %s --match-full-lines --check-prefix LLVM-LOPROC # RUN: llvm-readelf -h %t8 | FileCheck %s --match-full-lines --check-prefix GNU-LOPROC @@ -129,14 +87,7 @@ FileHeader: # GNU-LOPROC: ELF Header: # GNU-LOPROC: Type: ff00 ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xff00 - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=9 -o %t9 +# RUN: yaml2obj %s -DTYPE=0xffff -o %t9 # RUN: llvm-readobj -h %t9 | FileCheck %s --match-full-lines --check-prefix LLVM-HIPROC # RUN: llvm-readelf -h %t9 | FileCheck %s --match-full-lines --check-prefix GNU-HIPROC @@ -145,10 +96,3 @@ FileHeader: # GNU-HIPROC: ELF Header: # GNU-HIPROC: Type: ffff - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xffff - Machine: EM_X86_64 diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-notes.test b/llvm/test/tools/llvm-readobj/ELF/gnu-notes.test index 5af6a56e68950..011e5db474126 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-notes.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-notes.test @@ -87,8 +87,8 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_X86_64 - EPhEntSize: [[PHENTSIZE=56]] - EShNum: [[SHNUM=6]] + EPhEntSize: [[PHENTSIZE=]] + EShNum: [[SHNUM=]] Sections: - Name: .note.ABI-tag Type: SHT_NOTE diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test b/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test index 1b5bb2572b111..a767c95069ccf 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-phdrs.test @@ -109,7 +109,6 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: [[MACHINE]] - Entry: 0x12345678 Sections: - Name: .foo.begin Type: SHT_PROGBITS @@ -374,8 +373,8 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_X86_64 - EPhEntSize: [[PHENTSIZE=56]] - EPhOff: [[PHOFF=64]] + EPhEntSize: [[PHENTSIZE=]] + EPhOff: [[PHOFF=]] Sections: - Name: .foo Type: SHT_PROGBITS diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-phdrs.test b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-phdrs.test index fb508529fe5bb..86fe989861ce6 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-phdrs.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping-no-phdrs.test @@ -1,4 +1,4 @@ -## Test the behaviour of --section-mapping when there are no section headers in an object. 
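A note on the [[PHENTSIZE=]] spelling in the gnu-notes and gnu-phdrs hunks above: a macro default may be left empty, in which case the key receives no value at all unless the RUN line supplies one. As these diffs suggest, that lets yaml2obj fall back to the value it would otherwise compute (e.g. 56 for a 64-bit e_phentsize), while error-path tests can still inject a bogus value. The override below is hypothetical, shown only to illustrate the idiom:

# RUN: yaml2obj %s -DPHENTSIZE=1 -o %t.broken

--- !ELF
FileHeader:
  Class:      ELFCLASS64
  Data:       ELFDATA2LSB
  Type:       ET_EXEC
  Machine:    EM_X86_64
  EPhEntSize: [[PHENTSIZE=]]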
+## Test the behaviour of --section-mapping when there are no program headers in an object. # RUN: yaml2obj %s -o %t # RUN: llvm-readelf --section-mapping %t | FileCheck %s --strict-whitespace --match-full-lines diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test index e1c7181f15589..2172ba2d58c6c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-section-mapping.test @@ -18,7 +18,7 @@ FileHeader: Data: ELFDATA2LSB Type: ET_EXEC Machine: EM_X86_64 - EPhEntSize: [[PHENTSIZE=56]] + EPhEntSize: [[PHENTSIZE=]] Sections: - Name: .foo.begin Type: SHT_PROGBITS diff --git a/llvm/test/tools/llvm-readobj/ELF/gnu-sections.test b/llvm/test/tools/llvm-readobj/ELF/gnu-sections.test index 532ca22d3195b..ab6ea13c3a66c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/gnu-sections.test +++ b/llvm/test/tools/llvm-readobj/ELF/gnu-sections.test @@ -1,23 +1,24 @@ ## In this test case we check how we print section and flag descriptions for different targets. -## EM_386 is a target that does not have any processor and OS specific flags, +## EM_NONE is a target that does not have any processor and OS specific flags, ## we use it to show how the default flag key is printed. -# RUN: yaml2obj --docnum=1 %s -o %t-i386.o -# RUN: llvm-readelf -S %t-i386.o | FileCheck %s --check-prefix=ELF32 --strict-whitespace --match-full-lines +# RUN: yaml2obj -DBITS=32 %s -o %t-default.o +# RUN: llvm-readelf -S %t-default.o | FileCheck %s --check-prefix=ELF32 --strict-whitespace --match-full-lines -# ELF32:There are 8 section headers, starting at offset 0x90: +# ELF32:There are 9 section headers, starting at offset 0x9c: # ELF32-EMPTY: # ELF32-NEXT:Section Headers: # ELF32-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # ELF32-NEXT: [ 0] NULL 00000000 000000 000000 00 0 0 0 # ELF32-NEXT: [ 1] .text PROGBITS 00000000 000034 000001 00 AX 0 0 4 -# ELF32-NEXT: [ 2] .rel.text REL 00000000 000038 000000 08 5 1 4 -# ELF32-NEXT: [ 3] .data PROGBITS 00000000 000038 000000 00 WA 0 0 4 -# ELF32-NEXT: [ 4] .bss NOBITS 00000000 000038 000000 00 WA 0 0 4 -# ELF32-NEXT: [ 5] .symtab SYMTAB 00000000 000038 000020 10 6 2 8 -# ELF32-NEXT: [ 6] .strtab STRTAB 00000000 000058 000007 00 0 0 1 -# ELF32-NEXT: [ 7] .shstrtab STRTAB 00000000 00005f 000030 00 0 0 1 +# ELF32-NEXT: [ 2] .rel.text REL 00000000 000038 000000 08 6 1 4 +# ELF32-NEXT: [ 3] .rela.text RELA 00000000 000038 000000 18 6 1 8 +# ELF32-NEXT: [ 4] .data PROGBITS 00000000 000038 000000 00 WA 0 0 4 +# ELF32-NEXT: [ 5] .bss NOBITS 00000000 000038 000000 00 WA 0 0 4 +# ELF32-NEXT: [ 6] .symtab SYMTAB 00000000 000038 000020 10 7 2 8 +# ELF32-NEXT: [ 7] .strtab STRTAB 00000000 000058 000007 00 0 0 1 +# ELF32-NEXT: [ 8] .shstrtab STRTAB 00000000 00005f 00003b 00 0 0 1 # ELF32-NEXT:Key to Flags: # ELF32-NEXT: W (write), A (alloc), X (execute), M (merge), S (strings), I (info), # ELF32-NEXT: L (link order), O (extra OS processing required), G (group), T (TLS), @@ -26,11 +27,11 @@ --- !ELF FileHeader: - Class: ELFCLASS32 + Class: ELFCLASS[[BITS=64]] Data: ELFDATA2LSB OSABI: ELFOSABI_GNU Type: ET_REL - Machine: EM_386 + Machine: [[MACHINE=EM_NONE]] Sections: - Name: .text Type: SHT_PROGBITS @@ -44,6 +45,13 @@ Sections: EntSize: 0x0000000000000008 Info: .text Relocations: + - Name: .rela.text + Type: SHT_RELA + Link: .symtab + AddressAlign: 0x0000000000000008 + EntSize: 0x0000000000000018 + Info: .text + Relocations: - Name: .data Type: SHT_PROGBITS Flags: [ 
SHF_WRITE, SHF_ALLOC ] @@ -61,7 +69,7 @@ Symbols: ## For an EM_X86_64 target we print "l" for the SHF_X86_64_LARGE section flag. ## Check we mention it in the flag key. -# RUN: yaml2obj --docnum=2 %s -o %t-x64.o +# RUN: yaml2obj -DMACHINE=EM_X86_64 %s -o %t-x64.o # RUN: llvm-readelf -S %t-x64.o | FileCheck %s --check-prefix=ELF64 --strict-whitespace --match-full-lines ## Check that --wide is the same as -W and ignored and also @@ -72,62 +80,29 @@ Symbols: # RUN: | FileCheck %s --check-prefix=ELF64 # RUN: llvm-readelf -W -S %t-x64.o | FileCheck %s --check-prefix=ELF64 -# ELF64:There are 8 section headers, starting at offset 0xb0: +# ELF64:There are 9 section headers, starting at offset 0xc0: # ELF64-EMPTY: # ELF64-NEXT:Section Headers: # ELF64-NEXT: [Nr] Name Type Address Off Size ES Flg Lk Inf Al # ELF64-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 0 0 # ELF64-NEXT: [ 1] .text PROGBITS 0000000000000000 000040 000001 00 AX 0 0 4 -# ELF64-NEXT: [ 2] .rela.text RELA 0000000000000000 000048 000000 18 5 1 8 -# ELF64-NEXT: [ 3] .data PROGBITS 0000000000000000 000048 000000 00 WA 0 0 4 -# ELF64-NEXT: [ 4] .bss NOBITS 0000000000000000 000048 000000 00 WA 0 0 4 -# ELF64-NEXT: [ 5] .symtab SYMTAB 0000000000000000 000048 000030 18 6 2 8 -# ELF64-NEXT: [ 6] .strtab STRTAB 0000000000000000 000078 000007 00 0 0 1 -# ELF64-NEXT: [ 7] .shstrtab STRTAB 0000000000000000 00007f 000031 00 0 0 1 +# ELF64-NEXT: [ 2] .rel.text REL 0000000000000000 000044 000000 08 6 1 4 +# ELF64-NEXT: [ 3] .rela.text RELA 0000000000000000 000048 000000 18 6 1 8 +# ELF64-NEXT: [ 4] .data PROGBITS 0000000000000000 000048 000000 00 WA 0 0 4 +# ELF64-NEXT: [ 5] .bss NOBITS 0000000000000000 000048 000000 00 WA 0 0 4 +# ELF64-NEXT: [ 6] .symtab SYMTAB 0000000000000000 000048 000030 18 7 2 8 +# ELF64-NEXT: [ 7] .strtab STRTAB 0000000000000000 000078 000007 00 0 0 1 +# ELF64-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 00007f 00003b 00 0 0 1 # ELF64-NEXT:Key to Flags: # ELF64-NEXT: W (write), A (alloc), X (execute), M (merge), S (strings), I (info), # ELF64-NEXT: L (link order), O (extra OS processing required), G (group), T (TLS), # ELF64-NEXT: C (compressed), x (unknown), o (OS specific), E (exclude), # ELF64-NEXT: l (large), p (processor specific) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - OSABI: ELFOSABI_GNU - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - AddressAlign: 0x0000000000000004 - Content: 00 - - Name: .rela.text - Type: SHT_RELA - Link: .symtab - AddressAlign: 0x0000000000000008 - EntSize: 0x0000000000000018 - Info: .text - Relocations: - - Name: .data - Type: SHT_PROGBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - AddressAlign: 0x0000000000000004 - Content: '' - - Name: .bss - Type: SHT_NOBITS - Flags: [ SHF_WRITE, SHF_ALLOC ] - AddressAlign: 0x0000000000000004 -Symbols: - - Name: .text - Type: STT_SECTION - Section: .text - ## For an EM_ARM target we print "y" for the SHF_ARM_PURECODE section flag. ## Check we mention it in the flag key. 
-# RUN: yaml2obj --docnum=3 %s -o %t-arm.o +# RUN: yaml2obj -DMACHINE=EM_ARM %s -o %t-arm.o # RUN: llvm-readelf -S %t-arm.o | FileCheck %s --check-prefix=ARM --strict-whitespace --match-full-lines # ARM:Key to Flags: @@ -135,11 +110,3 @@ Symbols: # ARM-NEXT: L (link order), O (extra OS processing required), G (group), T (TLS), # ARM-NEXT: C (compressed), x (unknown), o (OS specific), E (exclude), # ARM-NEXT: y (purecode), p (processor specific) - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_ARM -Sections: [] diff --git a/llvm/test/tools/llvm-readobj/ELF/hidden-versym.test b/llvm/test/tools/llvm-readobj/ELF/hidden-versym.test index 871bab1767f2c..01a092425f81c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hidden-versym.test +++ b/llvm/test/tools/llvm-readobj/ELF/hidden-versym.test @@ -7,36 +7,30 @@ --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 - Entry: 0x0000000000201000 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 Sections: - - Name: .gnu.version - Type: SHT_GNU_versym - Flags: [ SHF_ALLOC ] - Address: 0x0000000000200210 - Link: .dynsym - AddressAlign: 0x0000000000000002 - EntSize: 0x0000000000000002 - Entries: [ 0, 0x8003 ] - - Name: .gnu.version_r - Type: SHT_GNU_verneed - Flags: [ SHF_ALLOC ] - Address: 0x0000000000200250 - Link: .dynstr - AddressAlign: 0x0000000000000004 - Info: 0x0000000000000001 + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Address: 0x0000000000200210 + Link: .dynsym + Entries: [ 0, 0x8003 ] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x0000000000000001 Dependencies: - - Version: 1 - File: somefile + - Version: 1 + File: somefile Entries: - - Name: hiddensym - Hash: 1234 - Flags: 0 - Other: 3 + - Name: hiddensym + Hash: 1234 + Flags: 0 + Other: 3 DynamicSymbols: - - Name: h - Binding: STB_GLOBAL -... + - Name: h + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/ELF/merged.test b/llvm/test/tools/llvm-readobj/ELF/merged.test index a6f567cda6cfe..454699b8a4342 100644 --- a/llvm/test/tools/llvm-readobj/ELF/merged.test +++ b/llvm/test/tools/llvm-readobj/ELF/merged.test @@ -1,13 +1,13 @@ -# Check merged args produce identical output to when not merged. -RUN: llvm-readelf -aeWhSrnudlVgIs %p/Inputs/trivial.obj.elf-i386 > %t.merged -RUN: llvm-readelf -a -e -W -h -S -r -n -u -d -l -V -g -I -s %p/Inputs/trivial.obj.elf-i386 > %t.not-merged -RUN: cmp %t.merged %t.not-merged -RUN: FileCheck %s --input-file %t.merged +## Check merged args produce identical output to when not merged. +# RUN: llvm-readelf -aeWhSrnudlVgIs %p/Inputs/trivial.obj.elf-i386 > %t.merged +# RUN: llvm-readelf -a -e -W -h -S -r -n -u -d -l -V -g -I -s %p/Inputs/trivial.obj.elf-i386 > %t.not-merged +# RUN: cmp %t.merged %t.not-merged +# RUN: FileCheck %s --input-file %t.merged -# llvm-readobj does not support merged args, because it also supports some old -# flags (-st, -sd, etc.), and it would be confusing if only some merged args -# were supported. -RUN: not llvm-readobj -aeWhSrnudlVgIs %p/Inputs/trivial.obj.elf-i386 2>&1 | FileCheck %s --check-prefix=UNKNOWN +## llvm-readobj does not support merged args, because it also supports some old +## flags (-st, -sd, etc.), and it would be confusing if only some merged args +## were supported. 
+# RUN: not llvm-readobj -aeWhSrnudlVgIs %p/Inputs/trivial.obj.elf-i386 2>&1 | FileCheck %s --check-prefix=UNKNOWN -CHECK-NOT: Unknown command line argument -UNKNOWN: for the --section-headers option: may only occur zero or one times! +# CHECK-NOT: Unknown command line argument +# UNKNOWN: for the --section-headers option: may only occur zero or one times! diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test b/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test index 881c63b79a4f9..c8f81ccf9d280 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test @@ -3,43 +3,43 @@ # address as the .got. got-over.exe.elf-mips has zero-sized .data # section at the same offset .got section. -RUN: llvm-readobj -A %p/Inputs/got-over.exe.elf-mips | FileCheck %s +# RUN: llvm-readobj -A %p/Inputs/got-over.exe.elf-mips | FileCheck %s -GOT-OBJ: Cannot find PLTGOT dynamic table tag. +# GOT-OBJ: Cannot find PLTGOT dynamic table tag. -CHECK: Primary GOT { -CHECK-NEXT: Canonical gp value: 0x418270 -CHECK-NEXT: Reserved entries [ -CHECK-NEXT: Entry { -CHECK-NEXT: Address: 0x410280 -CHECK-NEXT: Access: -32752 -CHECK-NEXT: Initial: 0x0 -CHECK-NEXT: Purpose: Lazy resolver -CHECK-NEXT: } -CHECK-NEXT: Entry { -CHECK-NEXT: Address: 0x410284 -CHECK-NEXT: Access: -32748 -CHECK-NEXT: Initial: 0x80000000 -CHECK-NEXT: Purpose: Module pointer (GNU extension) -CHECK-NEXT: } -CHECK-NEXT: ] -CHECK-NEXT: Local entries [ -CHECK-NEXT: Entry { -CHECK-NEXT: Address: 0x410288 -CHECK-NEXT: Access: -32744 -CHECK-NEXT: Initial: 0x4001B8 -CHECK-NEXT: } -CHECK-NEXT: ] -CHECK-NEXT: Global entries [ -CHECK-NEXT: Entry { -CHECK-NEXT: Address: 0x41028C -CHECK-NEXT: Access: -32740 -CHECK-NEXT: Initial: 0x0 -CHECK-NEXT: Value: 0x0 -CHECK-NEXT: Type: None -CHECK-NEXT: Section: Undefined -CHECK-NEXT: Name: _foo -CHECK-NEXT: } -CHECK-NEXT: ] -CHECK-NEXT: Number of TLS and multi-GOT entries: 0 -CHECK-NEXT: } +# CHECK: Primary GOT { +# CHECK-NEXT: Canonical gp value: 0x418270 +# CHECK-NEXT: Reserved entries [ +# CHECK-NEXT: Entry { +# CHECK-NEXT: Address: 0x410280 +# CHECK-NEXT: Access: -32752 +# CHECK-NEXT: Initial: 0x0 +# CHECK-NEXT: Purpose: Lazy resolver +# CHECK-NEXT: } +# CHECK-NEXT: Entry { +# CHECK-NEXT: Address: 0x410284 +# CHECK-NEXT: Access: -32748 +# CHECK-NEXT: Initial: 0x80000000 +# CHECK-NEXT: Purpose: Module pointer (GNU extension) +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: Local entries [ +# CHECK-NEXT: Entry { +# CHECK-NEXT: Address: 0x410288 +# CHECK-NEXT: Access: -32744 +# CHECK-NEXT: Initial: 0x4001B8 +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: Global entries [ +# CHECK-NEXT: Entry { +# CHECK-NEXT: Address: 0x41028C +# CHECK-NEXT: Access: -32740 +# CHECK-NEXT: Initial: 0x0 +# CHECK-NEXT: Value: 0x0 +# CHECK-NEXT: Type: None +# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: Name: _foo +# CHECK-NEXT: } +# CHECK-NEXT: ] +# CHECK-NEXT: Number of TLS and multi-GOT entries: 0 +# CHECK-NEXT: } diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-options.test b/llvm/test/tools/llvm-readobj/ELF/mips-options.test index d44b3c0096a38..0d0ec7e554015 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-options.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-options.test @@ -1,22 +1,22 @@ -# Check DT_MIPS_OPTIONS .dynamic section tag reading +## Check DT_MIPS_OPTIONS .dynamic section tag reading -RUN: llvm-readobj --dynamic-table %p/Inputs/mips-options.elf-mips64el | \ -RUN: FileCheck %s +# RUN: llvm-readobj --dynamic-table 
%p/Inputs/mips-options.elf-mips64el | \ +# RUN: FileCheck %s -CHECK: DynamicSection [ (14 entries) -CHECK-NEXT: Tag Type Name/Value -CHECK-NEXT: 0x0000000000000004 HASH 0x158 -CHECK-NEXT: 0x0000000000000005 STRTAB 0x1C0 -CHECK-NEXT: 0x0000000000000006 SYMTAB 0x178 -CHECK-NEXT: 0x000000000000000A STRSZ 7 (bytes) -CHECK-NEXT: 0x000000000000000B SYMENT 24 (bytes) -CHECK-NEXT: 0x0000000070000001 MIPS_RLD_VERSION 1 -CHECK-NEXT: 0x0000000070000005 MIPS_FLAGS NOTPOT -CHECK-NEXT: 0x0000000070000006 MIPS_BASE_ADDRESS 0x0 -CHECK-NEXT: 0x000000007000000A MIPS_LOCAL_GOTNO 2 -CHECK-NEXT: 0x0000000070000011 MIPS_SYMTABNO 3 -CHECK-NEXT: 0x0000000070000013 MIPS_GOTSYM 0x3 -CHECK-NEXT: 0x0000000000000003 PLTGOT 0x2000 -CHECK-NEXT: 0x0000000070000029 MIPS_OPTIONS 0x1000 -CHECK-NEXT: 0x0000000000000000 NULL 0x0 -CHECK-NEXT: ] +# CHECK: DynamicSection [ (14 entries) +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x0000000000000004 HASH 0x158 +# CHECK-NEXT: 0x0000000000000005 STRTAB 0x1C0 +# CHECK-NEXT: 0x0000000000000006 SYMTAB 0x178 +# CHECK-NEXT: 0x000000000000000A STRSZ 7 (bytes) +# CHECK-NEXT: 0x000000000000000B SYMENT 24 (bytes) +# CHECK-NEXT: 0x0000000070000001 MIPS_RLD_VERSION 1 +# CHECK-NEXT: 0x0000000070000005 MIPS_FLAGS NOTPOT +# CHECK-NEXT: 0x0000000070000006 MIPS_BASE_ADDRESS 0x0 +# CHECK-NEXT: 0x000000007000000A MIPS_LOCAL_GOTNO 2 +# CHECK-NEXT: 0x0000000070000011 MIPS_SYMTABNO 3 +# CHECK-NEXT: 0x0000000070000013 MIPS_GOTSYM 0x3 +# CHECK-NEXT: 0x0000000000000003 PLTGOT 0x2000 +# CHECK-NEXT: 0x0000000070000029 MIPS_OPTIONS 0x1000 +# CHECK-NEXT: 0x0000000000000000 NULL 0x0 +# CHECK-NEXT: ] diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-rld-map-rel.test b/llvm/test/tools/llvm-readobj/ELF/mips-rld-map-rel.test index 05e541120a651..00198bbfe7571 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-rld-map-rel.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-rld-map-rel.test @@ -1,24 +1,24 @@ -# Check DT_MIPS_RLD_MAP_REL .dynamic section tag reading +## Check DT_MIPS_RLD_MAP_REL .dynamic section tag reading -RUN: llvm-readobj --dynamic-table %p/Inputs/mips-rld-map-rel.elf-mipsel | \ -RUN: FileCheck %s +# RUN: llvm-readobj --dynamic-table %p/Inputs/mips-rld-map-rel.elf-mipsel | \ +# RUN: FileCheck %s -CHECK: DynamicSection [ (16 entries) -CHECK-NEXT: Tag Type Name/Value -CHECK-NEXT: 0x00000004 HASH 0x220 -CHECK-NEXT: 0x00000005 STRTAB 0x2FC -CHECK-NEXT: 0x00000006 SYMTAB 0x25C -CHECK-NEXT: 0x0000000A STRSZ 72 (bytes) -CHECK-NEXT: 0x0000000B SYMENT 16 (bytes) -CHECK-NEXT: 0x70000035 MIPS_RLD_MAP_REL 0x101E0 -CHECK-NEXT: 0x00000015 DEBUG 0x0 -CHECK-NEXT: 0x00000003 PLTGOT 0x10390 -CHECK-NEXT: 0x70000001 MIPS_RLD_VERSION 1 -CHECK-NEXT: 0x70000005 MIPS_FLAGS NOTPOT -CHECK-NEXT: 0x70000006 MIPS_BASE_ADDRESS 0x0 -CHECK-NEXT: 0x7000000A MIPS_LOCAL_GOTNO 2 -CHECK-NEXT: 0x70000011 MIPS_SYMTABNO 10 -CHECK-NEXT: 0x70000012 MIPS_UNREFEXTNO 15 -CHECK-NEXT: 0x70000013 MIPS_GOTSYM 0xA -CHECK-NEXT: 0x00000000 NULL 0x0 -CHECK-NEXT: ] +# CHECK: DynamicSection [ (16 entries) +# CHECK-NEXT: Tag Type Name/Value +# CHECK-NEXT: 0x00000004 HASH 0x220 +# CHECK-NEXT: 0x00000005 STRTAB 0x2FC +# CHECK-NEXT: 0x00000006 SYMTAB 0x25C +# CHECK-NEXT: 0x0000000A STRSZ 72 (bytes) +# CHECK-NEXT: 0x0000000B SYMENT 16 (bytes) +# CHECK-NEXT: 0x70000035 MIPS_RLD_MAP_REL 0x101E0 +# CHECK-NEXT: 0x00000015 DEBUG 0x0 +# CHECK-NEXT: 0x00000003 PLTGOT 0x10390 +# CHECK-NEXT: 0x70000001 MIPS_RLD_VERSION 1 +# CHECK-NEXT: 0x70000005 MIPS_FLAGS NOTPOT +# CHECK-NEXT: 0x70000006 MIPS_BASE_ADDRESS 0x0 +# CHECK-NEXT: 0x7000000A MIPS_LOCAL_GOTNO 2 +# 
CHECK-NEXT: 0x70000011 MIPS_SYMTABNO 10 +# CHECK-NEXT: 0x70000012 MIPS_UNREFEXTNO 15 +# CHECK-NEXT: 0x70000013 MIPS_GOTSYM 0xA +# CHECK-NEXT: 0x00000000 NULL 0x0 +# CHECK-NEXT: ] diff --git a/llvm/test/tools/llvm-readobj/ELF/needed-libs.test b/llvm/test/tools/llvm-readobj/ELF/needed-libs.test index 1c5f4978f093b..99bdca29a5160 100644 --- a/llvm/test/tools/llvm-readobj/ELF/needed-libs.test +++ b/llvm/test/tools/llvm-readobj/ELF/needed-libs.test @@ -46,7 +46,7 @@ Sections: - Tag: DT_NEEDED Value: 0x1111111 - Tag: DT_STRSZ - Value: 0xD + Value: [[SIZE=0xD]] - Tag: DT_NULL Value: 0x0 ProgramHeaders: diff --git a/llvm/test/tools/llvm-readobj/ELF/no-action.test b/llvm/test/tools/llvm-readobj/ELF/no-action.test index b18c900438542..acfcb285f58bd 100644 --- a/llvm/test/tools/llvm-readobj/ELF/no-action.test +++ b/llvm/test/tools/llvm-readobj/ELF/no-action.test @@ -2,7 +2,7 @@ ## Check the behavior on ET_EXEC input. -# RUN: yaml2obj --docnum=1 %s -o %t.exe +# RUN: yaml2obj -DTYPE=ET_EXEC %s -o %t.exe # RUN: llvm-readobj %t.exe | FileCheck %s -DFILE=%t.exe --check-prefix LLVM # RUN: llvm-readelf %t.exe | FileCheck %s -DFILE=%t.exe --check-prefix GNU --allow-empty @@ -20,31 +20,17 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - Type: ET_EXEC + Type: [[TYPE]] Machine: EM_X86_64 ## Check the behavior on ET_REL input. -# RUN: yaml2obj --docnum=2 %s -o %t.o +# RUN: yaml2obj -DTYPE=ET_REL %s -o %t.o # RUN: llvm-readobj %t.o | FileCheck %s -DFILE=%t.o --check-prefix LLVM # RUN: llvm-readelf %t.o | FileCheck %s -DFILE=%t.o --check-prefix GNU --allow-empty ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - ## Check the behavior on ET_DYN input. -# RUN: yaml2obj --docnum=3 %s -o %t.so +# RUN: yaml2obj -DTYPE=ET_DYN %s -o %t.so # RUN: llvm-readobj %t.so | FileCheck %s -DFILE=%t.so --check-prefix LLVM # RUN: llvm-readelf %t.so | FileCheck %s -DFILE=%t.so --check-prefix GNU --allow-empty - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test b/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test index 330426c710ecf..e3664d44ccaad 100644 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test +++ b/llvm/test/tools/llvm-readobj/ELF/packed-relocs.test @@ -27,25 +27,21 @@ # elf-packed-relocs1.s --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 - Entry: 0x0000000000001000 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 Sections: - - Name: .rela.dyn - Type: SHT_ANDROID_RELA - Flags: [ SHF_ALLOC ] - Address: 0x00000000000001C8 - Link: .symtab - AddressAlign: 0x0000000000000001 - Content: 41505332088020020108800280010202088180808010818080802002080181808080100802818080802004020C7E048180808010088180808020 + - Name: .rela.dyn + Type: SHT_ANDROID_RELA + Flags: [ SHF_ALLOC ] + Link: .symtab + Content: 41505332088020020108800280010202088180808010818080802002080181808080100802818080802004020C7E048180808010088180808020 Symbols: - - Name: sym1 - Binding: STB_GLOBAL - - Name: sym2 - Binding: STB_GLOBAL -... 
+ - Name: sym1 + Binding: STB_GLOBAL + - Name: sym2 + Binding: STB_GLOBAL # RUN: yaml2obj --docnum=2 %s | llvm-readobj --relocations - | FileCheck --check-prefix=LLVM2 %s # LLVM2: Section (1) .rel.dyn { @@ -77,25 +73,21 @@ Symbols: # elf-packed-relocs2.s --- !ELF FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_386 - Entry: 0x0000000000001000 + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_386 Sections: - - Name: .rel.dyn - Type: SHT_ANDROID_REL - Flags: [ SHF_ALLOC ] - Address: 0x00000000000001C8 - Link: .symtab - AddressAlign: 0x0000000000000001 - Content: 415053320A80200202088102830408037C08 + - Name: .rel.dyn + Type: SHT_ANDROID_REL + Flags: [ SHF_ALLOC ] + Link: .symtab + Content: 415053320A80200202088102830408037C08 Symbols: - - Name: sym1 - Binding: STB_GLOBAL - - Name: sym2 - Binding: STB_GLOBAL -... + - Name: sym1 + Binding: STB_GLOBAL + - Name: sym2 + Binding: STB_GLOBAL # RUN: yaml2obj --docnum=3 %s | llvm-readobj --relocations - | FileCheck --check-prefix=LLVM3 %s # @@ -120,22 +112,18 @@ Symbols: # elf-packed-relocs3.s --- !ELF FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 - Entry: 0x0000000000001000 + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 Sections: - - Name: .rela.dyn - Type: SHT_ANDROID_RELA - Flags: [ SHF_ALLOC ] - Address: 0x00000000000001C8 - Link: .symtab - AddressAlign: 0x0000000000000001 - Content: 415053320680200208800208008001080802008001818080801008818080802002080881808080100008818080802008 + - Name: .rela.dyn + Type: SHT_ANDROID_RELA + Flags: [ SHF_ALLOC ] + Link: .symtab + Content: 415053320680200208800208008001080802008001818080801008818080802002080881808080100008818080802008 Symbols: - - Name: sym1 - Binding: STB_GLOBAL - - Name: sym2 - Binding: STB_GLOBAL -... + - Name: sym1 + Binding: STB_GLOBAL + - Name: sym2 + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/ELF/pt-gnu-property.test b/llvm/test/tools/llvm-readobj/ELF/pt-gnu-property.test index f47341115b5ca..31e2304334124 100644 --- a/llvm/test/tools/llvm-readobj/ELF/pt-gnu-property.test +++ b/llvm/test/tools/llvm-readobj/ELF/pt-gnu-property.test @@ -2,6 +2,8 @@ # RUN: llvm-readelf --program-headers %t | FileCheck %s --check-prefix=GNU # RUN: llvm-readobj --program-headers %t | FileCheck %s --check-prefix=LLVM +## TODO: merge this test with program-headers.test. 
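An aside on the packed-relocs documents above (background, not part of the patch): each Content blob begins with the bytes 41505332, the ASCII signature "APS2" that marks Android's packed, sleb128-encoded relocation format carried by SHT_ANDROID_REL/SHT_ANDROID_RELA sections.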
+ # GNU: {{ }}GNU_PROPERTY{{ }} # LLVM: ProgramHeader { diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-symbol-with-versioning.test b/llvm/test/tools/llvm-readobj/ELF/reloc-symbol-with-versioning.test index f6314cce99f3d..933d53f31221c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-symbol-with-versioning.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-symbol-with-versioning.test @@ -21,88 +21,69 @@ # LLVM-NEXT: ] --- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 - Entry: 0x0000000000000000 -Sections: - - Name: .gnu.version - Type: SHT_GNU_versym - Flags: [ SHF_ALLOC ] - Link: .dynsym - AddressAlign: 0x0000000000000002 - EntSize: 0x0000000000000002 - Entries: [ 0, 2, 3, 4, 2 ] - - Name: .gnu.version_r - Type: SHT_GNU_verneed - Flags: [ SHF_ALLOC ] - Link: .dynstr - AddressAlign: 0x0000000000000004 - Info: 0x0000000000000002 - Dependencies: - - Version: 1 - File: verneed1.so.0 - Entries: - - Name: v2 - Hash: 1938 - Flags: 0 - Other: 3 - - Name: v3 - Hash: 1939 - Flags: 0 - Other: 2 - - Version: 1 - File: verneed2.so.0 - Entries: - - Name: v1 - Hash: 1937 - Flags: 0 - Other: 4 - - Name: .rela.plt - Type: SHT_RELA - Flags: [ SHF_ALLOC ] - Info: 0 - Link: .dynsym - AddressAlign: 0x0000000000000008 - EntSize: 0x0000000000000018 - Relocations: - - Offset: 0x0000000000013018 - Symbol: f1 - Type: R_X86_64_JUMP_SLOT - - Offset: 0x0000000000013020 - Symbol: f2 - Type: R_X86_64_JUMP_SLOT - - Offset: 0x0000000000013028 - Symbol: g1 - Type: R_X86_64_JUMP_SLOT - - Offset: 0x0000000000013040 - Symbol: _Z2f1v - Type: R_X86_64_JUMP_SLOT - - Offset: 0x0000000000013058 - Symbol: f3 - Type: R_X86_64_JUMP_SLOT -Symbols: - - Name: f1 - Binding: STB_GLOBAL - - Name: f2 - Binding: STB_GLOBAL - - Name: g1 - Binding: STB_GLOBAL - - Name: _Z2f1v - Binding: STB_GLOBAL - - Name: f3 - Binding: STB_GLOBAL +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Entries: [ 0, 2, 3, 4, 2 ] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Link: .dynstr + AddressAlign: 0x0000000000000004 + Info: 0x0000000000000002 + Dependencies: + - Version: 1 + File: verneed1.so.0 + Entries: + - Name: v2 + Hash: 1938 + Flags: 0 + Other: 3 + - Name: v3 + Hash: 1939 + Flags: 0 + Other: 2 + - Version: 1 + File: verneed2.so.0 + Entries: + - Name: v1 + Hash: 1937 + Flags: 0 + Other: 4 + - Name: .rela.plt + Type: SHT_RELA + Flags: [ SHF_ALLOC ] + Link: .dynsym + Relocations: + - Offset: 0x0000000000013018 + Symbol: f1 + Type: R_X86_64_JUMP_SLOT + - Offset: 0x0000000000013020 + Symbol: f2 + Type: R_X86_64_JUMP_SLOT + - Offset: 0x0000000000013028 + Symbol: g1 + Type: R_X86_64_JUMP_SLOT + - Offset: 0x0000000000013040 + Symbol: _Z2f1v + Type: R_X86_64_JUMP_SLOT + - Offset: 0x0000000000013058 + Symbol: f3 + Type: R_X86_64_JUMP_SLOT DynamicSymbols: - - Name: f1 - Binding: STB_GLOBAL - - Name: f2 - Binding: STB_GLOBAL - - Name: g1 - Binding: STB_GLOBAL - - Name: _Z2f1v - Binding: STB_GLOBAL - - Name: f3 - Binding: STB_GLOBAL -... 
+ - Name: f1 + Binding: STB_GLOBAL + - Name: f2 + Binding: STB_GLOBAL + - Name: g1 + Binding: STB_GLOBAL + - Name: _Z2f1v + Binding: STB_GLOBAL + - Name: f3 + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test index 2882d32fd92a6..7291282811048 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-aarch64.test @@ -136,14 +136,8 @@ FileHeader: Type: ET_REL Machine: EM_AARCH64 Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rela.text - Type: SHT_RELA - AddressAlign: 0x0000000000000008 - EntSize: 0x0000000000000018 - Info: .text + - Name: .rela.text + Type: SHT_RELA Relocations: - Type: R_AARCH64_NONE - Type: R_AARCH64_ABS64 diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test index f72d7cdd89138..96d6cfed4df3e 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-arm.test @@ -142,16 +142,9 @@ FileHeader: Data: ELFDATA2LSB Type: ET_REL Machine: EM_ARM - Flags: [ EF_ARM_EABI_VER5 ] Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rel.text - Type: SHT_REL - AddressAlign: 0x0000000000000004 - EntSize: 0x0000000000000008 - Info: .text + - Name: .rel.text + Type: SHT_REL Relocations: - Type: R_ARM_NONE - Type: R_ARM_PC24 diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test index 8606f3fa8cba5..fb16185ecf48e 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-i386.test @@ -47,7 +47,7 @@ # LLVM: Type: R_386_IRELATIVE (42) # LLVM: Type: R_386_GOT32X (43) -# GNU: Relocation section '.rel.text' at offset 0x38 contains 41 entries: +# GNU: Relocation section '.rel.text' at offset 0x34 contains 41 entries: # GNU-NEXT: Offset Info Type Sym. 
Value Symbol's Name # GNU-NEXT: 00000002 00000100 R_386_NONE 00000000 foo # GNU-NEXT: 00000008 00000101 R_386_32 00000000 foo @@ -93,21 +93,14 @@ --- !ELF FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - OSABI: ELFOSABI_GNU - Type: ET_REL - Machine: EM_386 + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_386 Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rel.text - Type: SHT_REL - Link: .symtab - AddressAlign: 0x0000000000000004 - EntSize: 0x0000000000000008 - Info: .text + - Name: .rel.text + Type: SHT_REL + Link: .symtab Relocations: - Offset: 0x0000000000000002 Type: R_386_NONE diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test index 6bbe1a839b509..123ab1f5bd0e6 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-lanai.test @@ -19,16 +19,8 @@ FileHeader: Type: ET_REL Machine: EM_LANAI Sections: - - Name: .text - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - AddressAlign: 0x0000000000000004 - Content: 00 - - Name: .rela.text - Type: SHT_RELA - AddressAlign: 0x0000000000000004 - EntSize: 0x000000000000000C - Info: .text + - Name: .rela.text + Type: SHT_RELA Relocations: - Type: R_LANAI_NONE - Type: R_LANAI_21 diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test index 59793c512b35a..3566d42a5d735 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips.test @@ -61,18 +61,11 @@ FileHeader: Class: ELFCLASS32 Data: ELFDATA2MSB - OSABI: ELFOSABI_GNU Type: ET_REL Machine: EM_MIPS Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rel.text - Type: SHT_REL - AddressAlign: 0x0000000000000004 - EntSize: 0x0000000000000008 - Info: .text + - Name: .rel.text + Type: SHT_REL Relocations: - Type: R_MIPS_NONE - Type: R_MIPS_16 diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test index fc72edd29d2c0..d96c922bd8339 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-mips64.test @@ -61,18 +61,11 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - OSABI: ELFOSABI_GNU Type: ET_REL Machine: EM_MIPS Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rela.text - Type: SHT_RELA - AddressAlign: 0x0000000000000008 - EntSize: 0x0000000000000018 - Info: .text + - Name: .rela.text + Type: SHT_RELA Relocations: - Type: R_MIPS_NONE - Type: R_MIPS_16 diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test index ad4e06fc955a3..0a48325d9c9d2 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-elf-x64.test @@ -46,7 +46,7 @@ # LLVM: Type: R_X86_64_GOTPCRELX (41) # LLVM: Type: R_X86_64_REX_GOTPCRELX (42) -# GNU: Relocation section '.rela.text' at offset 0x48 contains 40 entries: +# GNU: Relocation section '.rela.text' at offset 0x40 contains 40 entries: # GNU-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend # GNU-NEXT: 0000000000000003 0000000100000000 R_X86_64_NONE 0000000000000000 foo + 0 # GNU-NEXT: 000000000000000a 0000000100000001 R_X86_64_64 0000000000000000 foo + 0 @@ -93,19 +93,11 @@ 
FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - OSABI: ELFOSABI_GNU Type: ET_REL Machine: EM_X86_64 Sections: - - Name: .text - Type: SHT_PROGBITS - Content: 00 - - Name: .rela.text - Type: SHT_RELA - Link: .symtab - AddressAlign: 0x0000000000000008 - EntSize: 0x0000000000000018 - Info: .text + - Name: .rela.text + Type: SHT_RELA Relocations: - Offset: 0x0000000000000003 Type: R_X86_64_NONE diff --git a/llvm/test/tools/llvm-readobj/ELF/sections.test b/llvm/test/tools/llvm-readobj/ELF/sections.test index d2865c041798a..1b087f710f18f 100644 --- a/llvm/test/tools/llvm-readobj/ELF/sections.test +++ b/llvm/test/tools/llvm-readobj/ELF/sections.test @@ -1,9 +1,9 @@ ## Check how llvm-readobj prints sections with --sections. ## We test the --sections flag for llvm-readelf in the gnu-sections.test. -# RUN: yaml2obj --docnum=1 %s -o %t64 +# RUN: yaml2obj %s -o %t64 # RUN: llvm-readobj --sections %t64 | FileCheck %s --check-prefixes=ELF,ELF64 -# RUN: yaml2obj --docnum=2 %s -o %t32 +# RUN: yaml2obj -DBITS=32 %s -o %t32 # RUN: llvm-readobj --sections %t32 | FileCheck %s --check-prefixes=ELF,ELF32 ## Check flag aliases produce identical output. @@ -89,27 +89,10 @@ --- !ELF FileHeader: - Class: ELFCLASS64 + Class: ELFCLASS[[BITS=64]] Data: ELFDATA2LSB Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .foo - Type: SHT_PROGBITS - Info: 1 - Address: 0x2 - Size: 0x3 - Flags: [ SHF_WRITE, SHF_ALLOC ] - Link: 4 - AddressAlign: 5 - EntSize: 6 - ---- !ELF -FileHeader: - Class: ELFCLASS32 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_386 + Machine: EM_NONE Sections: - Name: .foo Type: SHT_PROGBITS diff --git a/llvm/test/tools/llvm-readobj/ELF/types.test b/llvm/test/tools/llvm-readobj/ELF/types.test index aecbdeacde983..a9413959e3146 100644 --- a/llvm/test/tools/llvm-readobj/ELF/types.test +++ b/llvm/test/tools/llvm-readobj/ELF/types.test @@ -1,5 +1,5 @@ # Show that llvm-readobj can handle all standard ELF types. 
-# RUN: yaml2obj %s --docnum=1 -o %t.none +# RUN: yaml2obj %s -DTYPE=ET_NONE -o %t.none # RUN: llvm-readobj --file-headers %t.none | FileCheck %s --check-prefix=LLVM-NONE # RUN: llvm-readelf --file-headers %t.none | FileCheck %s --check-prefix=GNU-NONE @@ -10,107 +10,57 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - Type: ET_NONE + Type: [[TYPE]] Machine: EM_X86_64 -# RUN: yaml2obj %s --docnum=2 -o %t.rel +# RUN: yaml2obj %s -DTYPE=ET_REL -o %t.rel # RUN: llvm-readobj --file-headers %t.rel | FileCheck %s --check-prefix=LLVM-REL # RUN: llvm-readelf --file-headers %t.rel | FileCheck %s --check-prefix=GNU-REL # LLVM-REL: Type: Relocatable (0x1) # GNU-REL: Type: REL (Relocatable file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - - -# RUN: yaml2obj %s --docnum=3 -o %t.exec +# RUN: yaml2obj %s -DTYPE=ET_EXEC -o %t.exec # RUN: llvm-readobj --file-headers %t.exec | FileCheck %s --check-prefix=LLVM-EXEC # RUN: llvm-readelf --file-headers %t.exec | FileCheck %s --check-prefix=GNU-EXEC # LLVM-EXEC: Type: Executable (0x2) # GNU-EXEC: Type: EXEC (Executable file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=4 -o %t.so +# RUN: yaml2obj %s -DTYPE=ET_DYN -o %t.so # RUN: llvm-readobj --file-headers %t.so | FileCheck %s --check-prefix=LLVM-SO # RUN: llvm-readelf --file-headers %t.so | FileCheck %s --check-prefix=GNU-SO # LLVM-SO: Type: SharedObject (0x3) # GNU-SO: Type: DYN (Shared object file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 - -# RUN: yaml2obj %s --docnum=5 -o %t.core +# RUN: yaml2obj %s -DTYPE=ET_CORE -o %t.core # RUN: llvm-readobj --file-headers %t.core | FileCheck %s --check-prefix=LLVM-CORE # RUN: llvm-readelf --file-headers %t.core | FileCheck %s --check-prefix=GNU-CORE # LLVM-CORE: Type: Core (0x4) # GNU-CORE: Type: CORE (Core file) ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_CORE - Machine: EM_X86_64 - # Show that llvm-readobj can handle an unknown ELF type. -# RUN: yaml2obj %s --docnum=6 -o %t.unknown +# RUN: yaml2obj %s -DTYPE=0x42 -o %t.unknown # RUN: llvm-readobj --file-headers %t.unknown | FileCheck %s --check-prefix=LLVM-UNKNOWN # RUN: llvm-readelf --file-headers %t.unknown | FileCheck %s --check-prefix=GNU-UNKNOWN # LLVM-UNKNOWN: Type: 0x42 # GNU-UNKNOWN: Type: 42 ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0x42 - Machine: EM_X86_64 - # Show that llvm-readobj can handle an unknown OS-specific ELF type. -# RUN: yaml2obj %s --docnum=7 -o %t.os +# RUN: yaml2obj %s -DTYPE=0xfe00 -o %t.os # RUN: llvm-readobj --file-headers %t.os | FileCheck %s --check-prefix=LLVM-OS # RUN: llvm-readelf --file-headers %t.os | FileCheck %s --check-prefix=GNU-OS # LLVM-OS: Type: 0xFE00 # GNU-OS: Type: fe00 ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xfe00 - Machine: EM_X86_64 - # Show that llvm-readobj can handle an unknown machine-specific ELF type. 
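For background (from the ELF gABI, not from the patch): 0xfe00-0xfeff is the reserved OS-specific e_type range (ET_LOOS..ET_HIOS) and 0xff00-0xffff the processor-specific range (ET_LOPROC..ET_HIPROC); 0x42 above falls in neither range, which is why it is dumped as a plain unknown value.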
-# RUN: yaml2obj %s --docnum=8 -o %t.proc +# RUN: yaml2obj %s -DTYPE=0xff00 -o %t.proc # RUN: llvm-readobj --file-headers %t.proc | FileCheck %s --check-prefix=LLVM-PROC # RUN: llvm-readelf --file-headers %t.proc | FileCheck %s --check-prefix=GNU-PROC # LLVM-PROC: Type: 0xFF00 # GNU-PROC: Type: ff00 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: 0xff00 - Machine: EM_X86_64 diff --git a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test index 991ed8b9cb5fd..f568bfe5ed00b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/ELF/verdef-invalid.test @@ -2,7 +2,7 @@ ## Check that we report a warning when sh_link references a non-existent section. -# RUN: yaml2obj %s --docnum=1 -o %t1 +# RUN: yaml2obj %s --docnum=1 -DLINK=0xFF -o %t1 # RUN: llvm-readobj -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM --implicit-check-not="warning:" -DFILE=%t1 # RUN: llvm-readelf -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU --implicit-check-not="warning:" -DFILE=%t1 @@ -21,34 +21,21 @@ FileHeader: Sections: - Name: .gnu.version_d Type: SHT_GNU_verdef - Link: 0xFF + Link: [[LINK]] Info: 0x0 Entries: [] ## Check that we report a warning when the sh_link field of a SHT_GNU_verdef section references a non-string table section. -# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: yaml2obj %s --docnum=1 -DLINK=0x0 -o %t2 # RUN: llvm-readobj -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t2 # RUN: llvm-readelf -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t2 # INVALID-STRING-TABLE: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verdef section with index 1: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: 0x0 - Info: 0x0 - Entries: [] - ## Check that we report a warning when we can't read the content of the SHT_GNU_verdef section. -# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: yaml2obj %s --docnum=2 -o %t3 # RUN: llvm-readobj -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t3 # RUN: llvm-readelf -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t3 @@ -73,7 +60,7 @@ DynamicSymbols: ## Check that we report a warning when a SHT_GNU_verdef section contains a version definition ## that goes past the end of the section. -# RUN: yaml2obj %s --docnum=4 -o %t4 +# RUN: yaml2obj %s --docnum=3 -o %t4 # RUN: llvm-readobj -V %t4 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t4 # RUN: llvm-readelf -V %t4 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t4 @@ -104,7 +91,7 @@ DynamicSymbols: ## Check that we report a warning when a SHT_GNU_verdef section contains a version definition ## that refers to an auxiliary entry that goes past the end of the section. -# RUN: yaml2obj %s --docnum=5 -o %t5 +# RUN: yaml2obj %s --docnum=4 -o %t5 # RUN: llvm-readobj -V %t5 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t5 # RUN: llvm-readelf -V %t5 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t5 @@ -135,7 +122,7 @@ DynamicSymbols: ## Check that we can dump a SHT_GNU_verdef section properly even if it contains version names strings ## that overrun the linked string table. 
-# RUN: yaml2obj %s --docnum=6 -o %t6 +# RUN: yaml2obj %s --docnum=5 -o %t6 # RUN: llvm-readobj -V %t6 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-LLVM --implicit-check-not="warning:" -DFILE=%t6 # RUN: llvm-readelf -V %t6 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-GNU --implicit-check-not="warning:" -DFILE=%t6 @@ -180,7 +167,7 @@ DynamicSymbols: ## Check we report a warning when a version definition is not correctly aligned in memory. -# RUN: yaml2obj %s --docnum=7 -o %t7 +# RUN: yaml2obj %s --docnum=6 -o %t7 # RUN: llvm-readobj -V %t7 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t7 # RUN: llvm-readelf -V %t7 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t7 @@ -211,7 +198,7 @@ DynamicSymbols: ## Check we report a warning when an auxiliary entry is not correctly aligned in memory. -# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: yaml2obj %s --docnum=7 -o %t8 # RUN: llvm-readobj -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t8 # RUN: llvm-readelf -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t8 @@ -237,7 +224,7 @@ DynamicSymbols: ## Check how we handle a version definition entry with an unsupported version. -# RUN: yaml2obj %s --docnum=9 -o %t9 +# RUN: yaml2obj %s --docnum=8 -o %t9 # RUN: llvm-readobj -V %t9 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t9 # RUN: llvm-readelf -V %t9 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t9 @@ -266,7 +253,7 @@ DynamicSymbols: ## Check we report a warning when trying to print version symbols, but SHT_GNU_verdef ## is invalid due to any reason. -# RUN: yaml2obj %s --docnum=10 -o %t10 +# RUN: yaml2obj %s --docnum=9 -o %t10 # RUN: llvm-readobj -V %t10 2>&1 | FileCheck %s --check-prefix=INVALID-VERDEF-LLVM -DFILE=%t10 # RUN: llvm-readelf -V %t10 2>&1 | FileCheck %s --check-prefix=INVALID-VERDEF-GNU -DFILE=%t10 diff --git a/llvm/test/tools/llvm-readobj/ELF/versym-invalid.test b/llvm/test/tools/llvm-readobj/ELF/versym-invalid.test index d495b1cfd0630..ce32644462db7 100644 --- a/llvm/test/tools/llvm-readobj/ELF/versym-invalid.test +++ b/llvm/test/tools/llvm-readobj/ELF/versym-invalid.test @@ -2,11 +2,11 @@ ## Check that we report a warning when sh_link references a non-existent section. -# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: yaml2obj --docnum=1 -DLINK=0xFF %s -o %t1 # RUN: llvm-readelf -V %t1 2>&1 | FileCheck -DFILE=%t1 %s --check-prefix=GNU-INVALID-LINK # RUN: llvm-readobj -V %t1 2>&1 | FileCheck -DFILE=%t1 %s --check-prefix=LLVM-INVALID-LINK -# GNU-INVALID-LINK: Version symbols section '.gnu.version' contains 0 entries: +# GNU-INVALID-LINK: Version symbols section '.gnu.version' contains 1 entries: # GNU-INVALID-LINK-NEXT: warning: '[[FILE]]': invalid section linked to SHT_GNU_versym section with index 1: invalid section index: 255 # GNU-INVALID-LINK-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 255 () @@ -23,13 +23,13 @@ FileHeader: Sections: - Name: .gnu.version Type: SHT_GNU_versym - Link: 0xFF - Entries: [ ] + Link: [[LINK]] + Entries: [ 0 ] ## Check that we report a warning when the sh_link field of a SHT_GNU_versym section does not reference ## a dynamic symbol table section. 
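FileCheck's -D option works much like yaml2obj's: -DFILE=%t defines a pattern variable that CHECK lines reference as [[FILE]], so expected warnings can embed the temporary file path portably. A minimal, hypothetical pairing in the style used throughout these tests:

# RUN: llvm-readobj -V %t 2>&1 | FileCheck -DFILE=%t %s --check-prefix=WARN
# WARN: warning: '[[FILE]]': invalid section linked to SHT_GNU_versym section with index 1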
-# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: yaml2obj --docnum=1 -DLINK=0x0 %s -o %t2 # RUN: llvm-readelf -V %t2 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=INVALID-SYMBOL-TABLE-GNU # RUN: llvm-readobj -V %t2 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=INVALID-SYMBOL-TABLE-LLVM @@ -42,23 +42,11 @@ Sections: # INVALID-SYMBOL-TABLE-LLVM-NEXT: warning: '[[FILE]]': invalid section linked to SHT_GNU_versym section with index 1: expected SHT_DYNSYM, but got SHT_NULL # INVALID-SYMBOL-TABLE-LLVM-NEXT: ] ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_X86_64 -Sections: - - Name: .gnu.version - Type: SHT_GNU_versym - Link: 0x0 - Entries: [ 0 ] - ## Check we report a warning when something is wrong with a string table linked to a symbol table that ## is linked with SHT_GNU_versym. In this case we are unable to produce LLVM style output, ## but GNU style is fine because it does not need that string table. -# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: yaml2obj --docnum=2 %s -o %t3 # RUN: llvm-readelf -V %t3 2>&1 | FileCheck -DFILE=%t3 %s --check-prefix=INVALID-STRING-TABLE-GNU # RUN: llvm-readobj -V %t3 2>&1 | FileCheck -DFILE=%t3 %s --check-prefix=INVALID-STRING-TABLE-LLVM @@ -88,7 +76,7 @@ DynamicSymbols: [] ## Check we report a warning when a SHT_GNU_versym section is not correctly aligned in memory. -# RUN: yaml2obj --docnum=4 %s -o %t4 +# RUN: yaml2obj --docnum=3 %s -o %t4 # RUN: llvm-readelf -V %t4 2>&1 | FileCheck -DFILE=%t4 %s --check-prefix=MISALIGNED-GNU # RUN: llvm-readobj -V %t4 2>&1 | FileCheck -DFILE=%t4 %s --check-prefix=MISALIGNED-LLVM @@ -114,7 +102,7 @@ Sections: ## Check we report a warning when a SHT_GNU_versym section has an invalid entry size. -# RUN: yaml2obj --docnum=5 %s -o %t5 +# RUN: yaml2obj --docnum=4 %s -o %t5 # RUN: llvm-readelf -V --dyn-syms %t5 2>&1 | FileCheck -DFILE=%t5 %s --check-prefix=INVALID-ENT-SIZE-GNU # RUN: llvm-readobj -V --dyn-syms %t5 2>&1 | FileCheck -DFILE=%t5 %s --check-prefix=INVALID-ENT-SIZE-LLVM @@ -168,7 +156,7 @@ DynamicSymbols: ## Check we report a warning when the number of version entries does not match the number of symbols in the associated symbol table. -# RUN: yaml2obj --docnum=6 %s -o %t6 +# RUN: yaml2obj --docnum=5 %s -o %t6 # RUN: llvm-readelf -V %t6 2>&1 | FileCheck -DFILE=%t6 %s --check-prefix=SYMBOLS-NUM-MISMATCH-GNU # RUN: llvm-readobj -V %t6 2>&1 | FileCheck -DFILE=%t6 %s --check-prefix=SYMBOLS-NUM-MISMATCH-LLVM @@ -199,7 +187,7 @@ DynamicSymbols: ## Check we can dump a SHT_GNU_versym section when it is linked to a custom dynamic symbol ## table that is not called ".dynsym". -# RUN: yaml2obj --docnum=7 %s -o %t7 +# RUN: yaml2obj --docnum=6 %s -o %t7 # RUN: llvm-readelf -V %t7 2>&1 | FileCheck -DFILE=%t7 %s --check-prefix=CUSTOM-SYMTAB-GNU # RUN: llvm-readobj -V %t7 2>&1 | FileCheck -DFILE=%t7 %s --check-prefix=CUSTOM-SYMTAB-LLVM @@ -238,7 +226,7 @@ DynamicSymbols: ## Version index in a SHT_GNU_versym section overflows the version map. ## Check we report it when trying to dump dynamic symbols. -# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: yaml2obj %s --docnum=7 -o %t8 # RUN: llvm-readobj --dyn-syms %t8 2>&1 \ # RUN: | FileCheck -DFILE=%t8 --implicit-check-not=warning --check-prefix=VERSION-OVERFLOW-LLVM %s # RUN: llvm-readelf --dyn-syms %t8 2>&1 \ From 3de93230010fc8fbb76d5cb6bde709f4ed296a1c Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 23 Jul 2020 17:10:31 +0300 Subject: [PATCH 175/600] [llvm-readobj] - Don't stop dumping when the name of a relocation section can't be read. 
This removes undesired `unwrapOrError` calls from printRelocations() methods. Differential revision: https://reviews.llvm.org/D84408 --- llvm/test/Object/invalid.test | 2 +- .../llvm-readobj/ELF/dependent-libraries.test | 2 +- .../llvm-readobj/ELF/invalid-shstrndx.test | 4 +- .../tools/llvm-readobj/ELF/relocations.test | 47 ++++++++++++++-- ...ctions-no-section-header-string-table.test | 6 +-- llvm/tools/llvm-readobj/ELFDumper.cpp | 54 ++++++++----------- 6 files changed, 75 insertions(+), 40 deletions(-) diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index bc95c5bc6e905..5c04eca1e96ba 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -343,7 +343,7 @@ FileHeader: # RUN: yaml2obj %s --docnum=16 -o %t16 # RUN: llvm-readobj --sections %t16 2>&1 | FileCheck -DFILE=%t16 --check-prefix=BROKEN-SECNAME %s -## BROKEN-SECNAME: warning: '[[FILE]]': a section [index 2] has an invalid sh_name (0xb) offset which goes past the end of the section name string table +## BROKEN-SECNAME: warning: '[[FILE]]': unable to get the name of SHT_STRTAB section with index 2: a section [index 2] has an invalid sh_name (0xb) offset which goes past the end of the section name string table --- !ELF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test b/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test index 440217f3253d3..7c6759c2390fb 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test +++ b/llvm/test/tools/llvm-readobj/ELF/dependent-libraries.test @@ -72,7 +72,7 @@ Sections: # MIX-GNU-NEXT: [ 0] bar # MIX-GNU-NEXT: [ 4] xxx # MIX-GNU-EMPTY: -# MIX-GNU-NEXT: warning: '[[FILE]]': cannot get section name of SHT_LLVM_DEPENDENT_LIBRARIES section: a section [index 6] has an invalid sh_name (0x10000) offset which goes past the end of the section name string table +# MIX-GNU-NEXT: warning: '[[FILE]]': unable to get the name of SHT_LLVM_DEPENDENT_LIBRARIES section with index 6: a section [index 6] has an invalid sh_name (0x10000) offset which goes past the end of the section name string table # MIX-GNU-NEXT: Dependent libraries section at offset 0x53 contains 1 entries: # MIX-GNU-NEXT: [ 0] baz diff --git a/llvm/test/tools/llvm-readobj/ELF/invalid-shstrndx.test b/llvm/test/tools/llvm-readobj/ELF/invalid-shstrndx.test index 50bdd3fc1fb66..1bb9e90b1eade 100644 --- a/llvm/test/tools/llvm-readobj/ELF/invalid-shstrndx.test +++ b/llvm/test/tools/llvm-readobj/ELF/invalid-shstrndx.test @@ -20,9 +20,9 @@ # LLVM: StringTableSectionIndex: 255 # LLVM-NEXT: } # LLVM-NEXT: Sections [ -# LLVM-NEXT: warning: '[[FILE]]': section header string table index 255 does not exist # LLVM-NEXT: Section { # LLVM-NEXT: Index: 0 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_NULL section with index 0: section header string table index 255 does not exist # LLVM-NEXT: Name: (0) # LLVM-NEXT: Type: SHT_NULL (0x0) # LLVM-NEXT: Flags [ (0x0) @@ -36,8 +36,10 @@ # LLVM-NEXT: EntrySize: 0 # LLVM-NEXT: } # LLVM: Index: 1 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_STRTAB section with index 1: section header string table index 255 does not exist # LLVM-NEXT: Name: (11) # LLVM: Index: 2 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_STRTAB section with index 2: section header string table index 255 does not exist # LLVM-NEXT: Name: (1) --- !ELF diff --git a/llvm/test/tools/llvm-readobj/ELF/relocations.test b/llvm/test/tools/llvm-readobj/ELF/relocations.test index 5c8e1086e98ad..5dd5762fe19b2 100644 
--- a/llvm/test/tools/llvm-readobj/ELF/relocations.test +++ b/llvm/test/tools/llvm-readobj/ELF/relocations.test @@ -125,9 +125,10 @@ Sections: - Name: .text Type: SHT_PROGBITS Content: '00FFFFFFFF020000001234567890ABCDEF' - - Name: .rel.text - Type: SHT_REL - Info: .text + - Name: .rel.text + Type: SHT_REL + Info: .text + ShName: [[SHNAME=]] Relocations: - Symbol: rel_0 Type: R_X86_64_NONE @@ -143,6 +144,7 @@ Sections: - Name: .rela.text Type: SHT_RELA Info: .text + ShName: [[SHNAME=]] Relocations: - Symbol: rela_0 Type: R_X86_64_NONE @@ -369,3 +371,42 @@ Symbols: - Name: rela_maxpos Section: .text Value: 0xFFFFFFFF + +## Check we report a warning when the name of a relocation section can't be read. Check we continue dumping. +# RUN: yaml2obj %s --docnum=1 -DSHNAME=0xffffffff -o %tshname +# RUN: llvm-readobj --relocs %tshname 2>&1 | FileCheck %s -DFILE=%tshname --check-prefix=LLVM-SHNAME +# RUN: llvm-readelf --relocs %tshname 2>&1 | FileCheck %s -DFILE=%tshname --check-prefix=GNU-SHNAME + +# LLVM-SHNAME: Relocations [ +# LLVM-SHNAME-NEXT: warning: '[[FILE]]': unable to get the name of SHT_REL section with index 2: a section [index 2] has an invalid sh_name (0xffffffff) offset which goes past the end of the section name string table +# LLVM-SHNAME-NEXT: Section (2) { +# LLVM-SHNAME-NEXT: 0x0 R_X86_64_NONE rel_0 0x0 +# LLVM-SHNAME-NEXT: 0x1 R_X86_64_PC32 rel_neg 0x0 +# LLVM-SHNAME-NEXT: 0x5 R_X86_64_PLT32 rel_pos 0x0 +# LLVM-SHNAME-NEXT: 0x9 R_X86_64_64 rel_64 0x0 +# LLVM-SHNAME-NEXT: } +# LLVM-SHNAME-NEXT: warning: '[[FILE]]': unable to get the name of SHT_RELA section with index 3: a section [index 3] has an invalid sh_name (0xffffffff) offset which goes past the end of the section name string table +# LLVM-SHNAME-NEXT: Section (3) { +# LLVM-SHNAME-NEXT: 0x0 R_X86_64_NONE rela_0 0x0 +# LLVM-SHNAME-NEXT: 0x1 R_X86_64_PC32 rela_neg 0xFFFFFFFFFFFFFFFF +# LLVM-SHNAME-NEXT: 0x5 R_X86_64_PLT32 rela_pos 0x2 +# LLVM-SHNAME-NEXT: 0xFFFFFFFFFFFFFFFF R_X86_64_64 rela_minneg 0x8000000000000000 +# LLVM-SHNAME-NEXT: 0x9 R_X86_64_32S rela_maxpos 0x7FFFFFFFFFFFFFFF +# LLVM-SHNAME-NEXT: } +# LLVM-SHNAME-NEXT: ] + +# GNU-SHNAME: warning: '[[FILE]]': unable to get the name of SHT_REL section with index 2: a section [index 2] has an invalid sh_name (0xffffffff) offset which goes past the end of the section name string table +# GNU-SHNAME: Relocation section '' at offset 0x51 contains 4 entries: +# GNU-SHNAME-NEXT: Offset Info Type Symbol's Value Symbol's Name +# GNU-SHNAME-NEXT: 0000000000000000 0000000100000000 R_X86_64_NONE 0000000000000000 rel_0 +# GNU-SHNAME-NEXT: 0000000000000001 0000000200000002 R_X86_64_PC32 0000000000000001 rel_neg +# GNU-SHNAME-NEXT: 0000000000000005 0000000300000004 R_X86_64_PLT32 0000000000000002 rel_pos +# GNU-SHNAME-NEXT: 0000000000000009 0000000400000001 R_X86_64_64 ffffffffffffffff rel_64 +# GNU-SHNAME: warning: '[[FILE]]': unable to get the name of SHT_RELA section with index 3: a section [index 3] has an invalid sh_name (0xffffffff) offset which goes past the end of the section name string table +# GNU-SHNAME: Relocation section '' at offset 0x91 contains 5 entries: +# GNU-SHNAME-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# GNU-SHNAME-NEXT: 0000000000000000 0000000500000000 R_X86_64_NONE 0000000000000000 rela_0 + 0 +# GNU-SHNAME-NEXT: 0000000000000001 0000000600000002 R_X86_64_PC32 0000000000000001 rela_neg - 1 +# GNU-SHNAME-NEXT: 0000000000000005 0000000700000004 R_X86_64_PLT32 0000000000000002 rela_pos + 2 +# GNU-SHNAME-NEXT: ffffffffffffffff 
0000000800000001 R_X86_64_64 0000000000000003 rela_minneg - 8000000000000000 +# GNU-SHNAME-NEXT: 0000000000000009 000000090000000b R_X86_64_32S ffffffffffffffff rela_maxpos + 7fffffffffffffff diff --git a/llvm/test/tools/llvm-readobj/ELF/sections-no-section-header-string-table.test b/llvm/test/tools/llvm-readobj/ELF/sections-no-section-header-string-table.test index 95a72a4d05136..a708af795c819 100644 --- a/llvm/test/tools/llvm-readobj/ELF/sections-no-section-header-string-table.test +++ b/llvm/test/tools/llvm-readobj/ELF/sections-no-section-header-string-table.test @@ -43,9 +43,9 @@ # LLVM-NEXT: AddressAlignment: 0 # LLVM-NEXT: EntrySize: 0 # LLVM-NEXT: } -# LLVM-NEXT: warning: '[[FILE]]': a section [index 2] has an invalid sh_name (0x1) offset which goes past the end of the section name string table # LLVM-NEXT: Section { # LLVM-NEXT: Index: 2 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_PROGBITS section with index 2: a section [index 2] has an invalid sh_name (0x1) offset which goes past the end of the section name string table # LLVM-NEXT: Name: (1) # LLVM-NEXT: Type: SHT_PROGBITS (0x1) # LLVM-NEXT: Flags [ (0x0) @@ -58,9 +58,9 @@ # LLVM-NEXT: AddressAlignment: 0 # LLVM-NEXT: EntrySize: 0 # LLVM-NEXT: } -# LLVM-NEXT: warning: '[[FILE]]': a section [index 3] has an invalid sh_name (0x15) offset which goes past the end of the section name string table # LLVM-NEXT: Section { # LLVM-NEXT: Index: 3 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_STRTAB section with index 3: a section [index 3] has an invalid sh_name (0x15) offset which goes past the end of the section name string table # LLVM-NEXT: Name: (21) # LLVM-NEXT: Type: SHT_STRTAB (0x3) # LLVM-NEXT: Flags [ (0x0) @@ -73,9 +73,9 @@ # LLVM-NEXT: AddressAlignment: 1 # LLVM-NEXT: EntrySize: 0 # LLVM-NEXT: } -# LLVM-NEXT: warning: '[[FILE]]': a section [index 4] has an invalid sh_name (0xb) offset which goes past the end of the section name string table # LLVM-NEXT: Section { # LLVM-NEXT: Index: 4 +# LLVM-NEXT: warning: '[[FILE]]': unable to get the name of SHT_STRTAB section with index 4: a section [index 4] has an invalid sh_name (0xb) offset which goes past the end of the section name string table # LLVM-NEXT: Name: (11) # LLVM-NEXT: Type: SHT_STRTAB (0x3) # LLVM-NEXT: Flags [ (0x0) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 35b5e2637b4d4..2edca5aaa0b9d 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -778,6 +778,9 @@ template class DumpStyle { virtual void printRelrReloc(const Elf_Relr &R) = 0; void printRelocationsHelper(const ELFFile *Obj, const Elf_Shdr &Sec); + StringRef getPrintableSectionName(const ELFFile *Obj, + const Elf_Shdr &Sec) const; + void reportUniqueWarning(Error Err) const; StringRef FileName; @@ -3732,7 +3735,6 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { continue; HasRelocSections = true; - StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec)); unsigned Entries; // Android's packed relocation section needs to be unpacked first // to get the actual number of entries. 
@@ -3748,6 +3750,7 @@ template void GNUStyle::printRelocations(const ELFO *Obj) { } uintX_t Offset = Sec.sh_offset; + StringRef Name = this->getPrintableSectionName(Obj, Sec); OS << "\nRelocation section '" << Name << "' at offset 0x" << to_hexString(Offset, false) << " contains " << Entries << " entries:\n"; @@ -3877,17 +3880,8 @@ void GNUStyle::printSymtabMessage(const ELFO *Obj, const Elf_Shdr *Symtab, size_t Entries, bool NonVisibilityBitsUsed) { StringRef Name; - if (Symtab) { - if (Expected NameOrErr = Obj->getSectionName(Symtab)) { - Name = *NameOrErr; - } else { - this->reportUniqueWarning(createError("unable to get the name of " + - describe(Obj, *Symtab) + ": " + - toString(NameOrErr.takeError()))); - Name = ""; - } - } - + if (Symtab) + Name = this->getPrintableSectionName(Obj, *Symtab); if (!Name.empty()) OS << "\nSymbol table '" << Name << "'"; else @@ -5519,6 +5513,20 @@ void DumpStyle::printRelocationsHelper(const ELFFile *Obj, } } +template +StringRef DumpStyle::getPrintableSectionName(const ELFFile *Obj, + const Elf_Shdr &Sec) const { + StringRef Name = ""; + if (Expected SecNameOrErr = + Obj->getSectionName(&Sec, this->dumper()->WarningHandler)) + Name = *SecNameOrErr; + else + this->reportUniqueWarning(createError("unable to get the name of " + + describe(Obj, Sec) + ": " + + toString(SecNameOrErr.takeError()))); + return Name; +} + template void GNUStyle::printDependentLibs(const ELFFile *Obj) { bool SectionStarted = false; @@ -5544,16 +5552,7 @@ void GNUStyle::printDependentLibs(const ELFFile *Obj) { PrintSection(); SectionStarted = true; Current.Offset = Shdr.sh_offset; - Expected Name = Obj->getSectionName(&Shdr); - if (!Name) { - Current.Name = ""; - this->reportUniqueWarning( - createError("cannot get section name of " - "SHT_LLVM_DEPENDENT_LIBRARIES section: " + - toString(Name.takeError()))); - } else { - Current.Name = *Name; - } + Current.Name = this->getPrintableSectionName(Obj, Shdr); }; auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) { SecEntries.push_back(NameOffset{Lib, Offset}); @@ -6135,7 +6134,7 @@ template void LLVMStyle::printRelocations(const ELFO *Obj) { if (!isRelocationSec(Sec)) continue; - StringRef Name = unwrapOrError(this->FileName, Obj->getSectionName(&Sec)); + StringRef Name = this->getPrintableSectionName(Obj, Sec); unsigned SecNdx = &Sec - &cantFail(Obj->sections()).front(); W.startLine() << "Section (" << SecNdx << ") " << Name << " {\n"; W.indent(); @@ -6205,16 +6204,9 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { std::vector> FlagsList = getSectionFlagsForTarget(Obj->getHeader()->e_machine); for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { - StringRef Name = ""; - if (Expected SecNameOrErr = - Obj->getSectionName(&Sec, this->dumper()->WarningHandler)) - Name = *SecNameOrErr; - else - this->reportUniqueWarning(SecNameOrErr.takeError()); - DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); - W.printNumber("Name", Name, Sec.sh_name); + W.printNumber("Name", this->getPrintableSectionName(Obj, Sec), Sec.sh_name); W.printHex( "Type", object::getELFSectionTypeName(Obj->getHeader()->e_machine, Sec.sh_type), From 76c3ec814dec8eef020490b5d3a640d873b5918b Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Sat, 1 Aug 2020 19:03:40 +0200 Subject: [PATCH 176/600] [clang][Tooling] Optimize addTargetAndMode in case of invalid modes This skips searching for `target` related flags in the existing args if we don't have a valid target to insert. 
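As a rough standalone sketch of the pattern (illustrative only; the helper name, the plain std::string arguments, and the prefix test are invented for this note and are not clang's API), seeding the should-insert flag with the validity check means the scan over the existing arguments can only clear the flag, so the string comparisons short-circuit whenever there is nothing to insert:

  #include <string>
  #include <vector>

  // Insert Prefix+Value right after Args[0] (the program name) unless Value
  // is empty or some existing argument already starts with Prefix.
  static void addFlagIfMissing(std::vector<std::string> &Args,
                               const std::string &Prefix,
                               const std::string &Value) {
    bool ShouldAdd = !Value.empty(); // nothing valid to insert: stays false
    for (auto It = Args.begin() + 1; It != Args.end(); ++It)
      ShouldAdd = ShouldAdd && It->compare(0, Prefix.size(), Prefix) != 0;
    if (ShouldAdd)
      Args.insert(Args.begin() + 1, Prefix + Value);
  }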
Depends on D85076

Differential Revision: https://reviews.llvm.org/D85077
---
 clang/lib/Tooling/Tooling.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp
index 0593f0cc1d195..1ee8ce28c2efa 100644
--- a/clang/lib/Tooling/Tooling.cpp
+++ b/clang/lib/Tooling/Tooling.cpp
@@ -258,22 +258,23 @@ void addTargetAndModeForProgramName(std::vector<std::string> &CommandLine,
   // --driver-mode=X
   const std::string DriverModeOPT =
       Table.getOption(driver::options::OPT_driver_mode).getPrefixedName();
-  bool AlreadyHasTarget = false;
-  bool AlreadyHasMode = false;
+  auto TargetMode =
+      driver::ToolChain::getTargetAndModeFromProgramName(InvokedAs);
+  // No need to search for target args if we don't have a target/mode to insert.
+  bool ShouldAddTarget = TargetMode.TargetIsValid;
+  bool ShouldAddMode = TargetMode.DriverMode != nullptr;
   // Skip CommandLine[0].
   for (auto Token = ++CommandLine.begin(); Token != CommandLine.end();
        ++Token) {
     StringRef TokenRef(*Token);
-    AlreadyHasTarget |=
-        TokenRef.startswith(TargetOPT) || TokenRef.equals(TargetOPTLegacy);
-    AlreadyHasMode |= TokenRef.startswith(DriverModeOPT);
+    ShouldAddTarget = ShouldAddTarget && !TokenRef.startswith(TargetOPT) &&
+                      !TokenRef.equals(TargetOPTLegacy);
+    ShouldAddMode = ShouldAddMode && !TokenRef.startswith(DriverModeOPT);
   }
-  auto TargetMode =
-      driver::ToolChain::getTargetAndModeFromProgramName(InvokedAs);
-  if (!AlreadyHasMode && TargetMode.DriverMode) {
+  if (ShouldAddMode) {
     CommandLine.insert(++CommandLine.begin(), TargetMode.DriverMode);
   }
-  if (!AlreadyHasTarget && TargetMode.TargetIsValid) {
+  if (ShouldAddTarget) {
     CommandLine.insert(++CommandLine.begin(),
                        TargetOPT + TargetMode.TargetPrefix);
   }

From 86e1b73507f3738f10eefb580d7c5e9adf17c6c0 Mon Sep 17 00:00:00 2001
From: Denys Petrov
Date: Fri, 31 Jul 2020 18:57:04 +0300
Subject: [PATCH 177/600] [analyzer] Simplify function SVal::getAsSymbolicExpression and similar ones

Summary: Simplify the functions SVal::getAsSymbolicExpression,
SVal::getAsSymExpr and SVal::getAsSymbol. After revision I concluded that
`getAsSymbolicExpression` and `getAsSymExpr` repeat the functionality of
`getAsSymbol`, and thus they can be removed.

Fix: Remove the functions SVal::getAsSymbolicExpression and SVal::getAsSymExpr.

Differential Revision: https://reviews.llvm.org/D85034
---
 .../StaticAnalyzer/Core/PathSensitive/SVals.h  |  6 ------
 .../Checkers/CheckObjCDealloc.cpp              |  2 +-
 .../Checkers/MacOSKeychainAPIChecker.cpp       |  2 +-
 .../RetainCountDiagnostics.cpp                 |  2 +-
 clang/lib/StaticAnalyzer/Checkers/Taint.cpp    |  2 +-
 clang/lib/StaticAnalyzer/Core/ProgramState.cpp |  3 ---
 clang/lib/StaticAnalyzer/Core/SValBuilder.cpp  |  6 +++---
 clang/lib/StaticAnalyzer/Core/SVals.cpp        | 18 ------------------
 .../Core/SimpleConstraintManager.cpp           |  4 ++--
 .../StaticAnalyzer/Core/SimpleSValBuilder.cpp  |  2 +-
 10 files changed, 10 insertions(+), 37 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
index 1abe297820886..a640d815a5ce4 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
@@ -182,12 +182,6 @@ class SVal {
   /// should continue to the base regions if the region is not symbolic.
   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;

-  /// getAsSymbolicExpression - If this Sval wraps a symbolic expression then
-  /// return that expression. Otherwise return NULL.
- const SymExpr *getAsSymbolicExpression() const; - - const SymExpr *getAsSymExpr() const; - const MemRegion *getAsRegion() const; /// printJson - Pretty-prints in JSON format. diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp index 13836f08a61ef..78b3c209ad6bc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp @@ -406,7 +406,7 @@ ProgramStateRef ObjCDeallocChecker::evalAssume(ProgramStateRef State, SVal Cond, if (State->get().isEmpty()) return State; - auto *CondBSE = dyn_cast_or_null(Cond.getAsSymExpr()); + auto *CondBSE = dyn_cast_or_null(Cond.getAsSymbol()); if (!CondBSE) return State; diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp index 87477e96d2d16..a157ee2da5df4 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -509,7 +509,7 @@ ProgramStateRef MacOSKeychainAPIChecker::evalAssume(ProgramStateRef State, if (AMap.isEmpty()) return State; - auto *CondBSE = dyn_cast_or_null(Cond.getAsSymExpr()); + auto *CondBSE = dyn_cast_or_null(Cond.getAsSymbol()); if (!CondBSE) return State; BinaryOperator::Opcode OpCode = CondBSE->getOpcode(); diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index 1d8ed90f7590c..854646a8779d7 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -177,7 +177,7 @@ static Optional findArgIdxOfSymbol(ProgramStateRef CurrSt, for (unsigned Idx = 0; Idx < (*CE)->getNumArgs(); Idx++) if (const MemRegion *MR = (*CE)->getArgSVal(Idx).getAsRegion()) if (const auto *TR = dyn_cast(MR)) - if (CurrSt->getSVal(MR, TR->getValueType()).getAsSymExpr() == Sym) + if (CurrSt->getSVal(MR, TR->getValueType()).getAsSymbol() == Sym) return Idx; return None; diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp index 5b46ffb656cf8..71b2ab834a07a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -148,7 +148,7 @@ bool taint::isTainted(ProgramStateRef State, const Stmt *S, } bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { - if (const SymExpr *Sym = V.getAsSymExpr()) + if (SymbolRef Sym = V.getAsSymbol()) return isTainted(State, Sym, Kind); if (const MemRegion *Reg = V.getAsRegion()) return isTainted(State, Reg, Kind); diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp index 006a4006b7fc9..1ccb0de92fba3 100644 --- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp +++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp @@ -582,9 +582,6 @@ bool ScanReachableSymbols::scan(SVal val) { if (SymbolRef Sym = val.getAsSymbol()) return scan(Sym); - if (const SymExpr *Sym = val.getAsSymbolicExpression()) - return scan(Sym); - if (Optional X = val.getAs()) return scan(*X); diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp index c00a2c8ba8a2c..5b6b6973b310c 100644 --- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp @@ -377,8 +377,8 
@@ Optional SValBuilder::getConstantVal(const Expr *E) { SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op, NonLoc LHS, NonLoc RHS, QualType ResultTy) { - const SymExpr *symLHS = LHS.getAsSymExpr(); - const SymExpr *symRHS = RHS.getAsSymExpr(); + SymbolRef symLHS = LHS.getAsSymbol(); + SymbolRef symRHS = RHS.getAsSymbol(); // TODO: When the Max Complexity is reached, we should conjure a symbol // instead of generating an Unknown value and propagate the taint info to it. @@ -492,7 +492,7 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val, if (getContext().getTypeSize(castTy) >= getContext().getTypeSize(originalTy)) return evalCast(val, castTy, originalTy); - const SymExpr *se = val.getAsSymbolicExpression(); + SymbolRef se = val.getAsSymbol(); if (!se) // Let evalCast handle non symbolic expressions. return evalCast(val, castTy, originalTy); diff --git a/clang/lib/StaticAnalyzer/Core/SVals.cpp b/clang/lib/StaticAnalyzer/Core/SVals.cpp index 9b5de6c3eb92b..465800fa67fce 100644 --- a/clang/lib/StaticAnalyzer/Core/SVals.cpp +++ b/clang/lib/StaticAnalyzer/Core/SVals.cpp @@ -116,8 +116,6 @@ SymbolRef SVal::getLocSymbolInBase() const { return nullptr; } -// TODO: The next 3 functions have to be simplified. - /// If this SVal wraps a symbol return that SymbolRef. /// Otherwise, return 0. /// @@ -132,22 +130,6 @@ SymbolRef SVal::getAsSymbol(bool IncludeBaseRegions) const { return getAsLocSymbol(IncludeBaseRegions); } -/// getAsSymbolicExpression - If this Sval wraps a symbolic expression then -/// return that expression. Otherwise return NULL. -const SymExpr *SVal::getAsSymbolicExpression() const { - if (Optional X = getAs()) - return X->getSymbol(); - - return getAsSymbol(); -} - -const SymExpr* SVal::getAsSymExpr() const { - const SymExpr* Sym = getAsSymbol(); - if (!Sym) - Sym = getAsSymbolicExpression(); - return Sym; -} - const MemRegion *SVal::getAsRegion() const { if (Optional X = getAs()) return X->getRegion(); diff --git a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp index 3709106ad44ce..f96974f97dcc5 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp @@ -57,7 +57,7 @@ ProgramStateRef SimpleConstraintManager::assumeAux(ProgramStateRef State, // SymIntExprs. if (!canReasonAbout(Cond)) { // Just add the constraint to the expression without trying to simplify. - SymbolRef Sym = Cond.getAsSymExpr(); + SymbolRef Sym = Cond.getAsSymbol(); assert(Sym); return assumeSymUnsupported(State, Sym, Assumption); } @@ -101,7 +101,7 @@ ProgramStateRef SimpleConstraintManager::assumeInclusiveRange( if (!canReasonAbout(Value)) { // Just add the constraint to the expression without trying to simplify. 
-    SymbolRef Sym = Value.getAsSymExpr();
+    SymbolRef Sym = Value.getAsSymbol();
     assert(Sym);
     return assumeSymInclusiveRange(State, Sym, From, To, InRange);
   }

diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 2e269f6a596e8..a64ed78ac3458 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -86,7 +86,7 @@ SVal SimpleSValBuilder::evalCastFromNonLoc(NonLoc val, QualType castTy) {
     return makeLocAsInteger(LI->getLoc(), castSize);
   }

-  if (const SymExpr *se = val.getAsSymbolicExpression()) {
+  if (SymbolRef se = val.getAsSymbol()) {
     QualType T = Context.getCanonicalType(se->getType());
     // If types are the same or both are integers, ignore the cast.
     // FIXME: Remove this hack when we support symbolic truncation/extension.

From 21fa82d5c63c30c745d5181889329084ac6d2767 Mon Sep 17 00:00:00 2001
From: Denys Petrov
Date: Fri, 31 Jul 2020 15:54:46 +0300
Subject: [PATCH 178/600] [analyzer] Introduce minor refactoring of SVal::getSubKind function

Summary: `BaseMask` occupies the lowest bits. The effect of applying the mask
is neutralized by the right-shift operation, which makes the mask redundant.

Fix: Remove a redundant bitwise operation.

Differential Revision: https://reviews.llvm.org/D85026
---
 clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
index a640d815a5ce4..a561ac67bf786 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
@@ -80,7 +80,7 @@ class SVal {
 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind,
 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
   };
-  enum { BaseBits = 2, BaseMask = 0x3 };
+  enum { BaseBits = 2, BaseMask = 0b11 };

 protected:
   const void *Data = nullptr;
@@ -116,7 +116,7 @@ class SVal {
   unsigned getRawKind() const { return Kind; }
   BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
-  unsigned getSubKind() const { return (Kind & ~BaseMask) >> BaseBits; }
+  unsigned getSubKind() const { return Kind >> BaseBits; }

   // This method is required for using SVal in a FoldingSetNode. It
   // extracts a unique signature for this SVal object.

From 5191f70ab1f4b0b9225b2e9e11584e199172418c Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Sat, 25 Jul 2020 21:52:33 +0200
Subject: [PATCH 179/600] [clangd] Support new/delete operators in TargetFinder.
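An illustrative snippet of what this enables, distilled from the tests added below (the struct and function names are made up for this example): target-finding on the `new` and `delete` tokens now resolves to the user-declared allocation functions.

  // TargetFinder now reports the operator declarations as the targets of
  // the CXXNewExpr and CXXDeleteExpr below.
  struct X {
    static void *operator new(unsigned long);
    static void operator delete(void *) noexcept;
  };
  void use() {
    X *p = new X(); // target: X::operator new
    delete p;       // target: X::operator delete
  }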
Differential Revision: https://reviews.llvm.org/D85028 --- clang-tools-extra/clangd/FindTarget.cpp | 6 ++++ clang-tools-extra/clangd/XRefs.cpp | 4 +++ .../clangd/unittests/FindTargetTests.cpp | 31 +++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index a346d6b662e9a..e4d2dddb4b5d3 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -460,6 +460,12 @@ struct TargetFinder { void VisitPseudoObjectExpr(const PseudoObjectExpr *POE) { Outer.add(POE->getSyntacticForm(), Flags); } + void VisitCXXNewExpr(const CXXNewExpr *CNE) { + Outer.add(CNE->getOperatorNew(), Flags); + } + void VisitCXXDeleteExpr(const CXXDeleteExpr *CDE) { + Outer.add(CDE->getOperatorDelete(), Flags); + } }; Visitor(*this, Flags).Visit(S); } diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 1fc89f3e08472..26653aa409d7d 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -238,6 +238,10 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, llvm::DenseMap ResultIndex; auto AddResultDecl = [&](const NamedDecl *D) { + // FIXME: Canonical declarations of some symbols might refer to built-in + // decls with possibly-invalid source locations (e.g. global new operator). + // In such cases we should pick up a redecl with valid source location + // instead of failing. D = llvm::cast(D->getCanonicalDecl()); auto Loc = makeLocation(AST.getASTContext(), nameLocation(*D, SM), MainFilePath); diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index 92095e871e201..8b872d6314d45 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -535,6 +535,7 @@ TEST_F(TargetDeclTest, OverloadExpr) { // FIXME: Auto-completion in a template requires disabling delayed template // parsing. Flags = {"-fno-delayed-template-parsing"}; + Flags.push_back("--target=x86_64-pc-linux-gnu"); Code = R"cpp( void func(int*); @@ -559,6 +560,36 @@ TEST_F(TargetDeclTest, OverloadExpr) { }; )cpp"; EXPECT_DECLS("UnresolvedMemberExpr", "void func(int *)", "void func(char *)"); + + Code = R"cpp( + struct X { + static void *operator new(unsigned long); + }; + auto* k = [[new]] X(); + )cpp"; + EXPECT_DECLS("CXXNewExpr", "static void *operator new(unsigned long)"); + Code = R"cpp( + void *operator new(unsigned long); + auto* k = [[new]] int(); + )cpp"; + EXPECT_DECLS("CXXNewExpr", "void *operator new(unsigned long)"); + + Code = R"cpp( + struct X { + static void operator delete(void *) noexcept; + }; + void k(X* x) { + [[delete]] x; + } + )cpp"; + EXPECT_DECLS("CXXDeleteExpr", "static void operator delete(void *) noexcept"); + Code = R"cpp( + void operator delete(void *) noexcept; + void k(int* x) { + [[delete]] x; + } + )cpp"; + EXPECT_DECLS("CXXDeleteExpr", "void operator delete(void *) noexcept"); } TEST_F(TargetDeclTest, DependentExprs) { From 18279a54b5d3382874924d6a3c7775b7e22598dc Mon Sep 17 00:00:00 2001 From: Nicholas Guy Date: Wed, 1 Jul 2020 11:35:58 +0100 Subject: [PATCH 180/600] [ARM] Fix IT block generation after Thumb2SizeReduce with -Oz Fixes a regression caused by D82439, in which IT blocks were no longer being generated when -Oz is present. This was due to the CPSR register being marked as dead, while this case was not accounted for. 
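For illustration, this is the shape of code that -Oz produces again with the fix (excerpted from the updated constant-hoisting test below): the compare-and-branch sequences collapse back into predicated IT blocks, such as

  cmp   r2, #1
  ittt  eq
  addeq r0, r1
  addeq r0, #1
  bxeq  lr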
Differential Revision: https://reviews.llvm.org/D83667 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 7 +++++ llvm/test/CodeGen/Thumb2/constant-hoisting.ll | 27 +++++++++---------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index b501dc06ca72f..0353cfd3d86f7 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -587,6 +587,13 @@ bool ARMBaseInstrInfo::DefinesPredicate( const MachineOperand &MO = MI.getOperand(i); if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) || (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) { + + // Filter out T1 instructions that have a dead CPSR, + // allowing IT blocks to be generated containing T1 instructions + const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead()) + continue; + Pred.push_back(MO); Found = true; } diff --git a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll index 5c8f934ce61d3..a106900dc3e99 100644 --- a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll +++ b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll @@ -37,26 +37,25 @@ define i32 @test_values(i32 %a, i32 %b) minsize optsize { ; CHECK-V7M: mov r2, r0 ; CHECK-V7M-NEXT: ldr r0, .LCPI0_0 ; CHECK-V7M-NEXT: cmp r2, #50 -; CHECK-V7M-NEXT: beq .LBB0_5 +; CHECK-V7M-NEXT: beq .LBB0_3 ; CHECK-V7M-NEXT: cmp r2, #1 -; CHECK-V7M-NEXT: beq .LBB0_7 +; CHECK-V7M-NEXT: ittt eq +; CHECK-V7M-NEXT: addeq r0, r1 +; CHECK-V7M-NEXT: addeq r0, #1 +; CHECK-V7M-NEXT: bxeq lr ; CHECK-V7M-NEXT: cmp r2, #30 -; CHECK-V7M-NEXT: beq .LBB0_8 -; CHECK-V7M-NEXT: cbnz r2, .LBB0_6 +; CHECK-V7M-NEXT: ittt eq +; CHECK-V7M-NEXT: addeq r0, r1 +; CHECK-V7M-NEXT: addeq r0, #2 +; CHECK-V7M-NEXT: bxeq lr +; CHECK-V7M-NEXT: cbnz r2, .LBB0_4 +; CHECK-V7M-NEXT: .LBB0_2: ; CHECK-V7M-NEXT: add r0, r1 ; CHECK-V7M-NEXT: bx lr -; CHECK-V7M-NEXT: .LBB0_5: +; CHECK-V7M-NEXT: .LBB0_3: ; CHECK-V7M-NEXT: add r0, r1 ; CHECK-V7M-NEXT: adds r0, #4 -; CHECK-V7M-NEXT: .LBB0_6: -; CHECK-V7M-NEXT: bx lr -; CHECK-V7M-NEXT: .LBB0_7: -; CHECK-V7M-NEXT: add r0, r1 -; CHECK-V7M-NEXT: adds r0, #1 -; CHECK-V7M-NEXT: bx lr -; CHECK-V7M-NEXT: .LBB0_8: -; CHECK-V7M-NEXT: add r0, r1 -; CHECK-V7M-NEXT: adds r0, #2 +; CHECK-V7M-NEXT: .LBB0_4: ; CHECK-V7M-NEXT: bx lr ; CHECK-V7M-NEXT: .p2align 2 ; CHECK-V7M-NEXT: .LCPI0_0: From ed0e4c70c99d3afd87fb202ab03bda40512677e7 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 3 Aug 2020 13:30:48 +0100 Subject: [PATCH 181/600] [clang][ARM] Add name-mangling test for direct __fp16 arguments. `clang/test/CodeGenCXX/fp16-mangle.cpp` tests pointers to __fp16, but if you give the `-fallow-half-arguments-and-returns` option, then clang can also leave an __fp16 unmodified as a function argument or return type. This regression test checks the name-mangling of that. 
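For reference, a worked decoding of the two mangled names the new test expects, following the Itanium C++ ABI (`Dh` is the builtin code for an IEEE half-precision float):

  _Z13fp16_argumentDh   => fp16_argument(__fp16)
  _Z11fp16_returnPFDhvE => fp16_return(__fp16 (*)())
                           (P = pointer, F...E = function type, v = empty parameter list)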
Reviewed By: miyuki Differential Revision: https://reviews.llvm.org/D85010 --- clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp diff --git a/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp new file mode 100644 index 0000000000000..15214e13ad8a7 --- /dev/null +++ b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -emit-llvm -o - -triple arm-arm-none-eabi -fallow-half-arguments-and-returns %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-arm-none-eabi -fallow-half-arguments-and-returns %s | FileCheck %s + +// Test name-mangling of __fp16 passed directly as a function argument +// (when that is permitted). + +// CHECK: define {{.*}}void @_Z13fp16_argumentDh(half %{{.*}}) +void fp16_argument(__fp16 arg) {} + +// Test name-mangling of __fp16 as a return type. The return type of +// fp16_return itself isn't mentioned in the mangled name, so to test +// this, we have to pass it a function pointer and make __fp16 the +// return type of that. + +// CHECK: define {{.*}}void @_Z11fp16_returnPFDhvE(half ()* %{{.*}}) +void fp16_return(__fp16 (*func)(void)) {} From b57ea8ef2a8a07ffd2c05389da3f759caaa49f3e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 2 Aug 2020 13:40:23 -0400 Subject: [PATCH 182/600] [InstCombine] add tests for xor-of-ors; NFC --- llvm/test/Transforms/InstCombine/xor.ll | 82 +++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 0fc39103d96fe..a133f2a0e009b 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -912,3 +912,85 @@ define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { %e = xor <2 x i32> %d, ret <2 x i32> %e } + +define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { +; CHECK-LABEL: @or_or_xor( +; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: ret i4 [[R]] +; + %o1 = or i4 %z, %x + %o2 = or i4 %z, %y + %r = xor i4 %o1, %o2 + ret i4 %r +} + +define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { +; CHECK-LABEL: @or_or_xor_commute1( +; CHECK-NEXT: [[O1:%.*]] = or i4 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: ret i4 [[R]] +; + %o1 = or i4 %x, %z + %o2 = or i4 %z, %y + %r = xor i4 %o1, %o2 + ret i4 %r +} + +define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { +; CHECK-LABEL: @or_or_xor_commute2( +; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: ret i4 [[R]] +; + %o1 = or i4 %z, %x + %o2 = or i4 %y, %z + %r = xor i4 %o1, %o2 + ret i4 %r +} + +define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { +; CHECK-LABEL: @or_or_xor_commute3( +; CHECK-NEXT: [[O1:%.*]] = or <2 x i4> [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[O1]], [[O2]] +; CHECK-NEXT: ret <2 x i4> [[R]] +; + %o1 = or <2 x i4> %x, %z + %o2 = or <2 x i4> %y, %z + %r = xor <2 x i4> %o1, %o2 + ret <2 x i4> %r +} + +define i4 @or_or_xor_use1(i4 %x, i4 %y, i4 %z, i4* %p) { +; CHECK-LABEL: @or_or_xor_use1( +; CHECK-NEXT: 
[[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: store i4 [[O1]], i4* [[P:%.*]], align 1 +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: ret i4 [[R]] +; + %o1 = or i4 %z, %x + store i4 %o1, i4* %p + %o2 = or i4 %z, %y + %r = xor i4 %o1, %o2 + ret i4 %r +} + +define i4 @or_or_xor_use2(i4 %x, i4 %y, i4 %z, i4* %p) { +; CHECK-LABEL: @or_or_xor_use2( +; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: store i4 [[O2]], i4* [[P:%.*]], align 1 +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: ret i4 [[R]] +; + %o1 = or i4 %z, %x + %o2 = or i4 %z, %y + store i4 %o2, i4* %p + %r = xor i4 %o1, %o2 + ret i4 %r +} From 2265d01f2a5bd153959701e22f5be2a40e1674a3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 08:11:06 -0400 Subject: [PATCH 183/600] [InstCombine] reduce xor-of-or's bitwise logic (PR46955) I tried to use m_Deferred() on this, but didn't find a clean way to do that. http://bugs.llvm.org/PR46955 https://alive2.llvm.org/ce/z/2h6QTq --- .../InstCombine/InstCombineAndOrXor.cpp | 14 +++++++++++ llvm/test/Transforms/InstCombine/xor.ll | 24 +++++++++---------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 030d2f203ed6c..ef1e8dbe01558 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3351,6 +3351,20 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(Op1, m_Not(m_Specific(A)))) return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); + // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. + // TODO: Loosen one-use restriction if common operand is a constant. 
+ Value *D; + if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) { + if (B == C || B == D) + std::swap(A, B); + if (A == C) + std::swap(C, D); + if (A == D) + return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), + Builder.CreateNot(A)); + } + if (auto *LHS = dyn_cast(I.getOperand(0))) if (auto *RHS = dyn_cast(I.getOperand(1))) if (Value *V = foldXorOfICmps(LHS, RHS, I)) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index a133f2a0e009b..2bdb837bff041 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -915,9 +915,9 @@ define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -928,9 +928,9 @@ define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute1( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %x, %z @@ -941,9 +941,9 @@ define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute2( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -954,9 +954,9 @@ define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_or_xor_commute3( -; CHECK-NEXT: [[O1:%.*]] = or <2 x i4> [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i4> [[Z:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %o1 = or <2 x i4> %x, %z From d8ef1d1251e3c0e11894ed82904dbab5e41c5711 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 22 Jul 2020 21:07:03 -0400 Subject: [PATCH 184/600] AMDGPU/GlobalISel: Fix selecting broken copies for s32->s64 anyext These should probably not be legal in the first place, but that might also be a pain. 
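In MIR terms (distilled from the updated tests below, with register names simplified for readability), a 32-to-64-bit G_ANYEXT now selects to a REG_SEQUENCE whose undefined high half comes from an IMPLICIT_DEF, instead of an ill-formed 32-to-64-bit COPY:

  %src:sreg_32 = COPY $sgpr0
  %hi:sreg_32 = IMPLICIT_DEF
  %dst:sreg_64 = REG_SEQUENCE %src, %subreg.sub0, %hi, %subreg.sub1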
--- .../AMDGPU/AMDGPUInstructionSelector.cpp | 27 ++++++- .../AMDGPU/GlobalISel/inst-select-anyext.mir | 76 +++++++++++++++++-- 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 709329b4c0c6d..5aceb40933c37 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1894,12 +1894,33 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { if (!DstTy.isScalar()) return false; - if (I.getOpcode() == AMDGPU::G_ANYEXT) - return selectCOPY(I); - // Artifact casts should never use vcc. const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI); + // FIXME: This should probably be illegal and split earlier. + if (I.getOpcode() == AMDGPU::G_ANYEXT) { + if (DstSize <= 32) + return selectCOPY(I); + + const TargetRegisterClass *SrcRC = + TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank, *MRI); + const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); + const TargetRegisterClass *DstRC = + TRI.getRegClassForSizeOnBank(DstSize, *DstBank, *MRI); + + Register UndefReg = MRI->createVirtualRegister(SrcRC); + BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); + BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(SrcReg) + .addImm(AMDGPU::sub0) + .addReg(UndefReg) + .addImm(AMDGPU::sub1); + I.eraseFromParent(); + + return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) && + RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI); + } + if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { // 64-bit should have been split up in RegBankSelect diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir index 58d01774f745b..dcad0a85e8e0e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir @@ -22,22 +22,88 @@ body: | ... --- +name: anyext_sgpr_s32_to_sgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + + ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64 + ; GCN: liveins: $sgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s64) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 + +... -name: anyext_sgpr_s16_to_sgpr_s64 +--- +name: anyext_sgpr_s16_to_sgpr_s64 legalized: true regBankSelected: true -body: | +tracksRegLiveness: true +body: | bb.0: liveins: $sgpr0 ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64 + ; GCN: liveins: $sgpr0 ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]] - ; GCN: $sgpr0_sgpr1 = COPY [[COPY1]] + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s16) = G_TRUNC %0 %2:sgpr(s64) = G_ANYEXT %1 - $sgpr0_sgpr1 = COPY %2 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: anyext_vgpr_s32_to_vgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s64) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 + +... + +--- +name: anyext_vgpr_s16_to_vgpr_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64 + ; GCN: liveins: $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1 + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s16) = G_TRUNC %0 + %2:vgpr(s64) = G_ANYEXT %1 + S_ENDPGM 0, implicit %2 ... From 99a971cadff7832a846394462c39a74aac64325d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 3 Aug 2020 12:18:21 +0100 Subject: [PATCH 185/600] [X86][SSE] Start shuffle combining from ANY_EXTEND_VECTOR_INREG on SSE targets We already do this on AVX (+ for ZERO_EXTEND_VECTOR_INREG), but this enables it for all SSE targets - we attempted something similar back at rL357057 but hit issues with the ZERO_EXTEND_VECTOR_INREG handling (PR41249). I'm still looking at the vector-mul.ll regression - which is due to 32-bit targets performing the load as a f64, resulting in the shuffle combiner thinking it has to create a shuffle in the float domain. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +- llvm/test/CodeGen/X86/combine-pmuldq.ll | 4 +- llvm/test/CodeGen/X86/mulvi32.ll | 8 +- llvm/test/CodeGen/X86/pmul.ll | 16 +- llvm/test/CodeGen/X86/promote-cmp.ll | 23 ++- llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 14 +- llvm/test/CodeGen/X86/vector-mul.ll | 3 +- llvm/test/CodeGen/X86/vector-reduce-mul.ll | 171 ++++++------------ llvm/test/CodeGen/X86/vector-trunc-math.ll | 12 +- 9 files changed, 98 insertions(+), 164 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ff59f28c8b6d2..e9bb50aacec0e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48671,6 +48671,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); SDValue In = N->getOperand(0); + unsigned Opcode = N->getOpcode(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Try to merge vector loads and extend_inreg to an extload. @@ -48679,7 +48680,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, auto *Ld = cast(In); if (Ld->isSimple()) { MVT SVT = In.getSimpleValueType().getVectorElementType(); - ISD::LoadExtType Ext = N->getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG + ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; EVT MemVT = @@ -48687,8 +48688,7 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, if (TLI.isLoadExtLegal(Ext, VT, MemVT)) { SDValue Load = DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), MemVT, - Ld->getOriginalAlign(), + Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); return Load; @@ -48697,8 +48697,9 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG, } // Attempt to combine as a shuffle. - // TODO: SSE41 support - if (Subtarget.hasAVX() && N->getOpcode() != ISD::SIGN_EXTEND_VECTOR_INREG) { + // TODO: SSE ZERO_EXTEND_VECTOR_INREG support. + if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG || + (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasAVX())) { SDValue Op(N, 0); if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll index 0e448f3f3be06..27823cf5fe8c5 100644 --- a/llvm/test/CodeGen/X86/combine-pmuldq.ll +++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll @@ -91,9 +91,9 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) { ; SSE-LABEL: combine_zext_pmuludq_256: ; SSE: # %bb.0: -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,1,3,3] ; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,1,3,3] ; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [715827883,715827883] ; SSE-NEXT: pmuludq %xmm4, %xmm0 diff --git a/llvm/test/CodeGen/X86/mulvi32.ll b/llvm/test/CodeGen/X86/mulvi32.ll index d05c26e9842ac..388b5bc8a746c 100644 --- a/llvm/test/CodeGen/X86/mulvi32.ll +++ b/llvm/test/CodeGen/X86/mulvi32.ll @@ -137,8 +137,8 @@ define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) { ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3] ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3] ; SSE2-NEXT: pmuludq %xmm3, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,1,3,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,1,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,3,3] ; SSE2-NEXT: pmuludq %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -148,8 +148,8 @@ define <4 x i64> @_mul4xi32toi64a(<4 x i32>, <4 x i32>) { ; SSE42-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero ; SSE42-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero ; SSE42-NEXT: pmuludq %xmm3, %xmm2 -; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3] -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] +; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,1,3,3] +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,1,3,3] ; SSE42-NEXT: pmuludq %xmm3, %xmm1 ; SSE42-NEXT: movdqa %xmm2, %xmm0 ; SSE42-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 9aeb8292ac0ca..5a3101bc3efda 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -1033,10 +1033,10 @@ define <4 x i32> @mul_v4i64_zero_upper(<4 x i32> %val1, <4 x i32> %val2) { ; SSE41-LABEL: mul_v4i64_zero_upper: ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero -; SSE41-NEXT: 
pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; SSE41-NEXT: pmuludq %xmm2, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] ; SSE41-NEXT: pmuludq %xmm3, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] ; SSE41-NEXT: retq @@ -1186,17 +1186,17 @@ define <8 x i32> @mul_v8i64_zero_upper(<8 x i32> %val1, <8 x i32> %val2) { ; SSE41-LABEL: mul_v8i64_zero_upper: ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero -; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,1,3,3] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero -; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm1[2,1,3,3] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero ; SSE41-NEXT: pmuludq %xmm4, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] ; SSE41-NEXT: pmuludq %xmm5, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero ; SSE41-NEXT: pmuludq %xmm6, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,3,3] ; SSE41-NEXT: pmuludq %xmm7, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] ; SSE41-NEXT: retq @@ -1311,11 +1311,11 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) { ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,1,1] ; SSE41-NEXT: pmovsxwq %xmm3, %xmm6 ; SSE41-NEXT: pmovsxwq %xmm0, %xmm7 -; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,1,3,3] ; SSE41-NEXT: pmuldq %xmm4, %xmm3 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero ; SSE41-NEXT: pmuldq %xmm5, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,2,3,3] +; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,1,3,3] ; SSE41-NEXT: pmuldq %xmm6, %xmm4 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; SSE41-NEXT: pmuldq %xmm7, %xmm0 diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll index f23900e8b8f8c..d6fcac28f62ad 100644 --- a/llvm/test/CodeGen/X86/promote-cmp.ll +++ b/llvm/test/CodeGen/X86/promote-cmp.ll @@ -30,20 +30,19 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) { ; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] ; SSE2-NEXT: por %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2] -; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 -; SSE2-NEXT: movaps %xmm5, %xmm6 -; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[2,1],xmm5[3,3] -; SSE2-NEXT: psllq $63, %xmm6 -; SSE2-NEXT: psrad $31, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm6 -; SSE2-NEXT: por %xmm6, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,1,1,3] -; SSE2-NEXT: xorps %xmm4, %xmm5 +; SSE2-NEXT: movaps {{.*#+}} xmm4 = <1,1,u,0> +; SSE2-NEXT: xorps %xmm5, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,1,3,3] ; SSE2-NEXT: psllq $63, %xmm5 ; SSE2-NEXT: psrad $31, %xmm5 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] +; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: pandn %xmm3, %xmm5 +; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,1,1,3] +; SSE2-NEXT: psllq $63, %xmm3 +; SSE2-NEXT: psrad $31, 
%xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pandn %xmm2, %xmm3 ; SSE2-NEXT: por %xmm3, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index 65131c0e3cb73..28f3e436efcd9 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -835,17 +835,15 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2 ; SSE41-NEXT: psrlw $8, %xmm2 +; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm1, %xmm2 ; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm3 ; SSE41-NEXT: psrlw $8, %xmm3 +; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm3 +; SSE41-NEXT: pand %xmm1, %xmm3 ; SSE41-NEXT: packuswb %xmm2, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pmullw {{.*}}(%rip), %xmm2 -; SSE41-NEXT: pand %xmm3, %xmm2 -; SSE41-NEXT: packuswb %xmm2, %xmm1 -; SSE41-NEXT: psubb %xmm1, %xmm0 +; SSE41-NEXT: psubb %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX1-LABEL: test_remconstant_16i8: diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index 805ff9f69ed5e..d93ac61592f29 100644 --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -1904,7 +1904,8 @@ define <2 x i64> @mul_v2i64_zext_cross_bb(<2 x i32>* %in, <2 x i32>* %y) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero +; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0,0,1,1] ; X86-NEXT: pmuludq %xmm1, %xmm0 ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll index a3cec079ab091..b44b66eb1d87a 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll @@ -1567,14 +1567,14 @@ define i8 @test_v4i8(<4 x i8> %a0) { ; SSE2-LABEL: test_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $16, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pmullw %xmm1, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; @@ -1618,16 +1618,18 @@ define i8 @test_v4i8(<4 x i8> %a0) { define i8 @test_v8i8(<8 x i8> %a0) { ; SSE2-LABEL: test_v8i8: ; SSE2: # %bb.0: -; 
SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,2,3,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,2,3,0] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] ; SSE2-NEXT: pmullw %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: packuswb %xmm1, %xmm1 +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pmullw %xmm1, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax ; SSE2-NEXT: retq ; @@ -1637,13 +1639,11 @@ define i8 @test_v8i8(<8 x i8> %a0) { ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE41-NEXT: pmullw %xmm0, %xmm1 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $16, %xmm1 +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1710,23 +1710,15 @@ define i8 @test_v16i8(<16 x i8> %a0) { ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: packuswb %xmm3, %xmm0 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw %xmm2, %xmm0 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $8, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $8, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1900,30 +1892,17 @@ define i8 @test_v32i8(<32 x i8> %a0) { ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movdqa 
{{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm0, %xmm4 -; SSE41-NEXT: pand %xmm1, %xmm4 -; SSE41-NEXT: pmullw %xmm2, %xmm3 -; SSE41-NEXT: pand %xmm1, %xmm3 -; SSE41-NEXT: packuswb %xmm4, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmullw %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: packuswb %xmm3, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm0, %xmm2 -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmullw %xmm2, %xmm0 +; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $8, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrld $8, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pmullw %xmm0, %xmm1 +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2139,31 +2118,13 @@ define i8 @test_v64i8(<64 x i8> %a0) { ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm2, %xmm0 -; SSE41-NEXT: pmullw %xmm3, %xmm1 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; SSE41-NEXT: pshufb %xmm2, %xmm1 ; SSE41-NEXT: pmullw %xmm4, %xmm5 -; SSE41-NEXT: pshufb %xmm2, %xmm5 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: pand %xmm2, %xmm3 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: pmullw %xmm4, %xmm1 -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: packuswb %xmm3, %xmm1 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: packuswb %xmm3, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; SSE41-NEXT: pmullw %xmm3, %xmm5 +; SSE41-NEXT: pmullw %xmm0, %xmm5 +; SSE41-NEXT: 
pmullw %xmm1, %xmm5 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3] +; SSE41-NEXT: pmullw %xmm5, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE41-NEXT: pmullw %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrld $8, %xmm0 @@ -2446,14 +2407,14 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm9 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm6, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm6 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm2, %xmm4 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm10 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm11 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm6 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE41-NEXT: pmullw %xmm7, %xmm3 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero @@ -2464,43 +2425,17 @@ define i8 @test_v128i8(<128 x i8> %a0) { ; SSE41-NEXT: pmullw %xmm5, %xmm1 ; SSE41-NEXT: pmullw %xmm4, %xmm1 ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pmullw %xmm7, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: pmullw %xmm11, %xmm6 -; SSE41-NEXT: pshufb %xmm0, %xmm6 -; SSE41-NEXT: pmullw %xmm10, %xmm2 -; SSE41-NEXT: pshufb %xmm0, %xmm2 ; SSE41-NEXT: pmullw %xmm8, %xmm9 -; SSE41-NEXT: pshufb %xmm0, %xmm9 -; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm9[0],zero,xmm9[1],zero,xmm9[2],zero,xmm9[3],zero,xmm9[4],zero,xmm9[5],zero,xmm9[6],zero,xmm9[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm5, %xmm2 -; SSE41-NEXT: pshufb %xmm0, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmullw %xmm5, %xmm3 -; SSE41-NEXT: pshufb %xmm0, %xmm3 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: pand %xmm4, %xmm0 -; 
SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm3, %xmm2 -; SSE41-NEXT: pand %xmm4, %xmm2 -; SSE41-NEXT: packuswb %xmm0, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; SSE41-NEXT: pand %xmm4, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: packuswb %xmm2, %xmm0 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pand %xmm0, %xmm4 -; SSE41-NEXT: packuswb %xmm2, %xmm4 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero +; SSE41-NEXT: pmullw %xmm9, %xmm6 +; SSE41-NEXT: pmullw %xmm11, %xmm2 +; SSE41-NEXT: pmullw %xmm2, %xmm7 +; SSE41-NEXT: pmullw %xmm7, %xmm3 +; SSE41-NEXT: pmullw %xmm6, %xmm3 +; SSE41-NEXT: pmullw %xmm1, %xmm3 +; SSE41-NEXT: pmullw %xmm10, %xmm3 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] +; SSE41-NEXT: pmullw %xmm3, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] ; SSE41-NEXT: pmullw %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrld $8, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index 4489fd51035aa..81a6f60f034f1 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -5058,10 +5058,10 @@ define <4 x i32> @mul_add_const_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwi ; SSE-LABEL: mul_add_const_v4i64_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3] -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,3,3] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] ; SSE-NEXT: pmuludq %xmm2, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,3,3] ; SSE-NEXT: pmuludq %xmm3, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; SSE-NEXT: paddd {{.*}}(%rip), %xmm0 @@ -5084,10 +5084,10 @@ define <4 x i32> @mul_add_self_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwin ; SSE-LABEL: mul_add_self_v4i64_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3] -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,3,3] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] ; SSE-NEXT: pmuludq %xmm2, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,3,3] ; SSE-NEXT: pmuludq %xmm3, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] ; SSE-NEXT: paddd %xmm0, %xmm0 @@ -5110,10 +5110,10 @@ define <4 x i32> @mul_add_multiuse_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nou ; SSE-LABEL: mul_add_multiuse_v4i64_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3] -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,3,3] ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[0,1,1,3] ; SSE-NEXT: pmuludq %xmm2, %xmm4 -; SSE-NEXT: pshufd {{.*#+}} xmm1 = 
xmm1[2,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,3,3] ; SSE-NEXT: pmuludq %xmm3, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm1[0,2] ; SSE-NEXT: paddd %xmm4, %xmm0 From fd63e46941fc48d4cc777ef94e185637898d0adb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jun 2020 14:52:14 -0400 Subject: [PATCH 186/600] AMDGPU/GlobalISel: Apply load bitcast to s.buffer.load intrinsic Should also apply this to the non-scalar buffer loads. --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 68 +++++++++------ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 4 +- .../legalize-llvm.amdgcn.s.buffer.load.mir | 85 +++++++++++++++++-- 3 files changed, 121 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index cc97e11707ab1..b40870024cc49 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -122,20 +122,23 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) { }; } +static LLT getBitcastRegisterType(const LLT Ty) { + const unsigned Size = Ty.getSizeInBits(); + + LLT CoercedTy; + if (Size <= 32) { + // <2 x s8> -> s16 + // <4 x s8> -> s32 + return LLT::scalar(Size); + } + + return LLT::scalarOrVector(Size / 32, 32); +} + static LegalizeMutation bitcastToRegisterType(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT Ty = Query.Types[TypeIdx]; - unsigned Size = Ty.getSizeInBits(); - - LLT CoercedTy; - if (Size <= 32) { - // <2 x s8> -> s16 - // <4 x s8> -> s32 - CoercedTy = LLT::scalar(Size); - } else - CoercedTy = LLT::scalarOrVector(Size / 32, 32); - - return std::make_pair(TypeIdx, CoercedTy); + return std::make_pair(TypeIdx, getBitcastRegisterType(Ty)); }; } @@ -335,6 +338,20 @@ static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query, !loadStoreBitcastWorkaround(Ty); } +/// Return true if a load or store of the type should be lowered with a bitcast +/// to a different type. +static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty, + const unsigned MemSizeInBits) { + const unsigned Size = Ty.getSizeInBits(); + if (Size != MemSizeInBits) + return Size <= 32 && Ty.isVector(); + + if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty)) + return true; + return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) && + !isRegisterVectorElementType(Ty.getElementType()); +} + AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const GCNTargetMachine &TM) : ST(ST_) { @@ -1048,16 +1065,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // 16-bit vector parts. 
Actions.bitcastIf( [=](const LegalityQuery &Query) -> bool { - const LLT Ty = Query.Types[0]; - const unsigned Size = Ty.getSizeInBits(); - - if (Size != Query.MMODescrs[0].SizeInBits) - return Size <= 32 && Ty.isVector(); - - if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty)) - return true; - return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) && - !isRegisterVectorElementType(Ty.getElementType()); + return shouldBitcastLoadStoreType(ST, Query.Types[0], + Query.MMODescrs[0].SizeInBits); }, bitcastToRegisterType(0)); Actions @@ -4137,8 +4146,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } bool AMDGPULegalizerInfo::legalizeSBufferLoad( - MachineInstr &MI, MachineIRBuilder &B, - GISelChangeObserver &Observer) const { + LegalizerHelper &Helper, MachineInstr &MI) const { + MachineIRBuilder &B = Helper.MIRBuilder; + GISelChangeObserver &Observer = Helper.Observer; + Register Dst = MI.getOperand(0).getReg(); LLT Ty = B.getMRI()->getType(Dst); unsigned Size = Ty.getSizeInBits(); @@ -4146,6 +4157,13 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( Observer.changingInstr(MI); + if (shouldBitcastLoadStoreType(ST, Ty, Size)) { + Ty = getBitcastRegisterType(Ty); + Helper.bitcastDst(MI, Ty, 0); + Dst = MI.getOperand(0).getReg(); + B.setInsertPt(B.getMBB(), MI); + } + // FIXME: We don't really need this intermediate instruction. The intrinsic // should be fixed to have a memory operand. Since it's readnone, we're not // allowed to add one. @@ -4167,8 +4185,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( // always be legal. We may need to restore this to a 96-bit result if it turns // out this needs to be converted to a vector load during RegBankSelect. if (!isPowerOf2_32(Size)) { - LegalizerHelper Helper(MF, *this, Observer, B); - if (Ty.isVector()) Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0); else @@ -4360,7 +4376,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return true; } case Intrinsic::amdgcn_s_buffer_load: - return legalizeSBufferLoad(MI, B, Helper.Observer); + return legalizeSBufferLoad(Helper, MI); case Intrinsic::amdgcn_raw_buffer_store: case Intrinsic::amdgcn_struct_buffer_store: return legalizeBufferStore(MI, MRI, B, false, false); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index fe4e17db48a6b..332d675c1a88e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -167,9 +167,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { GISelChangeObserver &Observer, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const; - bool legalizeSBufferLoad( - MachineInstr &MI, MachineIRBuilder &B, - GISelChangeObserver &Observer) const; + bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const; bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B, bool IsInc) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 8860ca6ba5e58..9aee145ec1d15 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -67,9 +67,10 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s16 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s16>) = 
G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s16>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<8 x s16>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 + ; GCN: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -124,13 +125,83 @@ body: | ; GCN-LABEL: name: s_buffer_load_v12s8 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<16 x s8>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<12 x s8>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<16 x s8>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<12 x s8>) + ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) + ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 + ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s32>) + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GCN: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; GCN: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GCN: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GCN: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; GCN: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GCN: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GCN: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C4]] + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C4]] + ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GCN: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GCN: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; GCN: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; GCN: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C4]] + ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GCN: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; GCN: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C4]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; GCN: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C4]] + ; GCN: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GCN: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GCN: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; GCN: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C4]] + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) + ; GCN: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C4]] + ; GCN: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GCN: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GCN: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) + ; GCN: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C4]] + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) + ; GCN: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C4]] + ; GCN: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; GCN: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GCN: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) + ; GCN: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C4]] + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) + ; GCN: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C4]] + ; GCN: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GCN: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GCN: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 - S_ENDPGM 0, implicit %2 + %3:_(<12 x s16>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 ... From f19a9be385ef782140d1e551e03553daa79d1bc1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 08:58:41 -0400 Subject: [PATCH 187/600] Revert "[InstCombine] reduce xor-of-or's bitwise logic (PR46955)" This reverts commit 2265d01f2a5bd153959701e22f5be2a40e1674a3. 
Seeing bot failures after this change like: http://lab.llvm.org:8011/builders/clang-cmake-x86_64-sde-avx512-linux/builds/42586 --- .../InstCombine/InstCombineAndOrXor.cpp | 14 ----------- llvm/test/Transforms/InstCombine/xor.ll | 24 +++++++++---------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ef1e8dbe01558..030d2f203ed6c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3351,20 +3351,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(Op1, m_Not(m_Specific(A)))) return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); - // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. - // TODO: Loosen one-use restriction if common operand is a constant. - Value *D; - if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) && - match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) { - if (B == C || B == D) std::swap(A, B); - if (A == C) std::swap(C, D); - if (A == D) - return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), - Builder.CreateNot(A)); - } - if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (Value *V = foldXorOfICmps(LHS, RHS, I)) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 2bdb837bff041..a133f2a0e009b 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -915,9 +915,9 @@ define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor( -; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -928,9 +928,9 @@ define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute1( -; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[O1:%.*]] = or i4 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %x, %z @@ -941,9 +941,9 @@ define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute2( -; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[Z:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or i4 [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -954,9 +954,9 @@ define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_or_xor_commute3( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i4> [[Z:%.*]], <i4 -1, i4 -1> -; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[O1:%.*]] = or <2 x i4> 
[[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Y:%.*]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[O1]], [[O2]] ; CHECK-NEXT: ret <2 x i4> [[R]] ; %o1 = or <2 x i4> %x, %z From 1782fbbc69482e76eee8af203694bb771a44c921 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 2 Feb 2020 17:42:02 -0500 Subject: [PATCH 188/600] GlobalISel: Reimplement moreElementsVectorDst Use pad with undef and unmerge with unused results. This is annoyingly similar to several other places in LegalizerHelper, but they're all slightly different. --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 4 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 38 +++- .../AMDGPU/GlobalISel/legalize-and.mir | 55 +++-- .../AMDGPU/GlobalISel/legalize-ashr.mir | 15 +- .../AMDGPU/GlobalISel/legalize-extract.mir | 84 ++++--- .../AMDGPU/GlobalISel/legalize-fabs.mir | 64 +++--- .../AMDGPU/GlobalISel/legalize-fadd.mir | 95 ++++---- .../GlobalISel/legalize-fcanonicalize.mir | 60 ++--- .../AMDGPU/GlobalISel/legalize-fcos.mir | 58 ++--- .../AMDGPU/GlobalISel/legalize-fdiv.mir | 157 +++++++------ .../AMDGPU/GlobalISel/legalize-ffloor.mir | 58 ++--- .../AMDGPU/GlobalISel/legalize-fma.mir | 130 ++++++----- .../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 5 +- .../AMDGPU/GlobalISel/legalize-fminnum.mir | 5 +- .../AMDGPU/GlobalISel/legalize-fmul.mir | 95 ++++---- .../AMDGPU/GlobalISel/legalize-fneg.mir | 64 +++--- .../AMDGPU/GlobalISel/legalize-fpext.mir | 12 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 34 +-- .../AMDGPU/GlobalISel/legalize-fsin.mir | 58 ++--- .../AMDGPU/GlobalISel/legalize-fsqrt.mir | 58 ++--- .../AMDGPU/GlobalISel/legalize-fsub.mir | 91 ++++---- .../GlobalISel/legalize-implicit-def.mir | 39 ++-- .../AMDGPU/GlobalISel/legalize-insert.mir | 35 +-- .../legalize-llvm.amdgcn.image.load.2d.d16.ll | 26 ++- .../legalize-llvm.amdgcn.s.buffer.load.mir | 70 +++--- .../GlobalISel/legalize-load-constant.mir | 200 ++++++++++------- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 175 +++++++++------ .../GlobalISel/legalize-load-global.mir | 210 +++++++++++------- .../AMDGPU/GlobalISel/legalize-load-local.mir | 175 +++++++++------ .../GlobalISel/legalize-load-private.mir | 180 +++++++++------ .../AMDGPU/GlobalISel/legalize-lshr.mir | 15 +- .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 55 +++-- .../AMDGPU/GlobalISel/legalize-phi.mir | 5 +- .../AMDGPU/GlobalISel/legalize-saddsat.mir | 24 +- .../AMDGPU/GlobalISel/legalize-select.mir | 10 +- .../AMDGPU/GlobalISel/legalize-shl.mir | 15 +- .../AMDGPU/GlobalISel/legalize-smax.mir | 83 ++++--- .../AMDGPU/GlobalISel/legalize-smin.mir | 83 ++++--- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 24 +- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 24 +- .../AMDGPU/GlobalISel/legalize-umax.mir | 83 ++++--- .../AMDGPU/GlobalISel/legalize-umin.mir | 83 ++++--- .../GlobalISel/legalize-unmerge-values.mir | 12 +- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 24 +- .../AMDGPU/GlobalISel/legalize-xor.mir | 55 +++-- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 144 +++++++----- .../regbankselect-amdgcn.s.buffer.load.ll | 26 ++- .../GlobalISel/LegalizerHelperTest.cpp | 3 +- 48 files changed, 1816 insertions(+), 1302 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index e819dca5bdf0f..dfd27bd5f7c5f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -154,6 +154,10 @@ class LegalizerHelper { /// def by inserting a G_BITCAST from \p 
CastTy void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx); + /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with + /// G_IMPLICIT_DEF, and producing dead results. + Register widenWithUnmerge(LLT WideTy, Register OrigReg); + private: LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 920c9e008012e..c11f91bc0a6ca 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1317,10 +1317,8 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - Register DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MIRBuilder.buildExtract(MO, DstExt, 0); - MO.setReg(DstExt); + MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, @@ -1488,6 +1486,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { + Register WideReg = MRI.createGenericVirtualRegister(WideTy); + LLT OrigTy = MRI.getType(OrigReg); + LLT LCMTy = getLCMType(WideTy, OrigTy); + + const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); + const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); + + Register UnmergeSrc = WideReg; + + // Create a merge to the LCM type, padding with undef + // %0:_(<3 x s32>) = G_FOO => <4 x s32> + // => + // %1:_(<4 x s32>) = G_FOO + // %2:_(<4 x s32>) = G_IMPLICIT_DEF + // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 + // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 + if (NumMergeParts > 1) { + Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); + SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); + MergeParts[0] = WideReg; + UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); + } + + // Unmerge to the original register and pad with dead defs. 
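+ // Only the first unmerge result is tied to the original register; the
+ // remaining results are fresh virtual registers of the original type and
+ // are simply left as dead defs.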
+ SmallVector<Register, 4> UnmergeResults(NumUnmergeParts); + UnmergeResults[0] = OrigReg; + for (int I = 1; I != NumUnmergeParts; ++I) + UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); + + MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); + return WideReg; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 3d26cefc4f55d..ef7e4e2f15a2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -361,15 +361,18 @@ body: | ; CHECK-LABEL: name: test_and_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[AND]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF @@ -403,27 +406,32@@ body: | ; CHECK-LABEL: name: test_and_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF1]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) ; 
CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 + ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND]](<4 x s16>), 0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[AND]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV16:%[0-9]+]]:_(<3 x s16>), [[UV17:%[0-9]+]]:_(<3 x s16>), [[UV18:%[0-9]+]]:_(<3 x s16>), [[UV19:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[AND1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[AND1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>) + ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), 
[[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF @@ -463,8 +471,9 @@ body: | ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index c4d511ddda3ca..a39e97cf3309e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -863,14 +863,17 @@ body: | ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[ASHR]](<2 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[ASHR1]](s16), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = 
G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index 9bb25356c950c..0e23fba6afaaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -522,11 +522,14 @@ body: | ; CHECK-LABEL: name: extract_s8_v3s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC1]](<4 x s8>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -538,12 +541,12 @@ body: | ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = 
G_EXTRACT %0, 16 @@ -558,13 +561,16 @@ body: | ; CHECK-LABEL: name: extract_s8_v5s1_offset4 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF]](<6 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s1>) = G_EXTRACT [[TRUNC]](<6 x s1>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1), [[UV3:%[0-9]+]]:_(s1), [[UV4:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<5 x s1>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s1) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<6 x s1>) = G_TRUNC [[DEF1]](<6 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s1>) = G_CONCAT_VECTORS [[TRUNC]](<6 x s1>), [[TRUNC1]](<6 x s1>), [[TRUNC1]](<6 x s1>), [[TRUNC1]](<6 x s1>), [[TRUNC1]](<6 x s1>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s1>), [[UV1:%[0-9]+]]:_(<5 x s1>), [[UV2:%[0-9]+]]:_(<5 x s1>), [[UV3:%[0-9]+]]:_(<5 x s1>), [[UV4:%[0-9]+]]:_(<5 x s1>), [[UV5:%[0-9]+]]:_(<5 x s1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s1>) + ; CHECK: [[UV6:%[0-9]+]]:_(s1), [[UV7:%[0-9]+]]:_(s1), [[UV8:%[0-9]+]]:_(s1), [[UV9:%[0-9]+]]:_(s1), [[UV10:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[UV]](<5 x s1>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s1) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s1) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s1) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s1) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s1) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT]](s16) ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ANYEXT1]](s16) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -581,12 +587,12 @@ body: | ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT2]](s16) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT5]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 @@ -850,10 +856,12 @@ body: | ; CHECK-LABEL: name: extract_s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x 
s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT1]](s16) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(s16) = G_EXTRACT %0, 0 @@ -933,10 +941,12 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -949,10 +959,12 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 0 - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 + ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index e693766954c98..0857d286ff5c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -209,13 +209,15 @@ body: | ; SI-LABEL: name: test_fabs_v3s16 ; SI: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -231,21 +233,23 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] ; SI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -261,34 +265,36 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST2]] ; VI: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BITCAST3]] - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32) - ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[FABS:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC]] ; GFX9: [[FABS1:%[0-9]+]]:_(<2 x s16>) = G_FABS [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FABS]](<2 x s16>), [[FABS1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FABS %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 50d846ca2367a..1235d999f2e9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -330,26 +330,29 @@ body: | ; SI-LABEL: name: test_fadd_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -374,31 +377,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fadd_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[TRUNC3]] @@ -414,44 +420,47 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fadd_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) - ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9: 
[[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32) + ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32) ; GFX9: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<2 x s16>), [[FADD1]](<2 x s16>), [[DEF3]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<2 x s16>), [[FADD1]](<2 x s16>), [[DEF4]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FADD %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index 9efb04c35f6e9..e5b9fe0b86562 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -222,15 +222,17 @@ body: | ; SI-LABEL: name: test_fcanonicalize_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -252,20 +254,22 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fcanonicalize_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] @@ -281,31 +285,33 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = 
G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fcanonicalize_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32) - ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FCANONICALIZE]](<2 x s16>), [[FCANONICALIZE1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCANONICALIZE %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index a6b5fc876f7d6..f97896ba623d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -326,15 +326,17 @@ body: | ; SI-LABEL: name: test_fcos_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: 
[[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -363,20 +365,22 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fcos_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 @@ -399,20 +403,22 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fcos_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 @@ -426,11 +432,11 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FCOS %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir index 3ed48e39e57e3..3236b8c5a3d06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -1307,26 +1307,29 @@ body: | ; SI-LABEL: name: test_fdiv_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC]](s16) @@ -1385,31 +1388,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fdiv_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -1440,31 +1446,34 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fdiv_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -1489,33 +1498,36 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9-UNSAFE: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9-UNSAFE: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-UNSAFE: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-UNSAFE: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9-UNSAFE: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV9]](<2 x s16>) ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9-UNSAFE: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) @@ -1528,33 +1540,36 @@ body: | ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) - ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-UNSAFE: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9-UNSAFE: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32) + ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-UNSAFE: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; GFX9-UNSAFE: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX10-LABEL: name: test_fdiv_v3s16 ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX10: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX10: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX10: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX10: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX10: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX10: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -1579,11 +1594,11 @@ body: | ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX10: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX10: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX10: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32) + ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; GFX10: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FDIV %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir index 
b1eec4dfee3d3..4466b4c521c56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -322,15 +322,17 @@ body: | ; SI-LABEL: name: test_ffloor_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -352,20 +354,22 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_ffloor_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] @@ -381,20 +385,22 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_ffloor_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] @@ -404,11 +410,11 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: 
[[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FFLOOR %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index 06f237f7a3050..56c51dfa726e3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -393,37 +393,41 @@ body: | ; SI-LABEL: name: test_fma_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], 
[[UV4]](<3 x s16>), 0 + ; SI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 + ; SI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -451,42 +455,46 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) + ; SI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fma_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), 
[[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 + ; VI: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] @@ -502,57 +510,61 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x 
s16>), [[BITCAST7]](<2 x s16>) - ; VI: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) + ; VI: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF2]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32) - ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: 
[[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF4]](s32) + ; GFX9: [[DEF5:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32) - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF4]](s32) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 + ; GFX9: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF3]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF4]](s32) ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>), [[DEF4]](<2 x s16>) - ; GFX9: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT3]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>), [[DEF5]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS3]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 8b9b0e972e6f4..24253aaeb4c38 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -506,8 +506,9 @@ body: | ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index f6456cd57f01e..723168a2a8ec3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -506,8 +506,9 @@ body: | ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index 45130639f696f..ea9b1e66d7e13 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -329,26 +329,29 @@ body: | ; SI-LABEL: name: test_fmul_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x 
s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -373,31 +376,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fmul_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x 
s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC3]] @@ -413,44 +419,47 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fmul_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) - ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF3]](s32) + ; GFX9: [[DEF4:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[COPY4]](s32) ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF2]](s32) + ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[DEF3]](s32) ; GFX9: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL 
[[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMUL]](<2 x s16>), [[FMUL1]](<2 x s16>), [[DEF3]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMUL]](<2 x s16>), [[FMUL1]](<2 x s16>), [[DEF4]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FMUL %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index 75e498b3a2e75..35f229088167c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -207,13 +207,15 @@ body: | ; SI-LABEL: name: test_fneg_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -229,21 +231,23 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; SI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; SI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] ; SI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fneg_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = 
G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -259,34 +263,36 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; VI: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; VI: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST2]] ; VI: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BITCAST3]] - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fneg_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF1]](s32) - ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[DEF2]](s32) + ; GFX9: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX9: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC]] ; GFX9: [[FNEG1:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FNEG]](<2 x s16>), [[FNEG1]](<2 x s16>), [[DEF3]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FNEG %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir index c6ba0b60d99dd..844c972a6dfab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -70,15 +70,17 @@ body: | ; CHECK-LABEL: name: test_fpext_v3f16_to_v3f32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index 460d23b441a0b..b2a525da27515 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -490,8 +490,9 @@ body: | ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s1>) ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]] ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[FREEZE]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s1>) = G_EXTRACT [[TRUNC]](<4 x s1>), 0 - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s1>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s1>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s1>), [[DEF1]](<4 x s1>), [[DEF1]](<4 x s1>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s1>), [[UV1:%[0-9]+]]:_(<3 x s1>), [[UV2:%[0-9]+]]:_(<3 x s1>), [[UV3:%[0-9]+]]:_(<3 x s1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s1>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s1>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>) %0:_(<3 x s1>) = G_IMPLICIT_DEF %1:_(<3 x s1>) = G_FREEZE %0 @@ -530,8 +531,9 @@ body: | ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>) ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]] ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC1]](<4 x s8>), 0 - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s8>) = G_TRUNC %0 @@ -573,13 +575,14 @@ body: | ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]] - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[FREEZE]](<4 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[FREEZE]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) + ; CHECK: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) @@ -628,15 +631,16 @@ body: | ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0 ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]] - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[FREEZE]](<6 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[FREEZE]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>), [[DEF2]](<6 x s16>) + ; CHECK: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 + ; CHECK: [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir index 20ba990e8bdb1..740f023b93ab1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -326,15 +326,17 @@ body: | ; SI-LABEL: name: test_fsin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -363,20 +365,22 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fsin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 @@ -399,20 +403,22 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fsin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3118 @@ -426,11 +432,11 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT2]](s16) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSIN %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index 9508109707cc1..4be1a32f81148 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -246,15 +246,17 @@ body: | ; SI-LABEL: name: test_fsqrt_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) @@ -276,20 +278,22 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fsqrt_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] @@ -305,20 +309,22 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fsqrt_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] @@ -328,11 +334,11 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT1]](<3 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSQRT %0 S_NOP 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 
7df607de5ed84..710e0b405fc9c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -374,26 +374,29 @@ body: | ; SI-LABEL: name: test_fsub_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] @@ -421,31 +424,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT 
[[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_fsub_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] @@ -464,31 +470,34 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_fsub_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC3]] @@ 
-501,11 +510,11 @@ body: | ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD2]](s16) - ; GFX9: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF2]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[DEF3]](s32) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; GFX9: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FSUB %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 1ce85a95ef6db..993bb6a437561 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -377,11 +377,14 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s1 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s1>) = G_EXTRACT [[TRUNC]](<4 x s1>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s1), [[UV1:%[0-9]+]]:_(s1), [[UV2:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s1>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s1>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s1>), [[TRUNC1]](<4 x s1>), [[TRUNC1]](<4 x s1>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s1>), [[UV1:%[0-9]+]]:_(<3 x s1>), [[UV2:%[0-9]+]]:_(<3 x s1>), [[UV3:%[0-9]+]]:_(<3 x s1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s1>) + ; CHECK: [[UV4:%[0-9]+]]:_(s1), [[UV5:%[0-9]+]]:_(s1), [[UV6:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[UV]](<3 x s1>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s1) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s1) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s1) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s1>) = G_IMPLICIT_DEF @@ -411,11 +414,14 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: 
[[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF1]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s8>), [[TRUNC1]](<4 x s8>), [[TRUNC1]](<4 x s8>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF @@ -442,9 +448,10 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %1(<4 x s16>), %1(<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF @@ -471,9 +478,11 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 0 - ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) + ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 37971d389fe4e..99ff3fff27dda 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -843,8 +843,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0 
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -869,8 +870,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -895,8 +897,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -920,8 +923,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -944,8 
+948,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -968,8 +973,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -992,8 +998,9 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index a8ab9c7582751..d2c89ab19927e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -768,13 +768,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x 
s16>), 0 + ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %16(<4 x s16>), %16(<4 x s16>) + ; UNPACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) + ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 + ; UNPACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; UNPACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -790,13 +791,14 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %16(<4 x s16>), %16(<4 x s16>) + ; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; PACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; PACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>) + ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32 + ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>) ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 9aee145ec1d15..b75ec76d7ff44 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -30,8 +30,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: 
[[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -49,8 +51,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x p3>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x p3>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x p3>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x p3>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x p3>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x p3>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x p3>), [[DEF]](<4 x p3>), [[DEF]](<4 x p3>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x p3>), [[UV1:%[0-9]+]]:_(<3 x p3>), [[UV2:%[0-9]+]]:_(<3 x p3>), [[UV3:%[0-9]+]]:_(<3 x p3>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x p3>) + ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x p3>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -68,8 +72,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 - ; GCN: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -88,8 +94,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 24, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s32>) + ; GCN: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) + ; GCN: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x 
s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) + ; GCN: S_ENDPGM 0, implicit [[UV]](<6 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -107,8 +115,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 24, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s64>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s64>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -126,29 +136,31 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 - ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s32>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; GCN: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GCN: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; GCN: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; GCN: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; GCN: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; GCN: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; GCN: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) + ; GCN: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) + ; GCN: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) + ; GCN: 
[[COPY5:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; GCN: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; GCN: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; GCN: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GCN: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) + ; GCN: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) + ; GCN: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) @@ -195,8 +207,8 @@ body: | ; GCN: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) ; GCN: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; GCN: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) + ; GCN: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -215,8 +227,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 - ; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) + ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index d91886d1626da..739b76cad892c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -5884,37 +5884,42 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 
x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load 6, align 8, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x 
s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 4) @@ -5992,15 +5997,18 @@ body: | ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6022,15 +6030,18 @@ body: | ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 
x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6046,15 +6057,18 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = 
G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6076,15 +6090,18 @@ body: | ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6100,15 +6117,18 @@ body: | ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 2 + 4, addrspace 4) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], 
[[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 4) @@ -6172,15 +6192,18 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x 
s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; VI-LABEL: name: test_load_constant_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6225,15 +6248,18 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-LABEL: name: test_load_constant_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6275,15 +6301,18 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS 
[[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_constant_v3s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6334,15 +6363,18 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -6384,15 +6416,18 @@ body: | ; GFX9-MESA: 
[[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 4) @@ -8262,37 +8297,42 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_constant_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_constant_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load 24, align 32, addrspace 4) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0 ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index a3f43848e8feb..034f3903c6538 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -5924,37 +5924,42 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = 
COPY [[INSERT]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load 6, align 8) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 0) @@ -6032,15 +6037,18 @@ body: | ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align2 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6062,15 +6070,18 @@ body: | ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6086,15 +6097,18 @@ body: | ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9: 
[[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align2 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6116,15 +6130,18 @@ body: | ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align2 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6140,15 +6157,18 @@ body: | ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 2 + 4) ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 0) @@ -6212,15 +6232,18 @@ body: | ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 
- ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; VI-LABEL: name: test_load_flat_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6265,15 +6288,18 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-LABEL: name: test_load_flat_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6315,15 +6341,18 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT 
[[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_flat_v3s16_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6374,15 +6403,18 @@ body: | ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI-MESA: 
[[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-MESA-LABEL: name: test_load_flat_v3s16_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -6424,15 +6456,18 @@ body: | ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index a8e3aa5c8c7ce..8169aa76bfa6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -1203,8 +1203,10 @@ body: | ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + 
; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>) ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -5929,44 +5931,50 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1) - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), 
[[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9-HSA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 1)
@@ -6056,15 +6064,18 @@ body: |
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align2
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6092,15 +6103,18 @@ body: |
     ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; CI-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6122,15 +6136,18 @@ body: |
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align2
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6152,15 +6169,18 @@ body: |
     ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 2 + 4, addrspace 1)
     ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 1)
@@ -6224,15 +6244,18 @@ body: |
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-HSA-LABEL: name: test_load_global_v3s16_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6289,15 +6312,18 @@ body: |
     ; CI-MESA: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI-MESA: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_global_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6342,15 +6368,18 @@ body: |
     ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
@@ -6398,15 +6427,18 @@ body: |
     ; GFX9-MESA: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9-MESA: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9-MESA: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9-MESA: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX9-MESA: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; GFX9-MESA: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9-MESA: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9-MESA: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; GFX9-MESA: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9-MESA: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 1)
@@ -7036,8 +7068,10 @@ body: |
     ; SI-LABEL: name: test_load_global_v3s32_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
     ; CI-HSA-LABEL: name: test_load_global_v3s32_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
@@ -8167,44 +8201,50 @@ body: |
     ; SI-LABEL: name: test_load_global_v3s64_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-HSA-LABEL: name: test_load_global_v3s64_align32
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; CI-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; CI-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; CI-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; CI-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; CI-MESA-LABEL: name: test_load_global_v3s64_align32
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; CI-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; CI-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; CI-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; VI-LABEL: name: test_load_global_v3s64_align32
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; GFX9-HSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; GFX9-HSA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; GFX9-HSA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; GFX9-HSA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load 24, align 32, addrspace 1)
-    ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0
+    ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[LOAD]](<4 x s64>), %2(<4 x s64>), %2(<4 x s64>)
+    ; GFX9-MESA: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>)
     ; GFX9-MESA: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
-    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0
+    ; GFX9-MESA: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[UV]](<3 x s64>), 0
     ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 1)
@@ -12643,23 +12683,25 @@ body: |
     ; SI-LABEL: name: test_extload_global_v2s96_from_24_align16
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0
-    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>)
+    ; SI: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
+    ; SI: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[UV]](<3 x s32>)
     ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
     ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
     ; SI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 8 + 12, align 4, addrspace 1)
     ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
     ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 4 + 20, addrspace 1)
-    ; SI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD1]](<2 x s32>), 0
+    ; SI: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; SI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF1]], [[LOAD1]](<2 x s32>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD2]](s32), 64
     ; SI: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[INSERT1]](<3 x s32>)
     ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s96>) = G_BUILD_VECTOR [[BITCAST]](s96), [[BITCAST1]](s96)
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96
-    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT1]](s96)
-    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT2]](s96)
+    ; SI: [[EXTRACT:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 0
+    ; SI: [[EXTRACT1:%[0-9]+]]:_(s96) = G_EXTRACT [[BUILD_VECTOR]](<2 x s96>), 96
+    ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](s96)
+    ; SI: $vgpr3_vgpr4_vgpr5 = COPY [[EXTRACT1]](s96)
     ; CI-HSA-LABEL: name: test_extload_global_v2s96_from_24_align16
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI-HSA: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load 12, align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index f5a852bd98f36..376339482f5a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -6061,37 +6061,42 @@ body: |
     ; SI-LABEL: name: test_load_local_v3s16_align8
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; SI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align8
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; CI-DS128: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; VI: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 6, align 8, addrspace 3)
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[LOAD]](<4 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[LOAD]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 3)
@@ -6126,15 +6131,18 @@ body: |
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6156,15 +6164,18 @@ body: |
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align2
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6186,15 +6197,18 @@ body: |
     ; CI-DS128: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; CI-DS128: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-DS128: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI-DS128: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6216,15 +6230,18 @@ body: |
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6240,15 +6257,18 @@ body: |
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p3) :: (load 2 + 4, addrspace 3)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 3)
@@ -6312,15 +6332,18 @@ body: |
     ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-LABEL: name: test_load_local_v3s16_align1
    ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6371,15 +6394,18 @@ body: |
     ; CI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-DS128-LABEL: name: test_load_local_v3s16_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6430,15 +6456,18 @@ body: |
     ; CI-DS128: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
     ; CI-DS128: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]]
     ; CI-DS128: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI-DS128: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI-DS128: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI-DS128: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI-DS128: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI-DS128: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI-DS128: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI-DS128: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; CI-DS128: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI-DS128: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI-DS128: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI-DS128: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI-DS128: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_local_v3s16_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6483,15 +6512,18 @@ body: |
     ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]]
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_local_v3s16_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
@@ -6533,15 +6565,18 @@ body: |
     ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16)
     ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(p3) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 3)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index 58f13f172d9ab..085cb619570d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -5027,15 +5027,18 @@ body: |
     ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align8
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5045,15 +5048,18 @@ body: |
     ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5)
     ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align8
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5063,15 +5069,18 @@ body: |
     ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align8
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5081,15 +5090,18 @@ body: |
     ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load 2 + 4, align 4, addrspace 5)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[LOAD]](<2 x s16>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     %0:_(p5) = COPY $vgpr0
     %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 8, addrspace 5)
@@ -5124,15 +5136,18 @@ body: |
     ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5)
     ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; CI-LABEL: name: test_load_private_v3s16_align2
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5154,15 +5169,18 @@ body: |
     ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5)
     ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; VI-LABEL: name: test_load_private_v3s16_align2
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5184,15 +5202,18 @@ body: |
     ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5)
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0
-    ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>)
+    ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0
     ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32
-    ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0
-    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0
+    ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>)
+    ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>)
+    ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0
     ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>)
     ; GFX9-LABEL: name: test_load_private_v3s16_align2
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
@@ -5208,15 +5229,18 @@ body: |
     ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load 2 + 4, addrspace 5)
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
     ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>)
+    ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>)
     ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
+    ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0
-    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0
-    ; GFX9:
[[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[TRUNC]](s16), 32 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 2, addrspace 5) @@ -5280,15 +5304,18 @@ body: | ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; SI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; SI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CI-LABEL: name: test_load_private_v3s16_align1 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5339,15 +5366,18 @@ body: | ; CI: 
[[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) ; CI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[TRUNC5]] ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; CI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; CI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; CI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; CI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; CI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; VI-LABEL: name: test_load_private_v3s16_align1 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5392,15 +5422,18 @@ body: | ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BITCAST]](<2 x s16>), 0 - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; 
VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; VI: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR3]](s16), 32 - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; VI: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; GFX9-LABEL: name: test_load_private_v3s16_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 @@ -5442,15 +5475,18 @@ body: | ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), %2(<4 x s16>), %2(<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT2]], [[OR2]](s16), 32 - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT3]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT2]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT3]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s16>) = G_LOAD %0 :: (load 6, align 1, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index f5f7baeba73a4..4c69ff2edfa48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -726,14 +726,17 @@ body: | ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; 
GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[LSHR]](<2 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[LSHR1]](s16), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index b4a5c48eb2e68..a91139fc7fd50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -361,15 +361,18 @@ body: | ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = 
G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[OR]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF @@ -403,27 +406,32 @@ body: | ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF1]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 + ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR]](<4 x s16>), 
0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[OR]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV16:%[0-9]+]]:_(<3 x s16>), [[UV17:%[0-9]+]]:_(<3 x s16>), [[UV18:%[0-9]+]]:_(<3 x s16>), [[UV19:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[OR1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[OR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>) + ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF @@ -463,8 +471,9 @@ body: | ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 9a91d908bb7b0..997dc3a521201 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -174,9 +174,10 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; 
CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[PHI]](<4 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[PHI]](<4 x s16>), %7(<4 x s16>), %7(<4 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 51b6e014a9376..d96c88b027e59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -528,9 +528,11 @@ body: | ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -590,9 +592,11 @@ body: | ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: 
name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -626,9 +630,11 @@ body: | ; GFX9: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>), [[DEF2]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SADDSAT %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index 88689ae03616f..6dae4e59373c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -291,8 +291,9 @@ body: | ; CHECK: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s16) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 @@ -399,8 +400,9 @@ body: | ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 ; CHECK: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[SELECT]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[SELECT]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x 
s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index 10e32d7f87c90..2d8812d5e7e6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -710,14 +710,17 @@ body: | ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT3]], [[EXTRACT5]](s16) ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT6:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT6]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 ; GFX9: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT4]], [[SHL]](<2 x s16>), 0 - ; GFX9: [[EXTRACT7:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT5]](<4 x s16>), 0 - ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT7]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT5]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT6:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 ; GFX9: [[INSERT7:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT6]], [[SHL1]](s16), 32 - ; GFX9: [[EXTRACT8:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT7]](<4 x s16>), 0 - ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT8]](<3 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT7]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; GFX9: [[INSERT8:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV8]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT8]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index a1087058ae5d6..0350c1cc53a01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -349,21 +349,24 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 @@ -394,31 +397,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] @@ -434,24 +440,27 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), 
[[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] + ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 483681f18de84..0e2dbbc948bdb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -349,21 +349,24 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 @@ -394,31 +397,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) 
+ ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] @@ -434,24 +440,27 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] + ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index f38da863cba90..fc1f5ab266ec0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -528,9 +528,11 @@ body: | ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 
x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -590,9 +592,11 @@ body: | ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -626,9 +630,11 @@ body: | ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>), [[DEF2]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SSUBSAT %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index 9d51870b4fed2..e6a7193a3b0f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -419,9 +419,11 @@ body: | ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) 
= G_IMPLICIT_DEF + ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -460,9 +462,11 @@ body: | ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -496,9 +500,11 @@ body: | ; GFX9: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>), [[DEF2]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_UADDSAT %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index 7a1bf1b4467b4..da102794a474c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -351,21 +351,24 @@ body: | ; SI-LABEL: name: test_umax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -396,31 +399,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] @@ -436,24 +442,27 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT 
[[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]] + ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index de480c00783b5..d5cddf28000c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -351,21 +351,24 @@ body: | ; SI-LABEL: name: test_umin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 
x s16>), 0 - ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; SI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; SI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; SI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; SI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) @@ -396,31 +399,34 @@ body: | ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; SI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; SI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; SI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 - ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; VI: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; VI: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; VI: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT1]](<3 x s16>), 0 - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; VI: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] @@ -436,24 +442,27 @@ body: | ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; VI: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; VI: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<4 x s16>), 0 + ; VI: S_NOP 0, implicit [[EXTRACT]](<3 x s16>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 
x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) - ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX9: S_NOP 0, implicit [[EXTRACT2]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]] + ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]] + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) + ; GFX9: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>), [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; GFX9: S_NOP 0, implicit [[UV12]](<3 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index f40ed9796923c..11b9da883008e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -60,13 +60,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_unmerge_s16_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x 
s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 5bb430cf4a062..6b5d9c8cb5c59 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -403,9 +403,11 @@ body: | ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX6: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX6: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX6: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX6: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -444,9 +446,11 @@ body: | ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; GFX8: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX8: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX8: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x 
s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) @@ -480,9 +484,11 @@ body: | ; GFX9: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>), [[DEF2]](<2 x s16>) ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 - ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[EXTRACT1]](<3 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) + ; GFX9: [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>), [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT]](<3 x s16>), [[UV6]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_USUBSAT %1, %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index 110ed17400d26..3c3987a61cf4f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -361,15 +361,18 @@ body: | ; CHECK-LABEL: name: test_xor_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF1]](<4 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[XOR]](<4 x s16>), 0 - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT2]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[XOR]](<4 x 
s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV8]](<3 x s16>), 0 ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF @@ -403,27 +406,32 @@ body: | ; CHECK-LABEL: name: test_xor_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[DEF1]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<5 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 + ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV2]](<3 x s16>), 0 + ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 + ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 ; CHECK: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]] - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[XOR]](<4 x s16>), 0 - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV1]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV3]](<3 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[XOR]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV16:%[0-9]+]]:_(<3 x s16>), [[UV17:%[0-9]+]]:_(<3 x s16>), [[UV18:%[0-9]+]]:_(<3 x s16>), [[UV19:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = 
G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 + ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 ; CHECK: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]] - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[XOR1]](<4 x s16>), 0 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[EXTRACT2]](<3 x s16>), [[EXTRACT3]](<3 x s16>) - ; CHECK: [[EXTRACT4:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[XOR1]](<4 x s16>), [[DEF3]](<4 x s16>), [[DEF3]](<4 x s16>) + ; CHECK: [[UV20:%[0-9]+]]:_(<3 x s16>), [[UV21:%[0-9]+]]:_(<3 x s16>), [[UV22:%[0-9]+]]:_(<3 x s16>), [[UV23:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<12 x s16>) + ; CHECK: [[CONCAT_VECTORS4:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV16]](<3 x s16>), [[UV20]](<3 x s16>) + ; CHECK: [[CONCAT_VECTORS5:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS4]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK: [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>), [[UV26:%[0-9]+]]:_(<5 x s16>), [[UV27:%[0-9]+]]:_(<5 x s16>), [[UV28:%[0-9]+]]:_(<5 x s16>), [[UV29:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS5]](<30 x s16>) ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[EXTRACT4]](<5 x s16>), 0 + ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV24]](<5 x s16>), 0 ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF @@ -463,8 +471,9 @@ body: | ; CHECK: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[BUILD_VECTOR]](<4 x s8>), 0 - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index aba47890f61cb..805aa301f9383 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -172,19 +172,24 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) - ; GFX6: 
[[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) + ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX6: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX6: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 + ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 + ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 + ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 @@ -196,19 +201,24 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) + ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX7: 
[[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX7: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX7: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 + ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 + ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 + ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 @@ -220,19 +230,24 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0_sub1_sub2 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4) + ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 + ; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX8: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX8: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0 + ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1 + ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY 
[[COPY5]].sub2 + ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) @@ -1580,13 +1595,20 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX6: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 - ; GFX6: $vgpr0 = COPY [[COPY6]] - ; GFX6: $vgpr1 = COPY [[COPY7]] - ; GFX6: $vgpr2 = COPY [[COPY8]] + ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 + ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX6: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX6: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX6: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX6: $vgpr0 = COPY [[COPY11]] + ; GFX6: $vgpr1 = COPY [[COPY12]] + ; GFX6: $vgpr2 = COPY [[COPY13]] ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): @@ -1599,13 +1621,20 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX7: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 - ; GFX7: $vgpr0 = COPY [[COPY6]] - ; GFX7: $vgpr1 = COPY [[COPY7]] - ; GFX7: $vgpr2 = COPY [[COPY8]] + ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 + ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX7: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX7: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX7: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX7: $vgpr0 = COPY [[COPY11]] + ; GFX7: $vgpr1 = COPY [[COPY12]] + ; GFX7: $vgpr2 = COPY [[COPY13]] ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): @@ -1618,13 +1647,20 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16, align 4) - ; GFX8: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0_sub1_sub2 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub0 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub1 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]].sub2 - ; GFX8: $vgpr0 = COPY [[COPY6]] - ; GFX8: $vgpr1 = COPY [[COPY7]] - ; GFX8: $vgpr2 = COPY [[COPY8]] + ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 + ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 + ; GFX8: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5 + ; GFX8: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8 + ; GFX8: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11 + ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 + ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 + ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 + ; GFX8: $vgpr0 = COPY [[COPY11]] + ; GFX8: $vgpr1 = COPY [[COPY12]] + ; GFX8: $vgpr2 = COPY [[COPY13]] ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index b8e69433913ea..670c9898c2798 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -56,15 +56,17 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load 12, align 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), 0 - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:sgpr(<3 x s32>), [[UV1:%[0-9]+]]:sgpr(<3 x s32>), [[UV2:%[0-9]+]]:sgpr(<3 x s32>), [[UV3:%[0-9]+]]:sgpr(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) ; CHECK: $sgpr2 = COPY [[INT2]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 @@ -232,11 +234,15 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:vgpr(<3 x s32>) = G_EXTRACT [[AMDGPU_BUFFER_LOAD]](<4 x s32>), 0 - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) + ; CHECK: 
[[UV:%[0-9]+]]:vgpr(<3 x s32>), [[UV1:%[0-9]+]]:vgpr(<3 x s32>), [[UV2:%[0-9]+]]:vgpr(<3 x s32>), [[UV3:%[0-9]+]]:vgpr(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV4]](s32) + ; CHECK: $vgpr1 = COPY [[UV5]](s32) + ; CHECK: $vgpr2 = COPY [[UV6]](s32) ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 2cfab39d45622..3a9fb59cae6f0 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -3041,11 +3041,10 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] - CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]] CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]] - CHECK: [[EXTR:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[FREEZE]]:_(<4 x s32>), 0 + CHECK: [[EXTR0:%[0-9]+]]:_(<2 x s32>), [[EXTR1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) )"; // Check From 2414bab5d7d6b5b247f3f3b97140a2673fa8414b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 26 Jul 2020 10:47:08 -0400 Subject: [PATCH 189/600] AMDGPU/GlobalISel: Remove old hacks for boolean selection There were various hacks used to try to avoid making s1 SGPR vs. s1 VCC ambiguous after constraining the register before we had a strategy to deal with this. This also attempted to handle undef operands, which are now illegal gMIR. --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 70 ++++--------------- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 ++ .../AMDGPU/GlobalISel/inst-select-brcond.mir | 6 +- .../AMDGPU/GlobalISel/inst-select-phi.mir | 18 ++--- 4 files changed, 31 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5aceb40933c37..16fc759f0cbf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -170,19 +170,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) return false; - // Don't constrain the source register to a class so the def instruction - // handles it (unless it's undef). - // - // FIXME: This is a hack. When selecting the def, we neeed to know - // specifically know that the result is VCCRegBank, and not just an SGPR - // with size 1. An SReg_32 with size 1 is ambiguous with wave32. 
- if (Src.isUndef()) { - const TargetRegisterClass *SrcRC = - TRI.getConstrainedRegClassForOperand(Src, *MRI); - if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) - return false; - } - return true; } @@ -286,50 +273,24 @@ static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { } bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { - MachineOperand &Dst = I.getOperand(0); - MachineOperand &Src0 = I.getOperand(1); - MachineOperand &Src1 = I.getOperand(2); - Register DstReg = Dst.getReg(); + Register DstReg = I.getOperand(0).getReg(); unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); - if (DstRB->getID() == AMDGPU::VCCRegBankID) { - const TargetRegisterClass *RC = TRI.getBoolRC(); - unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), - RC == &AMDGPU::SReg_64RegClass); - I.setDesc(TII.get(InstOpc)); - // Dead implicit-def of scc - I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef - true, // isImp - false, // isKill - true)); // isDead - - // FIXME: Hack to avoid turning the register bank into a register class. - // The selector for G_ICMP relies on seeing the register bank for the result - // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will - // be ambiguous whether it's a scalar or vector bool. - if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg())) - MRI->setRegClass(Src0.getReg(), RC); - if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg())) - MRI->setRegClass(Src1.getReg(), RC); - - return RBI.constrainGenericRegister(DstReg, *RC, *MRI); - } - - // TODO: Should this allow an SCC bank result, and produce a copy from SCC for - // the result? - if (DstRB->getID() == AMDGPU::SGPRRegBankID) { - unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); - I.setDesc(TII.get(InstOpc)); - // Dead implicit-def of scc - I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef - true, // isImp - false, // isKill - true)); // isDead - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } + if (DstRB->getID() != AMDGPU::SGPRRegBankID && + DstRB->getID() != AMDGPU::VCCRegBankID) + return false; - return false; + bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID && + STI.isWave64()); + I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64))); + + // Dead implicit-def of scc + I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef + true, // isImp + false, // isKill + true)); // isDead + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { @@ -2338,8 +2299,7 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { CondPhysReg = AMDGPU::SCC; BrOpcode = AMDGPU::S_CBRANCH_SCC1; - // FIXME: Hack for isSCC tests - ConstrainRC = &AMDGPU::SGPR_32RegClass; + ConstrainRC = &AMDGPU::SReg_32RegClass; } else { // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? 
// We sort of know that a VCC producer based on the register bank, that ands diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 6848f762fc276..0f57d34ba6aaf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1210,6 +1210,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return getWavefrontSize() == 32; } + bool isWave64() const { + return getWavefrontSize() == 64; + } + const TargetRegisterClass *getBoolRC() const { return getRegisterInfo()->getBoolRC(); } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir index 496d496a42c58..6adb3549778c1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -20,7 +20,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY2]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: bb.1: @@ -46,7 +46,7 @@ body: | ; GCN-LABEL: name: brcond_scc_impdef ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) - ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: $scc = COPY [[DEF]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: bb.1: @@ -73,7 +73,7 @@ body: | ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY2]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir index b4ef0caebfc13..4e7c81f5c79ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir @@ -17,7 +17,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -66,7 +66,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -116,7 +116,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -165,7 +165,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = 
COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -215,7 +215,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -263,7 +263,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -314,7 +314,7 @@ body: | ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -363,7 +363,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 @@ -412,7 +412,7 @@ body: | ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc + ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc ; GCN: $scc = COPY [[COPY3]] ; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc ; GCN: S_BRANCH %bb.2 From 42a9f6c554e378f1c010375eca30f04296aa0052 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 28 Jul 2020 10:15:30 -0400 Subject: [PATCH 190/600] GlobalISel: Handle arbitrary FewerElementsVector for G_IMPLICIT_DEF --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 24 +++++------------- .../AArch64/GlobalISel/legalize-freeze.mir | 3 +-- .../AArch64/GlobalISel/legalize-itofp.mir | 6 ++--- .../AArch64/GlobalISel/legalize-undef.mir | 3 +-- .../AMDGPU/GlobalISel/legalize-freeze.mir | 22 ++++++++-------- .../GlobalISel/legalize-implicit-def.mir | 23 ++++++++++------- .../AMDGPU/GlobalISel/legalize-phi.mir | 25 ++++++++----------- 7 files changed, 45 insertions(+), 61 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c11f91bc0a6ca..592f79aa7b712 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3001,28 +3001,16 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - SmallVector DstRegs; - - unsigned NarrowSize = NarrowTy.getSizeInBits(); Register DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; - // FIXME: Don't know how to handle the 
situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) - return UnableToLegalize; + LLT DstTy = MRI.getType(DstReg); + LLT LCMTy = getLCMType(DstTy, NarrowTy); - for (int i = 0; i < NumParts; ++i) { - Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUndef(TmpReg); - DstRegs.push_back(TmpReg); - } + unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); + auto NewUndef = MIRBuilder.buildUndef(NarrowTy); + SmallVector Parts(NumParts, NewUndef.getReg(0)); + buildWidenedRemergeToDst(DstReg, LCMTy, Parts); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir index f96e2e65ce8f4..9417df066a46b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir @@ -39,9 +39,8 @@ body: | ; CHECK-LABEL: name: test_freeze_v4s64 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY [[DEF]](<2 x s64>) ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[DEF]] - ; CHECK: [[FREEZE1:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[COPY]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[DEF]] ; CHECK: $q0 = COPY [[FREEZE]](<2 x s64>) ; CHECK: $q1 = COPY [[FREEZE1]](<2 x s64>) %undef:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir index e348c0e454b80..2af37dff5fd49 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir @@ -249,8 +249,7 @@ body: | liveins: $q0 ; CHECK-LABEL: name: test_uitofp_v2s64_v2i1 ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s1) = COPY [[DEF]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[DEF]](s1), [[COPY]](s1) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[DEF]](s1), [[DEF]](s1) ; CHECK: [[ZEXT:%[0-9]+]]:_(<2 x s64>) = G_ZEXT [[BUILD_VECTOR]](<2 x s1>) ; CHECK: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[ZEXT]](<2 x s64>) ; CHECK: $q0 = COPY [[UITOFP]](<2 x s64>) @@ -266,8 +265,7 @@ body: | liveins: $q0 ; CHECK-LABEL: name: test_sitofp_v2s64_v2i1 ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s1) = COPY [[DEF]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[DEF]](s1), [[COPY]](s1) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s1>) = G_BUILD_VECTOR [[DEF]](s1), [[DEF]](s1) ; CHECK: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR]](<2 x s1>) ; CHECK: [[SITOFP:%[0-9]+]]:_(<2 x s64>) = G_SITOFP [[SEXT]](<2 x s64>) ; CHECK: $q0 = COPY [[SITOFP]](<2 x s64>) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir index 984909d342072..228e3a44b0c50 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir @@ -54,9 +54,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s64 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY [[DEF]](<2 x s64>) ; CHECK: $q0 = COPY [[DEF]](<2 x s64>) - ; CHECK: $q1 = COPY [[COPY]](<2 x s64>) + ; CHECK: $q1 = 
COPY [[DEF]](<2 x s64>) %0:_(<4 x s64>) = G_IMPLICIT_DEF %1:_(<2 x s64> ), %2:_(<2 x s64>) = G_UNMERGE_VALUES %0 $q0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index b2a525da27515..8b7460d6a73a0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -422,19 +422,20 @@ body: | bb.0: ; CHECK-LABEL: name: test_freeze_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<33 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<33 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<33 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<528 x s32>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<33 x s32>) ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) ; CHECK: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32) - ; CHECK: [[DEF2:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]] ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]] ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<33 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<528 x s32>), 0 - ; CHECK: S_NOP 0, implicit [[EXTRACT]](<33 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<33 x s32>) = G_EXTRACT [[CONCAT_VECTORS1]](<528 x s32>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT1]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 S_NOP 0, implicit %1 @@ -447,13 +448,10 @@ body: | ; CHECK-LABEL: name: test_freeze_v64s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] - ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] - ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY1]] - ; CHECK: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY2]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] 
+ ; CHECK: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>) ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 993bb6a437561..5fe42c120db81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -327,14 +327,22 @@ body: | name: test_implicit_def_v33s32 body: | bb.0: + liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: test_implicit_def_v33s32 - ; CHECK: [[DEF:%[0-9]+]]:_(<33 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<33 x s32>) - ; CHECK: S_NOP 0, implicit [[UV]](s32), implicit [[UV32]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<33 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<528 x s32>), 0 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<33 x s32>) + 
; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store 4, addrspace 1) + ; CHECK: G_STORE [[UV32]](s32), [[COPY]](p1) :: (volatile store 4, addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 - S_NOP 0, implicit %1, implicit %33 + %34:_(p1) = COPY $vgpr0_vgpr1 + G_STORE %1, %34 :: (volatile store 4, align 4, addrspace 1) + G_STORE %33, %34 :: (volatile store 4, align 4, addrspace 1) + ... --- @@ -344,11 +352,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v64s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[COPY]](<16 x s32>), [[COPY1]](<16 x s32>), [[COPY2]](<16 x s32>) - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[COPY]](<16 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 997dc3a521201..0b1f7be76a0fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -652,24 +652,21 @@ body: | ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), 
[[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<16 x s32>) + ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[UV96:%[0-9]+]]:_(s32), 
[[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>) - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<16 x s32>) + ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] @@ -741,9 +738,9 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[COPY]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[COPY1]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[COPY2]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) 
bb.0:

From 0c40af6b594f6eb2dcd43cdb2bc2f4584ec8ca15 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Wed, 29 Jul 2020 14:58:09 +0200
Subject: [PATCH 191/600] [mlir] First-party modeling of LLVM types

The current modeling of LLVM IR types in MLIR is based on the LLVMType
class that wraps a raw `llvm::Type *` and delegates uniquing, printing
and parsing to LLVM itself. This model makes thread-safe type
manipulation hard and is being progressively replaced with a cleaner
MLIR model that replicates the type system.

Introduce a set of classes reflecting the LLVM IR type system in MLIR
instead of wrapping the existing types. These are currently introduced
as separate classes without affecting the dialect flow, and are
exercised through a test dialect. Once feature parity is reached, the
old implementation will be gradually substituted with the new one.

Depends On D84171

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D84339
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h  | 470 +++++++++++++++++
 mlir/include/mlir/IR/DialectImplementation.h  |   6 +
 mlir/lib/Dialect/LLVMIR/CMakeLists.txt        |   2 +
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp | 477 ++++++++++++++++++
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp      | 163 ++++++
 mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h       | 458 +++++++++++++++++
 mlir/lib/Parser/DialectSymbolParser.cpp       |  15 +
 mlir/test/Dialect/LLVMIR/types-invalid.mlir   |  95 ++++
 mlir/test/Dialect/LLVMIR/types.mlir           | 184 +++++++
 mlir/test/lib/Dialect/CMakeLists.txt          |   1 +
 mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt   |  14 +
 .../Dialect/LLVMIR/LLVMTypeTestDialect.cpp    |  52 ++
 mlir/tools/mlir-opt/CMakeLists.txt            |   1 +
 mlir/tools/mlir-opt/mlir-opt.cpp              |   8 +-
 14 files changed, 1943 insertions(+), 3 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h
 create mode 100644 mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
 create mode 100644 mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
 create mode 100644 mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
 create mode 100644 mlir/test/Dialect/LLVMIR/types-invalid.mlir
 create mode 100644 mlir/test/Dialect/LLVMIR/types.mlir
 create mode 100644 mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt
 create mode 100644 mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h
new file mode 100644
index 0000000000000..6764f9815c3fb
--- /dev/null
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h
@@ -0,0 +1,470 @@
+//===- LLVMTypes.h - MLIR LLVM dialect types --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the types for the LLVM dialect in MLIR. These MLIR types
+// correspond to the LLVM IR type system.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_LLVMIR_LLVMTYPES_H_
+#define MLIR_DIALECT_LLVMIR_LLVMTYPES_H_
+
+#include "mlir/IR/Types.h"
+
+namespace llvm {
+class ElementCount;
+} // namespace llvm
+
+namespace mlir {
+
+class DialectAsmParser;
+class DialectAsmPrinter;
+
+namespace LLVM {
+namespace detail {
+struct LLVMFunctionTypeStorage;
+struct LLVMIntegerTypeStorage;
+struct LLVMPointerTypeStorage;
+struct LLVMStructTypeStorage;
+struct LLVMTypeAndSizeStorage;
+} // namespace detail
+
+//===----------------------------------------------------------------------===//
+// LLVMTypeNew.
+//===----------------------------------------------------------------------===//
+
+/// Base class for LLVM dialect types.
+///
+/// The LLVM dialect in MLIR fully reflects the LLVM IR type system, providing
+/// a separate MLIR type for each LLVM IR type. All types are represented as
+/// separate subclasses and are compatible with the isa/cast infrastructure. For
+/// convenience, the base class provides most of the APIs available on
+/// llvm::Type in addition to MLIR-compatible APIs.
+///
+/// The LLVM dialect type system is closed: parametric types can only refer to
+/// other LLVM dialect types. This is consistent with LLVM IR and enables a more
+/// concise pretty-printing format.
+///
+/// Similarly to other MLIR types, LLVM dialect types are owned by the MLIR
+/// context, have an immutable identifier (for most types except identified
+/// structs, the entire type is the identifier) and are thread-safe.
+class LLVMTypeNew : public Type {
+public:
+  enum Kind {
+    // Keep non-parametric types contiguous in the enum.
+    VoidType = FIRST_LLVM_TYPE + 1,
+    HalfType,
+    BFloatType,
+    FloatType,
+    DoubleType,
+    FP128Type,
+    X86FP80Type,
+    PPCFP128Type,
+    X86MMXType,
+    LabelType,
+    TokenType,
+    MetadataType,
+    // End of non-parametric types.
+    FunctionType,
+    IntegerType,
+    PointerType,
+    FixedVectorType,
+    ScalableVectorType,
+    ArrayType,
+    StructType,
+    FIRST_NEW_LLVM_TYPE = VoidType,
+    LAST_NEW_LLVM_TYPE = StructType,
+    FIRST_TRIVIAL_TYPE = VoidType,
+    LAST_TRIVIAL_TYPE = MetadataType
+  };
+
+  /// Inherit base constructors.
+  using Type::Type;
+
+  /// Support for PointerLikeTypeTraits.
+  using Type::getAsOpaquePointer;
+  static LLVMTypeNew getFromOpaquePointer(const void *ptr) {
+    return LLVMTypeNew(static_cast<ImplType *>(const_cast<void *>(ptr)));
+  }
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) {
+    return FIRST_NEW_LLVM_TYPE <= kind && kind <= LAST_NEW_LLVM_TYPE;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Trivial types.
+//===----------------------------------------------------------------------===//
+
+// Batch-define trivial types.
+#define DEFINE_TRIVIAL_LLVM_TYPE(ClassName, Kind)                             \
+  class ClassName                                                             \
+      : public Type::TypeBase<ClassName, LLVMTypeNew, TypeStorage> {          \
+  public:                                                                     \
+    using Base::Base;                                                         \
+    static bool kindof(unsigned kind) { return kind == Kind; }                \
+    static ClassName get(MLIRContext *context) {                              \
+      return Base::get(context, Kind);                                        \
+    }                                                                         \
+  }
+
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMVoidType, LLVMTypeNew::VoidType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMHalfType, LLVMTypeNew::HalfType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMBFloatType, LLVMTypeNew::BFloatType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMFloatType, LLVMTypeNew::FloatType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMDoubleType, LLVMTypeNew::DoubleType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMFP128Type, LLVMTypeNew::FP128Type);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86FP80Type, LLVMTypeNew::X86FP80Type);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMPPCFP128Type, LLVMTypeNew::PPCFP128Type);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86MMXType, LLVMTypeNew::X86MMXType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMTokenType, LLVMTypeNew::TokenType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMLabelType, LLVMTypeNew::LabelType);
+DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, LLVMTypeNew::MetadataType);
+
+#undef DEFINE_TRIVIAL_LLVM_TYPE
+
+//===----------------------------------------------------------------------===//
+// LLVMArrayType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect array type. It is an aggregate type representing consecutive
+/// elements in memory, parameterized by the number of elements and the element
+/// type.
+class LLVMArrayType : public Type::TypeBase<LLVMArrayType, LLVMTypeNew,
+                                            detail::LLVMTypeAndSizeStorage> {
+public:
+  /// Inherit base constructors.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) { return kind == LLVMTypeNew::ArrayType; }
+
+  /// Gets or creates an instance of LLVM dialect array type containing
+  /// `numElements` of `elementType`, in the same context as `elementType`.
+  static LLVMArrayType get(LLVMTypeNew elementType, unsigned numElements);
+
+  /// Returns the element type of the array.
+  LLVMTypeNew getElementType();
+
+  /// Returns the number of elements in the array type.
+  unsigned getNumElements();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMFunctionType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect function type. It consists of a single return type (unlike MLIR
+/// which can have multiple), a list of parameter types and can optionally be
+/// variadic.
+class LLVMFunctionType
+    : public Type::TypeBase<LLVMFunctionType, LLVMTypeNew,
+                            detail::LLVMFunctionTypeStorage> {
+public:
+  /// Inherit base constructors.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) {
+    return kind == LLVMTypeNew::FunctionType;
+  }
+
+  /// Gets or creates an instance of LLVM dialect function in the same context
+  /// as the `result` type.
+  static LLVMFunctionType get(LLVMTypeNew result,
+                              ArrayRef<LLVMTypeNew> arguments,
+                              bool isVarArg = false);
+
+  /// Returns the result type of the function.
+  LLVMTypeNew getReturnType();
+
+  /// Returns the number of arguments to the function.
+  unsigned getNumParams();
+
+  /// Returns `i`-th argument of the function. Asserts on out-of-bounds.
+  LLVMTypeNew getParamType(unsigned i);
+
+  /// Returns whether the function is variadic.
+  bool isVarArg();
+
+  /// Returns a list of argument types of the function.
+  ArrayRef<LLVMTypeNew> getParams();
+  ArrayRef<LLVMTypeNew> params() { return getParams(); }
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMIntegerType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect signless integer type parameterized by bitwidth.
+class LLVMIntegerType : public Type::TypeBase<LLVMIntegerType, LLVMTypeNew,
+                                              detail::LLVMIntegerTypeStorage> {
+public:
+  /// Inherit base constructor.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) { return kind == LLVMTypeNew::IntegerType; }
+
+  /// Gets or creates an instance of the integer of the specified `bitwidth` in
+  /// the given context.
+  static LLVMIntegerType get(MLIRContext *ctx, unsigned bitwidth);
+
+  /// Returns the bitwidth of this integer type.
+  unsigned getBitWidth();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMPointerType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect pointer type. This type typically represents a reference to an
+/// object in memory. It is parameterized by the element type and the address
+/// space.
+class LLVMPointerType : public Type::TypeBase<LLVMPointerType, LLVMTypeNew,
+                                              detail::LLVMPointerTypeStorage> {
+public:
+  /// Inherit base constructors.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) { return kind == LLVMTypeNew::PointerType; }
+
+  /// Gets or creates an instance of LLVM dialect pointer type pointing to an
+  /// object of `pointee` type in the given address space. The pointer type is
+  /// created in the same context as `pointee`.
+  static LLVMPointerType get(LLVMTypeNew pointee, unsigned addressSpace = 0);
+
+  /// Returns the pointed-to type.
+  LLVMTypeNew getElementType();
+
+  /// Returns the address space of the pointer.
+  unsigned getAddressSpace();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMStructType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect structure type representing a collection of different-typed
+/// elements manipulated together. Structures can optionally be packed, meaning
+/// that their elements immediately follow each other in memory without
+/// accounting for potential alignment.
+///
+/// Structure types can be identified (named) or literal. Literal structures
+/// are uniquely represented by the list of types they contain and packedness.
+/// Literal structure types are immutable after construction.
+///
+/// Identified structures are uniquely represented by their name, a string. They
+/// have a mutable component, consisting of the list of types they contain,
+/// the packedness and the opacity bits. Identified structs can be created
+/// without providing the lists of element types, making them suitable to
+/// represent recursive, i.e. self-referring, structures. Identified structs
+/// without a body are considered opaque. For such structs, one can set the
+/// body. Identified structs can be created as intentionally-opaque, implying
+/// that the caller does not intend to ever set the body (e.g.
+/// forward-declarations of structs from another module) and wants to disallow
+/// further modification of the body. For intentionally-opaque structs or
+/// non-opaque structs with a body, one is not allowed to set another body
+/// (however, one can set exactly the same body).
+///
+/// Note that the packedness of a struct participates in the uniquing of
+/// literal structs, but not in the uniquing of identified structs.
+class LLVMStructType : public Type::TypeBase<LLVMStructType, LLVMTypeNew,
+                                             detail::LLVMStructTypeStorage> {
+public:
+  /// Inherit base constructors.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) { return kind == LLVMTypeNew::StructType; }
+
+  /// Gets or creates an identified struct with the given name in the provided
+  /// context. Note that unlike llvm::StructType::create, this function will
+  /// _NOT_ rename a struct in case a struct with the same name already exists
+  /// in the context. Instead, it will just return the existing struct,
+  /// similarly to the rest of MLIR type ::get methods.
+  static LLVMStructType getIdentified(MLIRContext *context, StringRef name);
+
+  /// Gets or creates a literal struct with the given body in the provided
+  /// context.
+  static LLVMStructType getLiteral(MLIRContext *context,
+                                   ArrayRef<LLVMTypeNew> types,
+                                   bool isPacked = false);
+
+  /// Gets or creates an intentionally-opaque identified struct. Such a struct
+  /// cannot have its body set. To create an opaque struct with a mutable body,
+  /// use `getIdentified`. Note that unlike llvm::StructType::create, this
+  /// function will _NOT_ rename a struct in case a struct with the same name
+  /// already exists in the context. Instead, it will just return the existing
+  /// struct, similarly to the rest of MLIR type ::get methods.
+  static LLVMStructType getOpaque(StringRef name, MLIRContext *context);
+
+  /// Set the body of an identified struct. Returns failure if the body could
+  /// not be set, e.g. if the struct already has a body or if it was marked as
+  /// intentionally opaque. This might happen in a multi-threaded context when a
+  /// different thread modified the struct after it was created. Most callers
+  /// are likely to assert this always succeeds, but it is possible to implement
+  /// a local renaming scheme based on the result of this call.
+  LogicalResult setBody(ArrayRef<LLVMTypeNew> types, bool isPacked);
+
+  /// Checks if a struct is packed.
+  bool isPacked();
+
+  /// Checks if a struct is identified.
+  bool isIdentified();
+
+  /// Checks if a struct is opaque.
+  bool isOpaque();
+
+  /// Returns the name of an identified struct.
+  StringRef getName();
+
+  /// Returns the list of element types contained in a non-opaque struct.
+  ArrayRef<LLVMTypeNew> getBody();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMVectorType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect vector type, represents a sequence of elements that can be
+/// processed as one, typically in SIMD context. This is a base class for fixed
+/// and scalable vectors.
+class LLVMVectorType : public LLVMTypeNew {
+public:
+  /// Inherit base constructor.
+  using LLVMTypeNew::LLVMTypeNew;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) {
+    return kind == LLVMTypeNew::FixedVectorType ||
+           kind == LLVMTypeNew::ScalableVectorType;
+  }
+
+  /// Returns the element type of the vector.
+  LLVMTypeNew getElementType();
+
+  /// Returns the number of elements in the vector.
+  llvm::ElementCount getElementCount();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMFixedVectorType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect fixed vector type, represents a sequence of elements of known
+/// length that can be processed as one.
+class LLVMFixedVectorType
+    : public Type::TypeBase<LLVMFixedVectorType, LLVMVectorType,
+                            detail::LLVMTypeAndSizeStorage> {
+public:
+  /// Inherit base constructor.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) {
+    return kind == LLVMTypeNew::FixedVectorType;
+  }
+
+  /// Gets or creates a fixed vector type containing `numElements` of
+  /// `elementType` in the same context as `elementType`.
+  static LLVMFixedVectorType get(LLVMTypeNew elementType, unsigned numElements);
+
+  /// Returns the number of elements in the fixed vector.
+  unsigned getNumElements();
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMScalableVectorType.
+//===----------------------------------------------------------------------===//
+
+/// LLVM dialect scalable vector type, represents a sequence of elements of
+/// unknown length that is known to be divisible by some constant. These
+/// elements can be processed as one in SIMD context.
+class LLVMScalableVectorType
+    : public Type::TypeBase<LLVMScalableVectorType, LLVMVectorType,
+                            detail::LLVMTypeAndSizeStorage> {
+public:
+  /// Inherit base constructor.
+  using Base::Base;
+
+  /// Support for isa/cast.
+  static bool kindof(unsigned kind) {
+    return kind == LLVMTypeNew::ScalableVectorType;
+  }
+
+  /// Gets or creates a scalable vector type containing a non-zero multiple of
+  /// `minNumElements` of `elementType` in the same context as `elementType`.
+  static LLVMScalableVectorType get(LLVMTypeNew elementType,
+                                    unsigned minNumElements);
+
+  /// Returns the scaling factor of the number of elements in the vector. The
+  /// vector contains at least the resulting number of elements, or any non-zero
+  /// multiple of this number.
+  unsigned getMinNumElements();
+};
+
+//===----------------------------------------------------------------------===//
+// Printing and parsing.
+//===----------------------------------------------------------------------===//
+
+namespace detail {
+/// Parses an LLVM dialect type.
+LLVMTypeNew parseType(DialectAsmParser &parser);
+
+/// Prints an LLVM Dialect type.
+void printType(LLVMTypeNew type, DialectAsmPrinter &printer);
+} // namespace detail
+
+} // namespace LLVM
+} // namespace mlir
+
+//===----------------------------------------------------------------------===//
+// Support for hashing and containers.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+// LLVMTypeNew instances hash just like pointers.
+template <> struct DenseMapInfo<mlir::LLVM::LLVMTypeNew> {
+  static mlir::LLVM::LLVMTypeNew getEmptyKey() {
+    void *pointer = llvm::DenseMapInfo<void *>::getEmptyKey();
+    return mlir::LLVM::LLVMTypeNew(
+        static_cast<mlir::LLVM::LLVMTypeNew::ImplType *>(pointer));
+  }
+  static mlir::LLVM::LLVMTypeNew getTombstoneKey() {
+    void *pointer = llvm::DenseMapInfo<void *>::getTombstoneKey();
+    return mlir::LLVM::LLVMTypeNew(
+        static_cast<mlir::LLVM::LLVMTypeNew::ImplType *>(pointer));
+  }
+  static unsigned getHashValue(mlir::LLVM::LLVMTypeNew val) {
+    return mlir::hash_value(val);
+  }
+  static bool isEqual(mlir::LLVM::LLVMTypeNew lhs,
+                      mlir::LLVM::LLVMTypeNew rhs) {
+    return lhs == rhs;
+  }
+};
+
+// LLVMTypeNew behaves like a pointer similarly to mlir::Type.
+// LLVMTypeNew behaves like a pointer similarly to mlir::Type.
+template <> struct PointerLikeTypeTraits<mlir::LLVM::LLVMTypeNew> {
+  static inline void *getAsVoidPointer(mlir::LLVM::LLVMTypeNew type) {
+    return const_cast<void *>(type.getAsOpaquePointer());
+  }
+  static inline mlir::LLVM::LLVMTypeNew getFromVoidPointer(void *ptr) {
+    return mlir::LLVM::LLVMTypeNew::getFromOpaquePointer(ptr);
+  }
+  static constexpr int NumLowBitsAvailable =
+      PointerLikeTypeTraits<mlir::Type>::NumLowBitsAvailable;
+};
+
+} // namespace llvm
+
+#endif // MLIR_DIALECT_LLVMIR_LLVMTYPES_H_
diff --git a/mlir/include/mlir/IR/DialectImplementation.h b/mlir/include/mlir/IR/DialectImplementation.h
index e2d7e2c409c45..c478b200b5d91 100644
--- a/mlir/include/mlir/IR/DialectImplementation.h
+++ b/mlir/include/mlir/IR/DialectImplementation.h
@@ -203,6 +203,9 @@ class DialectAsmParser {
   /// Parse a `=` token if present.
   virtual ParseResult parseOptionalEqual() = 0;
 
+  /// Parse a quoted string token if present.
+  virtual ParseResult parseOptionalString(StringRef *string) = 0;
+
   /// Parse a given keyword.
   ParseResult parseKeyword(StringRef keyword, const Twine &msg = "") {
     auto loc = getCurrentLocation();
@@ -323,6 +326,9 @@ class DialectAsmParser {
     return success();
   }
 
+  /// Parse a type if present.
+  virtual OptionalParseResult parseOptionalType(Type &result) = 0;
+
   /// Parse a 'x' separated dimension list. This populates the dimension list,
   /// using -1 for the `?` dimensions if `allowDynamic` is set and errors out on
   /// `?` otherwise.
diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
index e858a0a70c730..ff6560305cb80 100644
--- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
+++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -2,6 +2,8 @@ add_subdirectory(Transforms)
 
 add_mlir_dialect_library(MLIRLLVMIR
   IR/LLVMDialect.cpp
+  IR/LLVMTypes.cpp
+  IR/LLVMTypeSyntax.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/LLVMIR
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
new file mode 100644
index 0000000000000..d272297525c1e
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
@@ -0,0 +1,477 @@
+//===- LLVMTypeSyntax.cpp - Parsing/printing for MLIR LLVM Dialect types --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/DialectImplementation.h"
+#include "llvm/ADT/SetVector.h"
+
+using namespace mlir;
+using namespace mlir::LLVM;
+
+//===----------------------------------------------------------------------===//
+// Printing.
+//===----------------------------------------------------------------------===//
+
+static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type,
+                          llvm::SetVector<StringRef> &stack);
+
+/// Returns the keyword to use for the given type.
+static StringRef getTypeKeyword(LLVMTypeNew type) {
+  switch (type.getKind()) {
+  case LLVMTypeNew::VoidType:
+    return "void";
+  case LLVMTypeNew::HalfType:
+    return "half";
+  case LLVMTypeNew::BFloatType:
+    return "bfloat";
+  case LLVMTypeNew::FloatType:
+    return "float";
+  case LLVMTypeNew::DoubleType:
+    return "double";
+  case LLVMTypeNew::FP128Type:
+    return "fp128";
+  case LLVMTypeNew::X86FP80Type:
+    return "x86_fp80";
+  case LLVMTypeNew::PPCFP128Type:
+    return "ppc_fp128";
+  case LLVMTypeNew::X86MMXType:
+    return "x86_mmx";
+  case LLVMTypeNew::TokenType:
+    return "token";
+  case LLVMTypeNew::LabelType:
+    return "label";
+  case LLVMTypeNew::MetadataType:
+    return "metadata";
+  case LLVMTypeNew::FunctionType:
+    return "func";
+  case LLVMTypeNew::IntegerType:
+    return "i";
+  case LLVMTypeNew::PointerType:
+    return "ptr";
+  case LLVMTypeNew::FixedVectorType:
+  case LLVMTypeNew::ScalableVectorType:
+    return "vec";
+  case LLVMTypeNew::ArrayType:
+    return "array";
+  case LLVMTypeNew::StructType:
+    return "struct";
+  }
+  llvm_unreachable("unhandled type kind");
+}
+
+/// Prints the body of a structure type. Uses `stack` to avoid printing
+/// recursive structs indefinitely.
+static void printStructTypeBody(llvm::raw_ostream &os, LLVMStructType type,
+                                llvm::SetVector<StringRef> &stack) {
+  if (type.isIdentified() && type.isOpaque()) {
+    os << "opaque";
+    return;
+  }
+
+  if (type.isPacked())
+    os << "packed ";
+
+  // Put the current type on stack to avoid infinite recursion.
+  os << '(';
+  if (type.isIdentified())
+    stack.insert(type.getName());
+  llvm::interleaveComma(type.getBody(), os, [&](LLVMTypeNew subtype) {
+    printTypeImpl(os, subtype, stack);
+  });
+  if (type.isIdentified())
+    stack.pop_back();
+  os << ')';
+}
+
+/// Prints a structure type. Uses `stack` to keep track of the identifiers of
+/// the structs being printed. Checks if the identifier of a struct is
+/// contained in `stack`, i.e. whether a self-reference to a recursive struct
+/// is being printed, and only prints the name to avoid infinite recursion.
+static void printStructType(llvm::raw_ostream &os, LLVMStructType type,
+                            llvm::SetVector<StringRef> &stack) {
+  os << "<";
+  if (type.isIdentified()) {
+    os << '"' << type.getName() << '"';
+    // If we are printing a reference to one of the enclosing structs, just
+    // print the name and stop to avoid infinitely long output.
+    if (stack.count(type.getName())) {
+      os << '>';
+      return;
+    }
+    os << ", ";
+  }
+
+  printStructTypeBody(os, type, stack);
+  os << '>';
+}
+
+/// Prints a type containing a fixed number of elements.
+template <typename TypeTy>
+static void printArrayOrVectorType(llvm::raw_ostream &os, TypeTy type,
+                                   llvm::SetVector<StringRef> &stack) {
+  os << '<' << type.getNumElements() << " x ";
+  printTypeImpl(os, type.getElementType(), stack);
+  os << '>';
+}
+
+/// Prints a function type.
+static void printFunctionType(llvm::raw_ostream &os, LLVMFunctionType funcType,
+                              llvm::SetVector<StringRef> &stack) {
+  os << '<';
+  printTypeImpl(os, funcType.getReturnType(), stack);
+  os << " (";
+  llvm::interleaveComma(funcType.getParams(), os,
+                        [&os, &stack](LLVMTypeNew subtype) {
+                          printTypeImpl(os, subtype, stack);
+                        });
+  if (funcType.isVarArg()) {
+    if (funcType.getNumParams() != 0)
+      os << ", ";
+    os << "...";
+  }
+  os << ")>";
+}
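+// NOTE: Illustrative only (not part of the original patch): with the printer
+// above, a variadic function type renders as, e.g., func<i32 (float, ...)>,
+// and a zero-argument one as func<void ()>.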
+
+/// Prints the given LLVM dialect type recursively. This leverages closedness
+/// of the LLVM dialect type system to avoid printing the dialect prefix
+/// repeatedly. For recursive structures, only prints the name of the structure
+/// when printing a self-reference. Note that this does not apply to sibling
+/// references. For example,
+///   struct<"a", (ptr<struct<"a">>)>
+///   struct<"c", (ptr<struct<"b", (ptr<struct<"c">>)>>,
+///                ptr<struct<"b", (ptr<struct<"c">>)>>)>
+/// note that "b" is printed twice.
+static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type,
+                          llvm::SetVector<StringRef> &stack) {
+  if (!type) {
+    os << "<<NULL-TYPE>>";
+    return;
+  }
+
+  unsigned kind = type.getKind();
+  os << getTypeKeyword(type);
+
+  // Trivial types only consist of their keyword.
+  if (LLVMTypeNew::FIRST_TRIVIAL_TYPE <= kind &&
+      kind <= LLVMTypeNew::LAST_TRIVIAL_TYPE)
+    return;
+
+  if (auto intType = type.dyn_cast<LLVMIntegerType>()) {
+    os << intType.getBitWidth();
+    return;
+  }
+
+  if (auto ptrType = type.dyn_cast<LLVMPointerType>()) {
+    os << '<';
+    printTypeImpl(os, ptrType.getElementType(), stack);
+    if (ptrType.getAddressSpace() != 0)
+      os << ", " << ptrType.getAddressSpace();
+    os << '>';
+    return;
+  }
+
+  if (auto arrayType = type.dyn_cast<LLVMArrayType>())
+    return printArrayOrVectorType(os, arrayType, stack);
+  if (auto vectorType = type.dyn_cast<LLVMFixedVectorType>())
+    return printArrayOrVectorType(os, vectorType, stack);
+
+  if (auto vectorType = type.dyn_cast<LLVMScalableVectorType>()) {
+    os << "<? x " << vectorType.getMinNumElements() << " x ";
+    printTypeImpl(os, vectorType.getElementType(), stack);
+    os << '>';
+    return;
+  }
+
+  if (auto structType = type.dyn_cast<LLVMStructType>())
+    return printStructType(os, structType, stack);
+
+  printFunctionType(os, type.cast<LLVMFunctionType>(), stack);
+}
+
+void mlir::LLVM::detail::printType(LLVMTypeNew type,
+                                   DialectAsmPrinter &printer) {
+  llvm::SetVector<StringRef> stack;
+  return printTypeImpl(printer.getStream(), type, stack);
+}
+
+//===----------------------------------------------------------------------===//
+// Parsing.
+//===----------------------------------------------------------------------===//
+
+static LLVMTypeNew parseTypeImpl(DialectAsmParser &parser,
+                                 llvm::SetVector<StringRef> &stack);
+
+/// Helper to be chained with other parsing functions.
+static ParseResult parseTypeImpl(DialectAsmParser &parser,
+                                 llvm::SetVector<StringRef> &stack,
+                                 LLVMTypeNew &result) {
+  result = parseTypeImpl(parser, stack);
+  return success(result != nullptr);
+}
+
+/// Parses an LLVM dialect function type.
+///   llvm-type ::= `func<` llvm-type `(` llvm-type-list `...`? `)>`
+static LLVMFunctionType parseFunctionType(DialectAsmParser &parser,
+                                          llvm::SetVector<StringRef> &stack) {
+  LLVMTypeNew returnType;
+  if (parser.parseLess() || parseTypeImpl(parser, stack, returnType) ||
+      parser.parseLParen())
+    return LLVMFunctionType();
+
+  // Function type without arguments.
+  if (succeeded(parser.parseOptionalRParen())) {
+    if (succeeded(parser.parseGreater()))
+      return LLVMFunctionType::get(returnType, {}, /*isVarArg=*/false);
+    return LLVMFunctionType();
+  }
+
+  // Parse arguments.
+  SmallVector<LLVMTypeNew, 4> argTypes;
+  do {
+    if (succeeded(parser.parseOptionalEllipsis())) {
+      if (parser.parseOptionalRParen() || parser.parseOptionalGreater())
+        return LLVMFunctionType();
+      return LLVMFunctionType::get(returnType, argTypes, /*isVarArg=*/true);
+    }
+
+    argTypes.push_back(parseTypeImpl(parser, stack));
+    if (!argTypes.back())
+      return LLVMFunctionType();
+  } while (succeeded(parser.parseOptionalComma()));
+
+  if (parser.parseOptionalRParen() || parser.parseOptionalGreater())
+    return LLVMFunctionType();
+  return LLVMFunctionType::get(returnType, argTypes, /*isVarArg=*/false);
+}
+
+/// Parses an LLVM dialect pointer type.
+///   llvm-type ::= `ptr<` llvm-type (`,` integer)? `>`
+static LLVMPointerType parsePointerType(DialectAsmParser &parser,
+                                        llvm::SetVector<StringRef> &stack) {
+  LLVMTypeNew elementType;
+  if (parser.parseLess() || parseTypeImpl(parser, stack, elementType))
+    return LLVMPointerType();
+
+  unsigned addressSpace = 0;
+  if (succeeded(parser.parseOptionalComma()) &&
+      failed(parser.parseInteger(addressSpace)))
+    return LLVMPointerType();
+  if (failed(parser.parseGreater()))
+    return LLVMPointerType();
+  return LLVMPointerType::get(elementType, addressSpace);
+}
+
+/// Parses an LLVM dialect vector type.
+///   llvm-type ::= `vec<` `? x`? integer `x` llvm-type `>`
+/// Supports both fixed and scalable vectors.
+static LLVMVectorType parseVectorType(DialectAsmParser &parser,
+                                      llvm::SetVector<StringRef> &stack) {
+  SmallVector<int64_t, 2> dims;
+  llvm::SMLoc dimPos;
+  LLVMTypeNew elementType;
+  if (parser.parseLess() || parser.getCurrentLocation(&dimPos) ||
+      parser.parseDimensionList(dims, /*allowDynamic=*/true) ||
+      parseTypeImpl(parser, stack, elementType) || parser.parseGreater())
+    return LLVMVectorType();
+
+  // We parsed a generic dimension list, but vectors only support two forms:
+  //  - single non-dynamic entry in the list (fixed vector);
+  //  - two elements, the first dynamic (indicated by -1) and the second
+  //    non-dynamic (scalable vector).
+  if (dims.empty() || dims.size() > 2 ||
+      ((dims.size() == 2) ^ (dims[0] == -1)) ||
+      (dims.size() == 2 && dims[1] == -1)) {
+    parser.emitError(dimPos)
+        << "expected '? x <integer> x <type>' or '<integer> x <type>'";
+    return LLVMVectorType();
+  }
+
+  bool isScalable = dims.size() == 2;
+  return isScalable ? static_cast<LLVMVectorType>(
+                          LLVMScalableVectorType::get(elementType, dims[1]))
+                    : LLVMFixedVectorType::get(elementType, dims[0]);
+}
+
+/// Parses an LLVM dialect array type.
+///   llvm-type ::= `array<` integer `x` llvm-type `>`
+static LLVMArrayType parseArrayType(DialectAsmParser &parser,
+                                    llvm::SetVector<StringRef> &stack) {
+  SmallVector<int64_t, 1> dims;
+  llvm::SMLoc sizePos;
+  LLVMTypeNew elementType;
+  if (parser.parseLess() || parser.getCurrentLocation(&sizePos) ||
+      parser.parseDimensionList(dims, /*allowDynamic=*/false) ||
+      parseTypeImpl(parser, stack, elementType) || parser.parseGreater())
+    return LLVMArrayType();
+
+  if (dims.size() != 1) {
+    parser.emitError(sizePos) << "expected ? x <type>";
+    return LLVMArrayType();
+  }
+
+  return LLVMArrayType::get(elementType, dims[0]);
+}
+
+/// Attempts to set the body of an identified structure type. Reports a parsing
+/// error at `subtypesLoc` in case of failure, uses `stack` to make sure the
+/// types printed in the error message look like they did when parsed.
+static LLVMStructType trySetStructBody(LLVMStructType type,
+                                       ArrayRef<LLVMTypeNew> subtypes,
+                                       bool isPacked, DialectAsmParser &parser,
+                                       llvm::SMLoc subtypesLoc,
+                                       llvm::SetVector<StringRef> &stack) {
+  if (succeeded(type.setBody(subtypes, isPacked)))
+    return type;
+
+  std::string currentBody;
+  llvm::raw_string_ostream currentBodyStream(currentBody);
+  printStructTypeBody(currentBodyStream, type, stack);
+  auto diag = parser.emitError(subtypesLoc)
+              << "identified type already used with a different body";
+  diag.attachNote() << "existing body: " << currentBodyStream.str();
+  return LLVMStructType();
+}
+
+/// Parses an LLVM dialect structure type.
+///   llvm-type ::= `struct<` (string-literal `,`)? `packed`?
+///                 `(` llvm-type-list `)` `>`
+///               | `struct<` string-literal `>`
+///               | `struct<` string-literal `, opaque>`
+static LLVMStructType parseStructType(DialectAsmParser &parser,
+                                      llvm::SetVector<StringRef> &stack) {
+  MLIRContext *ctx = parser.getBuilder().getContext();
+
+  if (failed(parser.parseLess()))
+    return LLVMStructType();
+
+  // If we are parsing a self-reference to a recursive struct, i.e. the parsing
+  // stack already contains a struct with the same identifier, bail out after
+  // the name.
+  StringRef name;
+  bool isIdentified = succeeded(parser.parseOptionalString(&name));
+  if (isIdentified) {
+    if (stack.count(name)) {
+      if (failed(parser.parseGreater()))
+        return LLVMStructType();
+      return LLVMStructType::getIdentified(ctx, name);
+    }
+    if (failed(parser.parseComma()))
+      return LLVMStructType();
+  }
+
+  // Handle intentionally opaque structs.
+  llvm::SMLoc kwLoc = parser.getCurrentLocation();
+  if (succeeded(parser.parseOptionalKeyword("opaque"))) {
+    if (!isIdentified)
+      return parser.emitError(kwLoc, "only identified structs can be opaque"),
+             LLVMStructType();
+    if (failed(parser.parseGreater()))
+      return LLVMStructType();
+    auto type = LLVMStructType::getOpaque(name, ctx);
+    if (!type.isOpaque()) {
+      parser.emitError(kwLoc, "redeclaring defined struct as opaque");
+      return LLVMStructType();
+    }
+    return type;
+  }
+
+  // Check for packedness.
+  bool isPacked = succeeded(parser.parseOptionalKeyword("packed"));
+  if (failed(parser.parseLParen()))
+    return LLVMStructType();
+
+  // Fast pass for structs with zero subtypes.
+  if (succeeded(parser.parseOptionalRParen())) {
+    if (failed(parser.parseGreater()))
+      return LLVMStructType();
+    if (!isIdentified)
+      return LLVMStructType::getLiteral(ctx, {}, isPacked);
+    auto type = LLVMStructType::getIdentified(ctx, name);
+    return trySetStructBody(type, {}, isPacked, parser, kwLoc, stack);
+  }
+
+  // Parse subtypes. For identified structs, put the identifier of the struct
+  // on the stack to support self-references in the recursive calls.
+  SmallVector<LLVMTypeNew, 4> subtypes;
+  llvm::SMLoc subtypesLoc = parser.getCurrentLocation();
+  do {
+    if (isIdentified)
+      stack.insert(name);
+    LLVMTypeNew type = parseTypeImpl(parser, stack);
+    if (!type)
+      return LLVMStructType();
+    subtypes.push_back(type);
+    if (isIdentified)
+      stack.pop_back();
+  } while (succeeded(parser.parseOptionalComma()));
+
+  if (parser.parseRParen() || parser.parseGreater())
+    return LLVMStructType();
+
+  // Construct the struct with body.
+  if (!isIdentified)
+    return LLVMStructType::getLiteral(ctx, subtypes, isPacked);
+  auto type = LLVMStructType::getIdentified(ctx, name);
+  return trySetStructBody(type, subtypes, isPacked, parser, subtypesLoc, stack);
+}
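+// NOTE: Illustrative only (not part of the original patch): the parser above
+// accepts, e.g., struct<(i32, float)>, struct<"a", opaque> and the
+// self-referential struct<"a", (ptr<struct<"a">>)>; the `stack` of names is
+// what lets the last form terminate instead of recursing forever.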
+
+/// Parses one of the LLVM dialect types.
+static LLVMTypeNew parseTypeImpl(DialectAsmParser &parser,
+                                 llvm::SetVector<StringRef> &stack) {
+  // Special case for integers (i[1-9][0-9]*) that are literals rather than
+  // keywords for the parser, so they are not caught by the main dispatch
+  // below. Try parsing it as a built-in integer type instead.
+  Type maybeIntegerType;
+  MLIRContext *ctx = parser.getBuilder().getContext();
+  llvm::SMLoc keyLoc = parser.getCurrentLocation();
+  OptionalParseResult result = parser.parseOptionalType(maybeIntegerType);
+  if (result.hasValue()) {
+    if (failed(*result))
+      return LLVMTypeNew();
+
+    if (!maybeIntegerType.isSignlessInteger()) {
+      parser.emitError(keyLoc) << "unexpected type, expected i* or keyword";
+      return LLVMTypeNew();
+    }
+    return LLVMIntegerType::get(ctx, maybeIntegerType.getIntOrFloatBitWidth());
+  }
+
+  // Dispatch to concrete functions.
+  StringRef key;
+  if (failed(parser.parseKeyword(&key)))
+    return LLVMTypeNew();
+
+  return llvm::StringSwitch<function_ref<LLVMTypeNew()>>(key)
+      .Case("void", [&] { return LLVMVoidType::get(ctx); })
+      .Case("half", [&] { return LLVMHalfType::get(ctx); })
+      .Case("bfloat", [&] { return LLVMBFloatType::get(ctx); })
+      .Case("float", [&] { return LLVMFloatType::get(ctx); })
+      .Case("double", [&] { return LLVMDoubleType::get(ctx); })
+      .Case("fp128", [&] { return LLVMFP128Type::get(ctx); })
+      .Case("x86_fp80", [&] { return LLVMX86FP80Type::get(ctx); })
+      .Case("ppc_fp128", [&] { return LLVMPPCFP128Type::get(ctx); })
+      .Case("x86_mmx", [&] { return LLVMX86MMXType::get(ctx); })
+      .Case("token", [&] { return LLVMTokenType::get(ctx); })
+      .Case("label", [&] { return LLVMLabelType::get(ctx); })
+      .Case("metadata", [&] { return LLVMMetadataType::get(ctx); })
+      .Case("func", [&] { return parseFunctionType(parser, stack); })
+      .Case("ptr", [&] { return parsePointerType(parser, stack); })
+      .Case("vec", [&] { return parseVectorType(parser, stack); })
+      .Case("array", [&] { return parseArrayType(parser, stack); })
+      .Case("struct", [&] { return parseStructType(parser, stack); })
+      .Default([&] {
+        parser.emitError(keyLoc) << "unknown LLVM type: " << key;
+        return LLVMTypeNew();
+      })();
+}
+
+LLVMTypeNew mlir::LLVM::detail::parseType(DialectAsmParser &parser) {
+  llvm::SetVector<StringRef> stack;
+  return parseTypeImpl(parser, stack);
+}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
new file mode 100644
index 0000000000000..3540091e90a3e
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
@@ -0,0 +1,163 @@
+//===- LLVMTypes.cpp - MLIR LLVM Dialect types ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the types for the LLVM dialect in MLIR. These MLIR types
+// correspond to the LLVM IR type system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TypeDetail.h"
+
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/TypeSupport.h"
+
+#include "llvm/Support/TypeSize.h"
+
+using namespace mlir;
+using namespace mlir::LLVM;
+
+//===----------------------------------------------------------------------===//
+// Array type.
+
+LLVMArrayType LLVMArrayType::get(LLVMTypeNew elementType,
+                                 unsigned numElements) {
+  assert(elementType && "expected non-null subtype");
+  return Base::get(elementType.getContext(), LLVMTypeNew::ArrayType,
+                   elementType, numElements);
+}
+
+LLVMTypeNew LLVMArrayType::getElementType() { return getImpl()->elementType; }
+
+unsigned LLVMArrayType::getNumElements() { return getImpl()->numElements; }
+
+//===----------------------------------------------------------------------===//
+// Function type.
+
+LLVMFunctionType LLVMFunctionType::get(LLVMTypeNew result,
+                                       ArrayRef<LLVMTypeNew> arguments,
+                                       bool isVarArg) {
+  assert(result && "expected non-null result");
+  return Base::get(result.getContext(), LLVMTypeNew::FunctionType, result,
+                   arguments, isVarArg);
+}
+
+LLVMTypeNew LLVMFunctionType::getReturnType() {
+  return getImpl()->getReturnType();
+}
+
+unsigned LLVMFunctionType::getNumParams() {
+  return getImpl()->getArgumentTypes().size();
+}
+
+LLVMTypeNew LLVMFunctionType::getParamType(unsigned i) {
+  return getImpl()->getArgumentTypes()[i];
+}
+
+bool LLVMFunctionType::isVarArg() { return getImpl()->isVariadic(); }
+
+ArrayRef<LLVMTypeNew> LLVMFunctionType::getParams() {
+  return getImpl()->getArgumentTypes();
+}
+
+//===----------------------------------------------------------------------===//
+// Integer type.
+
+LLVMIntegerType LLVMIntegerType::get(MLIRContext *ctx, unsigned bitwidth) {
+  return Base::get(ctx, LLVMTypeNew::IntegerType, bitwidth);
+}
+
+unsigned LLVMIntegerType::getBitWidth() { return getImpl()->bitwidth; }
+
+//===----------------------------------------------------------------------===//
+// Pointer type.
+
+LLVMPointerType LLVMPointerType::get(LLVMTypeNew pointee,
+                                     unsigned addressSpace) {
+  assert(pointee && "expected non-null subtype");
+  return Base::get(pointee.getContext(), LLVMTypeNew::PointerType, pointee,
+                   addressSpace);
+}
+
+LLVMTypeNew LLVMPointerType::getElementType() { return getImpl()->pointeeType; }
+
+unsigned LLVMPointerType::getAddressSpace() { return getImpl()->addressSpace; }
+
+//===----------------------------------------------------------------------===//
+// Struct type.
+
+LLVMStructType LLVMStructType::getIdentified(MLIRContext *context,
+                                             StringRef name) {
+  return Base::get(context, LLVMTypeNew::StructType, name, /*opaque=*/false);
+}
+
+LLVMStructType LLVMStructType::getLiteral(MLIRContext *context,
+                                          ArrayRef<LLVMTypeNew> types,
+                                          bool isPacked) {
+  return Base::get(context, LLVMTypeNew::StructType, types, isPacked);
+}
+
+LLVMStructType LLVMStructType::getOpaque(StringRef name, MLIRContext *context) {
+  return Base::get(context, LLVMTypeNew::StructType, name, /*opaque=*/true);
+}
+
+LogicalResult LLVMStructType::setBody(ArrayRef<LLVMTypeNew> types,
+                                      bool isPacked) {
+  assert(isIdentified() && "can only set bodies of identified structs");
+  return Base::mutate(types, isPacked);
+}
+
+bool LLVMStructType::isPacked() { return getImpl()->isPacked(); }
+bool LLVMStructType::isIdentified() { return getImpl()->isIdentified(); }
+bool LLVMStructType::isOpaque() {
+  return getImpl()->isOpaque() || !getImpl()->isInitialized();
+}
+StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); }
+ArrayRef<LLVMTypeNew> LLVMStructType::getBody() {
+  return isIdentified() ? getImpl()->getIdentifiedStructBody()
+                        : getImpl()->getTypeList();
+}
+
+//===----------------------------------------------------------------------===//
+// Vector types.
+
+LLVMTypeNew LLVMVectorType::getElementType() {
+  // Both derived classes share the implementation type.
+  return static_cast<detail::LLVMTypeAndSizeStorage *>(impl)->elementType;
+}
+
+llvm::ElementCount LLVMVectorType::getElementCount() {
+  // Both derived classes share the implementation type.
+  return llvm::ElementCount(
+      static_cast<detail::LLVMTypeAndSizeStorage *>(impl)->numElements,
+      this->isa<LLVMScalableVectorType>());
+}
+
+LLVMFixedVectorType LLVMFixedVectorType::get(LLVMTypeNew elementType,
+                                             unsigned numElements) {
+  assert(elementType && "expected non-null subtype");
+  return Base::get(elementType.getContext(), LLVMTypeNew::FixedVectorType,
+                   elementType, numElements)
+      .cast<LLVMFixedVectorType>();
+}
+
+unsigned LLVMFixedVectorType::getNumElements() {
+  return getImpl()->numElements;
+}
+
+LLVMScalableVectorType LLVMScalableVectorType::get(LLVMTypeNew elementType,
+                                                   unsigned minNumElements) {
+  assert(elementType && "expected non-null subtype");
+  return Base::get(elementType.getContext(), LLVMTypeNew::ScalableVectorType,
+                   elementType, minNumElements)
+      .cast<LLVMScalableVectorType>();
+}
+
+unsigned LLVMScalableVectorType::getMinNumElements() {
+  return getImpl()->numElements;
+}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h b/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
new file mode 100644
index 0000000000000..2b72e43e51648
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
@@ -0,0 +1,458 @@
+//===- TypeDetail.h - Details of MLIR LLVM dialect types --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains implementation details, such as storage structures, of
+// MLIR LLVM dialect types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DIALECT_LLVMIR_IR_TYPEDETAIL_H
+#define DIALECT_LLVMIR_IR_TYPEDETAIL_H
+
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/TypeSupport.h"
+#include "mlir/IR/Types.h"
+
+#include "llvm/ADT/Bitfields.h"
+#include "llvm/ADT/PointerIntPair.h"
+
+namespace mlir {
+namespace LLVM {
+namespace detail {
+
+//===----------------------------------------------------------------------===//
+// LLVMStructTypeStorage.
+//===----------------------------------------------------------------------===//
+
+/// Type storage for LLVM structure types.
+///
+/// Structures are uniqued using:
+///   - a bit indicating whether a struct is literal or identified;
+///   - for identified structs, in addition to the bit:
+///     - a string identifier;
+///   - for literal structs, in addition to the bit:
+///     - a list of contained types;
+///     - a bit indicating whether the literal struct is packed.
+///
+/// Identified structures only have a mutable component consisting of:
+///   - a list of contained types;
+///   - a bit indicating whether the identified struct is packed;
+///   - a bit indicating whether the identified struct is intentionally opaque;
+///   - a bit indicating whether the identified struct has been initialized.
+/// Uninitialized structs are considered opaque by the user, and can be
+/// mutated. Initialized and still opaque structs cannot be mutated.
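+///
+/// An illustrative lifecycle (added commentary, not from the original patch):
+/// getIdentified() creates an uninitialized struct that the user observes as
+/// opaque but whose body may still be set; a successful setBody() marks it
+/// initialized and freezes it; getOpaque() creates a struct that is both
+/// initialized and opaque from the start, so its body can never be set.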
+///
+/// The struct storage consists of:
+///   - immutable part:
+///     - a pointer to the first element of the key (character for identified
+///       structs, type for literal structs);
+///     - the number of elements in the key packed together with bits
+///       indicating whether a type is literal or identified, and the
+///       packedness bit for literal structs only;
+///   - mutable part:
+///     - a pointer to the first contained type for identified structs only;
+///     - the number of contained types packed together with bits of the
+///       mutable component, for identified structs only.
+struct LLVMStructTypeStorage : public TypeStorage {
+public:
+  /// Construction/uniquing key class for LLVM dialect structure storage. Note
+  /// that this is a transient helper data structure that is NOT stored.
+  /// Therefore, it intentionally avoids bit manipulation and type erasure in
+  /// pointers to make manipulation more straightforward. Not all elements of
+  /// the key participate in uniquing, but all elements participate in
+  /// construction.
+  class Key {
+  public:
+    /// Constructs a key for an identified struct.
+    Key(StringRef name, bool opaque)
+        : name(name), identified(true), packed(false), opaque(opaque) {}
+    /// Constructs a key for a literal struct.
+    Key(ArrayRef<LLVMTypeNew> types, bool packed)
+        : types(types), identified(false), packed(packed), opaque(false) {}
+
+    /// Checks a specific property of the struct.
+    bool isIdentified() const { return identified; }
+    bool isPacked() const {
+      assert(!isIdentified() &&
+             "'packed' bit is not part of the key for identified structs");
+      return packed;
+    }
+    bool isOpaque() const {
+      assert(isIdentified() &&
+             "'opaque' bit is meaningless on literal structs");
+      return opaque;
+    }
+
+    /// Returns the identifier of a key for identified structs.
+    StringRef getIdentifier() const {
+      assert(isIdentified() &&
+             "non-identified struct key cannot have an identifier");
+      return name;
+    }
+
+    /// Returns the list of types contained in the key of a literal struct.
+    ArrayRef<LLVMTypeNew> getTypeList() const {
+      assert(!isIdentified() &&
+             "identified struct key cannot have a type list");
+      return types;
+    }
+
+    /// Returns the hash value of the key. This combines various flags into a
+    /// single value: the identified flag sets the first bit, and the
+    /// packedness flag sets the second bit. Opacity bit is only used for
+    /// construction and does not participate in uniquing.
+    llvm::hash_code hashValue() const {
+      constexpr static unsigned kIdentifiedHashFlag = 1;
+      constexpr static unsigned kPackedHashFlag = 2;
+
+      unsigned flags = 0;
+      if (isIdentified()) {
+        flags |= kIdentifiedHashFlag;
+        return llvm::hash_combine(flags, getIdentifier());
+      }
+      if (isPacked())
+        flags |= kPackedHashFlag;
+      return llvm::hash_combine(flags, getTypeList());
+    }
+
+    /// Compares two keys.
+    bool operator==(const Key &other) const {
+      if (isIdentified())
+        return other.isIdentified() &&
+               other.getIdentifier().equals(getIdentifier());
+
+      return !other.isIdentified() && other.isPacked() == isPacked() &&
+             other.getTypeList() == getTypeList();
+    }
+
+    /// Copies dynamically-sized components of the key into the given
+    /// allocator.
+    Key copyIntoAllocator(TypeStorageAllocator &allocator) const {
+      if (isIdentified())
+        return Key(allocator.copyInto(name), opaque);
+      return Key(allocator.copyInto(types), packed);
+    }
+
+  private:
+    ArrayRef<LLVMTypeNew> types;
+    StringRef name;
+    bool identified;
+    bool packed;
+    bool opaque;
+  };
+  using KeyTy = Key;
+
+  /// Returns the string identifier of an identified struct.
+  StringRef getIdentifier() const {
+    assert(isIdentified() && "requested identifier on a non-identified struct");
+    return StringRef(static_cast<const char *>(keyPtr), keySize());
+  }
+
+  /// Returns the list of types (partially) identifying a literal struct.
+  ArrayRef<LLVMTypeNew> getTypeList() const {
+    // If this triggers, use getIdentifiedStructBody() instead.
+    assert(!isIdentified() && "requested typelist on an identified struct");
+    return ArrayRef<LLVMTypeNew>(static_cast<const LLVMTypeNew *>(keyPtr),
+                                 keySize());
+  }
+
+  /// Returns the list of types contained in an identified struct.
+  ArrayRef<LLVMTypeNew> getIdentifiedStructBody() const {
+    // If this triggers, use getTypeList() instead.
+    assert(isIdentified() &&
+           "requested struct body on a non-identified struct");
+    return ArrayRef<LLVMTypeNew>(identifiedBodyArray, identifiedBodySize());
+  }
+
+  /// Checks whether the struct is identified.
+  bool isIdentified() const {
+    return llvm::Bitfield::get<KeyFlagIdentified>(keySizeAndFlags);
+  }
+
+  /// Checks whether the struct is packed (both literal and identified
+  /// structs).
+  bool isPacked() const {
+    return isIdentified() ? llvm::Bitfield::get<MutableFlagPacked>(
+                                identifiedBodySizeAndFlags)
+                          : llvm::Bitfield::get<KeyFlagPacked>(keySizeAndFlags);
+  }
+
+  /// Checks whether a struct is marked as intentionally opaque (an
+  /// uninitialized struct is also considered opaque by the user, call
+  /// isInitialized to check that).
+  bool isOpaque() const {
+    return llvm::Bitfield::get<MutableFlagOpaque>(identifiedBodySizeAndFlags);
+  }
+
+  /// Checks whether an identified struct has been explicitly initialized
+  /// either by setting its body or by marking it as intentionally opaque.
+  bool isInitialized() const {
+    return llvm::Bitfield::get<MutableFlagInitialized>(
+        identifiedBodySizeAndFlags);
+  }
+
+  /// Constructs the storage from the given key. This sets up the uniquing key
+  /// components and optionally the mutable component if the construction key
+  /// has the relevant information. In the latter case, the struct is
+  /// considered initialized and can no longer be mutated.
+  LLVMStructTypeStorage(const KeyTy &key) {
+    if (!key.isIdentified()) {
+      ArrayRef<LLVMTypeNew> types = key.getTypeList();
+      keyPtr = static_cast<const void *>(types.data());
+      setKeySize(types.size());
+      llvm::Bitfield::set<KeyFlagPacked>(keySizeAndFlags, key.isPacked());
+      return;
+    }
+
+    StringRef name = key.getIdentifier();
+    keyPtr = static_cast<const void *>(name.data());
+    setKeySize(name.size());
+    llvm::Bitfield::set<KeyFlagIdentified>(keySizeAndFlags, true);
+
+    // If the struct is being constructed directly as opaque, mark it as
+    // initialized.
+    llvm::Bitfield::set<MutableFlagInitialized>(identifiedBodySizeAndFlags,
+                                                key.isOpaque());
+    llvm::Bitfield::set<MutableFlagOpaque>(identifiedBodySizeAndFlags,
+                                           key.isOpaque());
+  }
+
+  /// Hook into the type uniquing infrastructure.
+  bool operator==(const KeyTy &other) const { return getKey() == other; }
+  static llvm::hash_code hashKey(const KeyTy &key) { return key.hashValue(); }
+  static LLVMStructTypeStorage *construct(TypeStorageAllocator &allocator,
+                                          const KeyTy &key) {
+    return new (allocator.allocate<LLVMStructTypeStorage>())
+        LLVMStructTypeStorage(key.copyIntoAllocator(allocator));
+  }
+
+  /// Sets the body of an identified struct. If the struct is already
+  /// initialized, succeeds only if the body is equal to the current body.
+  /// Fails if the struct is marked as intentionally opaque. The struct will be
+  /// marked as initialized as a result of this operation and can no longer be
+  /// changed.
+  LogicalResult mutate(TypeStorageAllocator &allocator,
+                       ArrayRef<LLVMTypeNew> body, bool packed) {
+    if (!isIdentified())
+      return failure();
+    if (isInitialized())
+      return success(!isOpaque() && body == getIdentifiedStructBody() &&
+                     packed == isPacked());
+
+    llvm::Bitfield::set<MutableFlagInitialized>(identifiedBodySizeAndFlags,
+                                                true);
+    llvm::Bitfield::set<MutableFlagPacked>(identifiedBodySizeAndFlags, packed);
+
+    ArrayRef<LLVMTypeNew> typesInAllocator = allocator.copyInto(body);
+    identifiedBodyArray = typesInAllocator.data();
+    setIdentifiedBodySize(typesInAllocator.size());
+
+    return success();
+  }
+
+private:
+  /// Returns the number of elements in the key.
+  unsigned keySize() const {
+    return llvm::Bitfield::get<KeySize>(keySizeAndFlags);
+  }
+
+  /// Sets the number of elements in the key.
+  void setKeySize(unsigned value) {
+    llvm::Bitfield::set<KeySize>(keySizeAndFlags, value);
+  }
+
+  /// Returns the number of types contained in an identified struct.
+  unsigned identifiedBodySize() const {
+    return llvm::Bitfield::get<MutableSize>(identifiedBodySizeAndFlags);
+  }
+  /// Sets the number of types contained in an identified struct.
+  void setIdentifiedBodySize(unsigned value) {
+    llvm::Bitfield::set<MutableSize>(identifiedBodySizeAndFlags, value);
+  }
+
+  /// Returns the key for the current storage.
+  Key getKey() const {
+    if (isIdentified())
+      return Key(getIdentifier(), isOpaque());
+    return Key(getTypeList(), isPacked());
+  }
+
+  /// Bitfield elements for `keySizeAndFlags`:
+  ///   - bit 0: identified key flag;
+  ///   - bit 1: packed key flag;
+  ///   - bits 2..bitwidth(unsigned): size of the key.
+  using KeyFlagIdentified =
+      llvm::Bitfield::Element<bool, /*Offset=*/0, /*Size=*/1>;
+  using KeyFlagPacked =
+      llvm::Bitfield::Element<bool, /*Offset=*/1, /*Size=*/1>;
+  using KeySize =
+      llvm::Bitfield::Element<unsigned, /*Offset=*/2,
+                              std::numeric_limits<unsigned>::digits - 2>;
+
+  /// Bitfield elements for `identifiedBodySizeAndFlags`:
+  ///   - bit 0: opaque flag;
+  ///   - bit 1: packed mutable flag;
+  ///   - bit 2: initialized flag;
+  ///   - bits 3..bitwidth(unsigned): size of the identified body.
+  using MutableFlagOpaque =
+      llvm::Bitfield::Element<bool, /*Offset=*/0, /*Size=*/1>;
+  using MutableFlagPacked =
+      llvm::Bitfield::Element<bool, /*Offset=*/1, /*Size=*/1>;
+  using MutableFlagInitialized =
+      llvm::Bitfield::Element<bool, /*Offset=*/2, /*Size=*/1>;
+  using MutableSize =
+      llvm::Bitfield::Element<unsigned, /*Offset=*/3,
+                              std::numeric_limits<unsigned>::digits - 3>;
+
+  /// Pointer to the first element of the uniquing key.
+  // Note: cannot use PointerUnion because bump-ptr allocator does not
+  // guarantee address alignment.
+  const void *keyPtr = nullptr;
+
+  /// Pointer to the first type contained in an identified struct.
+  const LLVMTypeNew *identifiedBodyArray = nullptr;
+
+  /// Size of the uniquing key combined with identified/literal and
+  /// packedness bits. Must only be used through the Key* bitfields.
+  unsigned keySizeAndFlags = 0;
+
+  /// Number of the types contained in an identified struct combined with
+  /// mutable flags. Must only be used through the Mutable* bitfields.
+  unsigned identifiedBodySizeAndFlags = 0;
+};
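+// NOTE: Illustrative only (not part of the original patch): with the Key*
+// elements above, an identified struct whose name is 5 characters long stores
+// keySizeAndFlags == (5 << 2) | 1 /*identified*/; for identified structs the
+// packed bit lives in identifiedBodySizeAndFlags instead.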
+
+//===----------------------------------------------------------------------===//
+// LLVMFunctionTypeStorage.
+//===----------------------------------------------------------------------===//
+
+/// Type storage for LLVM dialect function types. These are uniqued using the
+/// list of types they contain and the vararg bit.
+struct LLVMFunctionTypeStorage : public TypeStorage {
+  using KeyTy = std::tuple<LLVMTypeNew, ArrayRef<LLVMTypeNew>, bool>;
+
+  /// Construct a storage from the given components. The list is expected to
+  /// be allocated in the context.
+  LLVMFunctionTypeStorage(LLVMTypeNew result, ArrayRef<LLVMTypeNew> arguments,
+                          bool variadic)
+      : argumentTypes(arguments) {
+    returnTypeAndVariadic.setPointerAndInt(result, variadic);
+  }
+
+  /// Hook into the type uniquing infrastructure.
+  static LLVMFunctionTypeStorage *construct(TypeStorageAllocator &allocator,
+                                            const KeyTy &key) {
+    return new (allocator.allocate<LLVMFunctionTypeStorage>())
+        LLVMFunctionTypeStorage(std::get<0>(key),
+                                allocator.copyInto(std::get<1>(key)),
+                                std::get<2>(key));
+  }
+
+  static unsigned hashKey(const KeyTy &key) {
+    // LLVM doesn't like hashing bools in tuples.
+    return llvm::hash_combine(std::get<0>(key), std::get<1>(key),
+                              static_cast<int>(std::get<2>(key)));
+  }
+
+  bool operator==(const KeyTy &key) const {
+    return std::make_tuple(getReturnType(), getArgumentTypes(), isVariadic()) ==
+           key;
+  }
+
+  /// Returns the list of function argument types.
+  ArrayRef<LLVMTypeNew> getArgumentTypes() const { return argumentTypes; }
+
+  /// Checks whether the function type is variadic.
+  bool isVariadic() const { return returnTypeAndVariadic.getInt(); }
+
+  /// Returns the function result type.
+  LLVMTypeNew getReturnType() const {
+    return returnTypeAndVariadic.getPointer();
+  }
+
+private:
+  /// Function result type packed with the variadic bit.
+  llvm::PointerIntPair<LLVMTypeNew, 1, bool> returnTypeAndVariadic;
+  /// Argument types.
+  ArrayRef<LLVMTypeNew> argumentTypes;
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMIntegerTypeStorage.
+//===----------------------------------------------------------------------===//
+
+/// Storage type for LLVM dialect integer types. These are uniqued by bitwidth.
+struct LLVMIntegerTypeStorage : public TypeStorage {
+  using KeyTy = unsigned;
+
+  LLVMIntegerTypeStorage(unsigned width) : bitwidth(width) {}
+
+  static LLVMIntegerTypeStorage *construct(TypeStorageAllocator &allocator,
+                                           const KeyTy &key) {
+    return new (allocator.allocate<LLVMIntegerTypeStorage>())
+        LLVMIntegerTypeStorage(key);
+  }
+
+  bool operator==(const KeyTy &key) const { return key == bitwidth; }
+
+  unsigned bitwidth;
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMPointerTypeStorage.
+//===----------------------------------------------------------------------===//
+
+/// Storage type for LLVM dialect pointer types. These are uniqued by a pair of
+/// element type and address space.
+struct LLVMPointerTypeStorage : public TypeStorage {
+  using KeyTy = std::tuple<LLVMTypeNew, unsigned>;
+
+  LLVMPointerTypeStorage(const KeyTy &key)
+      : pointeeType(std::get<0>(key)), addressSpace(std::get<1>(key)) {}
+
+  static LLVMPointerTypeStorage *construct(TypeStorageAllocator &allocator,
+                                           const KeyTy &key) {
+    return new (allocator.allocate<LLVMPointerTypeStorage>())
+        LLVMPointerTypeStorage(key);
+  }
+
+  bool operator==(const KeyTy &key) const {
+    return std::make_tuple(pointeeType, addressSpace) == key;
+  }
+
+  LLVMTypeNew pointeeType;
+  unsigned addressSpace;
+};
+
+//===----------------------------------------------------------------------===//
+// LLVMTypeAndSizeStorage.
+//===----------------------------------------------------------------------===//
+
+/// Common storage used for LLVM dialect types that need an element type and a
+/// number: arrays, fixed and scalable vectors. The actual semantics of the
+/// type is defined by its kind.
+struct LLVMTypeAndSizeStorage : public TypeStorage {
+  using KeyTy = std::tuple<LLVMTypeNew, unsigned>;
+
+  LLVMTypeAndSizeStorage(const KeyTy &key)
+      : elementType(std::get<0>(key)), numElements(std::get<1>(key)) {}
+
+  static LLVMTypeAndSizeStorage *construct(TypeStorageAllocator &allocator,
+                                           const KeyTy &key) {
+    return new (allocator.allocate<LLVMTypeAndSizeStorage>())
+        LLVMTypeAndSizeStorage(key);
+  }
+
+  bool operator==(const KeyTy &key) const {
+    return std::make_tuple(elementType, numElements) == key;
+  }
+
+  LLVMTypeNew elementType;
+  unsigned numElements;
+};
+
+} // end namespace detail
+} // end namespace LLVM
+} // end namespace mlir
+
+#endif // DIALECT_LLVMIR_IR_TYPEDETAIL_H
diff --git a/mlir/lib/Parser/DialectSymbolParser.cpp b/mlir/lib/Parser/DialectSymbolParser.cpp
index 1a7e2c5448c19..3b522a876f254 100644
--- a/mlir/lib/Parser/DialectSymbolParser.cpp
+++ b/mlir/lib/Parser/DialectSymbolParser.cpp
@@ -237,6 +237,17 @@ class CustomDialectAsmParser : public DialectAsmParser {
     return success(parser.consumeIf(Token::star));
   }
 
+  /// Parses a quoted string token if present.
+  ParseResult parseOptionalString(StringRef *string) override {
+    if (!parser.getToken().is(Token::string))
+      return failure();
+
+    if (string)
+      *string = parser.getTokenSpelling().drop_front().drop_back();
+    parser.consumeToken();
+    return success();
+  }
+
   /// Returns if the current token corresponds to a keyword.
   bool isCurrentTokenAKeyword() const {
     return parser.getToken().is(Token::bare_identifier) ||
@@ -297,6 +308,10 @@ class CustomDialectAsmParser : public DialectAsmParser {
     return parser.parseDimensionListRanked(dimensions, allowDynamic);
   }
 
+  OptionalParseResult parseOptionalType(Type &result) override {
+    return parser.parseOptionalType(result);
+  }
+
 private:
   /// The full symbol specification.
   StringRef fullSpec;
diff --git a/mlir/test/Dialect/LLVMIR/types-invalid.mlir b/mlir/test/Dialect/LLVMIR/types-invalid.mlir
new file mode 100644
index 0000000000000..bb281087412c9
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/types-invalid.mlir
@@ -0,0 +1,95 @@
+// RUN: mlir-opt --allow-unregistered-dialect -split-input-file -verify-diagnostics %s
+
+func @repeated_struct_name() {
+  "some.op"() : () -> !llvm2.struct<"a", (ptr<struct<"a">>)>
+  // expected-error @+2 {{identified type already used with a different body}}
+  // expected-note @+1 {{existing body: (ptr<struct<"a">>)}}
+  "some.op"() : () -> !llvm2.struct<"a", (i32)>
+}
+
+// -----
+
+func @repeated_struct_name_packed() {
+  "some.op"() : () -> !llvm2.struct<"a", packed (i32)>
+  // expected-error @+2 {{identified type already used with a different body}}
+  // expected-note @+1 {{existing body: packed (i32)}}
+  "some.op"() : () -> !llvm2.struct<"a", (i32)>
+}
+
+// -----
+
+func @repeated_struct_opaque() {
+  "some.op"() : () -> !llvm2.struct<"a", opaque>
+  // expected-error @+2 {{identified type already used with a different body}}
+  // expected-note @+1 {{existing body: opaque}}
+  "some.op"() : () -> !llvm2.struct<"a", ()>
+}
+
+// -----
+
+func @repeated_struct_opaque_non_empty() {
+  "some.op"() : () -> !llvm2.struct<"a", opaque>
+  // expected-error @+2 {{identified type already used with a different body}}
+  // expected-note @+1 {{existing body: opaque}}
+  "some.op"() : () -> !llvm2.struct<"a", (i32, i32)>
+}
+
+// -----
+
+func @repeated_struct_opaque_redefinition() {
+  "some.op"() : () -> !llvm2.struct<"a", ()>
+  // expected-error @+1 {{redeclaring defined struct as opaque}}
+  "some.op"() : () -> !llvm2.struct<"a", opaque>
+}
+
+// -----
+
+func @struct_literal_opaque() {
+  // expected-error @+1 {{only identified structs can be opaque}}
+  "some.op"() : () -> !llvm2.struct<opaque>
+}
+
+// -----
+
+func @unexpected_type() {
+  // expected-error @+1 {{unexpected type, expected i* or keyword}}
+  "some.op"() : () -> !llvm2.f32
+}
+
+// -----
+
+func @unexpected_type() {
+  // expected-error @+1 {{unknown LLVM type}}
+  "some.op"() : () -> !llvm2.ifoo
+}
+
+// -----
+
+func @explicitly_opaque_struct() {
+  "some.op"() : () -> !llvm2.struct<"a", opaque>
+  // expected-error @+2 {{identified type already used with a different body}}
+  // expected-note @+1 {{existing body: opaque}}
+  "some.op"() : () -> !llvm2.struct<"a", ()>
+}
+
+// -----
+
+func @dynamic_vector() {
+  // expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
+  "some.op"() : () -> !llvm2.vec<? x i32>
+}
+
+// -----
+
+func @dynamic_scalable_vector() {
+  // expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
+  "some.op"() : () -> !llvm2.vec<? x ? x i32>
+}
+
+// -----
+
+func @unscalable_vector() {
+  // expected-error @+1 {{expected '? x <integer> x <type>' or '<integer> x <type>'}}
+  "some.op"() : () -> !llvm2.vec<4 x 4 x i32>
+}
+
diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir
new file mode 100644
index 0000000000000..7ce606fe8c6a8
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/types.mlir
@@ -0,0 +1,184 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file | mlir-opt -allow-unregistered-dialect | FileCheck %s
+
+// CHECK-LABEL: @primitive
+func @primitive() {
+  // CHECK: !llvm2.void
+  "some.op"() : () -> !llvm2.void
+  // CHECK: !llvm2.half
+  "some.op"() : () -> !llvm2.half
+  // CHECK: !llvm2.bfloat
+  "some.op"() : () -> !llvm2.bfloat
+  // CHECK: !llvm2.float
+  "some.op"() : () -> !llvm2.float
+  // CHECK: !llvm2.double
+  "some.op"() : () -> !llvm2.double
+  // CHECK: !llvm2.fp128
+  "some.op"() : () -> !llvm2.fp128
+  // CHECK: !llvm2.x86_fp80
+  "some.op"() : () -> !llvm2.x86_fp80
+  // CHECK: !llvm2.ppc_fp128
+  "some.op"() : () -> !llvm2.ppc_fp128
+  // CHECK: !llvm2.x86_mmx
+  "some.op"() : () -> !llvm2.x86_mmx
+  // CHECK: !llvm2.token
+  "some.op"() : () -> !llvm2.token
+  // CHECK: !llvm2.label
+  "some.op"() : () -> !llvm2.label
+  // CHECK: !llvm2.metadata
+  "some.op"() : () -> !llvm2.metadata
+  return
+}
+
+// CHECK-LABEL: @func
+func @func() {
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  // CHECK: !llvm2.func
+  "some.op"() : () -> !llvm2.func
+  return
+}
+
+// CHECK-LABEL: @integer
+func @integer() {
+  // CHECK: !llvm2.i1
+  "some.op"() : () -> !llvm2.i1
+  // CHECK: !llvm2.i8
+  "some.op"() : () -> !llvm2.i8
+  // CHECK: !llvm2.i16
+  "some.op"() : () -> !llvm2.i16
+  // CHECK: !llvm2.i32
+  "some.op"() : () -> !llvm2.i32
+  // CHECK: !llvm2.i64
+  "some.op"() : () -> !llvm2.i64
+  // CHECK: !llvm2.i57
+  "some.op"() : () -> !llvm2.i57
+  // CHECK: !llvm2.i129
+  "some.op"() : () -> !llvm2.i129
+  return
+}
+
+// CHECK-LABEL: @ptr
+func @ptr() {
+  // CHECK: !llvm2.ptr
+  "some.op"() : () -> !llvm2.ptr
+  // CHECK: !llvm2.ptr
+  "some.op"() : () -> !llvm2.ptr
+  // CHECK: !llvm2.ptr>
+  "some.op"() : () -> !llvm2.ptr>
+  // CHECK: !llvm2.ptr>>>>
+  "some.op"() : () -> !llvm2.ptr>>>>
+  // CHECK: !llvm2.ptr
+  "some.op"() : () -> !llvm2.ptr
+  // CHECK: !llvm2.ptr
+  "some.op"() : () -> !llvm2.ptr
+  // CHECK: !llvm2.ptr
+  "some.op"() : () -> !llvm2.ptr
+  // CHECK: !llvm2.ptr, 9>
"some.op"() : () -> !llvm2.ptr, 9> + return +} + +// CHECK-LABEL: @vec +func @vec() { + // CHECK: !llvm2.vec<4 x i32> + "some.op"() : () -> !llvm2.vec<4 x i32> + // CHECK: !llvm2.vec<4 x float> + "some.op"() : () -> !llvm2.vec<4 x float> + // CHECK: !llvm2.vec + "some.op"() : () -> !llvm2.vec + // CHECK: !llvm2.vec + "some.op"() : () -> !llvm2.vec + // CHECK: !llvm2.vec<4 x ptr> + "some.op"() : () -> !llvm2.vec<4 x ptr> + return +} + +// CHECK-LABEL: @array +func @array() { + // CHECK: !llvm2.array<10 x i32> + "some.op"() : () -> !llvm2.array<10 x i32> + // CHECK: !llvm2.array<8 x float> + "some.op"() : () -> !llvm2.array<8 x float> + // CHECK: !llvm2.array<10 x ptr> + "some.op"() : () -> !llvm2.array<10 x ptr> + // CHECK: !llvm2.array<10 x array<4 x float>> + "some.op"() : () -> !llvm2.array<10 x array<4 x float>> + return +} + +// CHECK-LABEL: @literal_struct +func @literal_struct() { + // CHECK: !llvm2.struct<()> + "some.op"() : () -> !llvm2.struct<()> + // CHECK: !llvm2.struct<(i32)> + "some.op"() : () -> !llvm2.struct<(i32)> + // CHECK: !llvm2.struct<(float, i32)> + "some.op"() : () -> !llvm2.struct<(float, i32)> + // CHECK: !llvm2.struct<(struct<(i32)>)> + "some.op"() : () -> !llvm2.struct<(struct<(i32)>)> + // CHECK: !llvm2.struct<(i32, struct<(i32)>, float)> + "some.op"() : () -> !llvm2.struct<(i32, struct<(i32)>, float)> + + // CHECK: !llvm2.struct + "some.op"() : () -> !llvm2.struct + // CHECK: !llvm2.struct + "some.op"() : () -> !llvm2.struct + // CHECK: !llvm2.struct + "some.op"() : () -> !llvm2.struct + // CHECK: !llvm2.struct + "some.op"() : () -> !llvm2.struct + // CHECK: !llvm2.struct)> + "some.op"() : () -> !llvm2.struct)> + // CHECK: !llvm2.struct, float)> + "some.op"() : () -> !llvm2.struct, float)> + + // CHECK: !llvm2.struct<(struct)> + "some.op"() : () -> !llvm2.struct<(struct)> + // CHECK: !llvm2.struct)> + "some.op"() : () -> !llvm2.struct)> + return +} + +// CHECK-LABEL: @identified_struct +func @identified_struct() { + // CHECK: !llvm2.struct<"empty", ()> + "some.op"() : () -> !llvm2.struct<"empty", ()> + // CHECK: !llvm2.struct<"opaque", opaque> + "some.op"() : () -> !llvm2.struct<"opaque", opaque> + // CHECK: !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> + "some.op"() : () -> !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> + // CHECK: !llvm2.struct<"self-recursive", (ptr>)> + "some.op"() : () -> !llvm2.struct<"self-recursive", (ptr>)> + // CHECK: !llvm2.struct<"unpacked", (i32)> + "some.op"() : () -> !llvm2.struct<"unpacked", (i32)> + // CHECK: !llvm2.struct<"packed", packed (i32)> + "some.op"() : () -> !llvm2.struct<"packed", packed (i32)> + // CHECK: !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> + "some.op"() : () -> !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> + + // CHECK: !llvm2.struct<"mutually-a", (ptr, 3>)>>)> + "some.op"() : () -> !llvm2.struct<"mutually-a", (ptr, 3>)>>)> + // CHECK: !llvm2.struct<"mutually-b", (ptr>)>, 3>)> + "some.op"() : () -> !llvm2.struct<"mutually-b", (ptr>)>, 3>)> + // CHECK: !llvm2.struct<"referring-another", (ptr>)> + "some.op"() : () -> !llvm2.struct<"referring-another", (ptr>)> + + // CHECK: !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> + "some.op"() : () -> !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> + // CHECK: !llvm2.array<10 x struct<"array-of-structs", (i32)>> + "some.op"() : () -> !llvm2.array<10 x struct<"array-of-structs", (i32)>> + // CHECK: !llvm2.ptr> + "some.op"() : () -> !llvm2.ptr> + return +} + diff --git 
a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt
index 9008b86314be0..36a18f79a8cbf 100644
--- a/mlir/test/lib/Dialect/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(Affine)
+add_subdirectory(LLVMIR)
 add_subdirectory(SPIRV)
 add_subdirectory(Test)
diff --git a/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt
new file mode 100644
index 0000000000000..2a42bc6974850
--- /dev/null
+++ b/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt
@@ -0,0 +1,14 @@
+
+add_mlir_library(MLIRLLVMTypeTestDialect
+  LLVMTypeTestDialect.cpp
+
+  EXCLUDE_FROM_LIBMLIR
+
+  LINK_COMPONENTS
+  Core
+
+  LINK_LIBS PUBLIC
+  MLIRDialect
+  MLIRIR
+  MLIRLLVMIR
+  )
diff --git a/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp b/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp
new file mode 100644
index 0000000000000..8ac1ef0a8c170
--- /dev/null
+++ b/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp
@@ -0,0 +1,52 @@
+#ifndef DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_
+#define DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_
+
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/Dialect.h"
+
+namespace mlir {
+namespace LLVM {
+namespace {
+class LLVMDialectNewTypes : public Dialect {
+public:
+  LLVMDialectNewTypes(MLIRContext *ctx) : Dialect(getDialectNamespace(), ctx) {
+    // clang-format off
+    addTypes<LLVMVoidType, LLVMHalfType, LLVMBFloatType, LLVMFloatType,
+             LLVMDoubleType, LLVMFP128Type, LLVMX86FP80Type, LLVMPPCFP128Type,
+             LLVMX86MMXType, LLVMTokenType, LLVMLabelType, LLVMMetadataType,
+             LLVMFunctionType, LLVMIntegerType, LLVMPointerType,
+             LLVMFixedVectorType, LLVMScalableVectorType, LLVMArrayType,
+             LLVMStructType>();
+    // clang-format on
+  }
+  static StringRef getDialectNamespace() { return "llvm2"; }
+
+  Type parseType(DialectAsmParser &parser) const override {
+    return detail::parseType(parser);
+  }
+  void printType(Type type, DialectAsmPrinter &printer) const override {
+    detail::printType(type.cast<LLVMTypeNew>(), printer);
+  }
+};
+} // namespace
+} // namespace LLVM
+
+void registerLLVMTypeTestDialect() {
+  mlir::registerDialect<LLVM::LLVMDialectNewTypes>();
+}
+} // namespace mlir
+
+#endif // DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_
diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index 483dcfec0c0ff..f52c5f41b22b6 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS
 if(MLIR_INCLUDE_TESTS)
   set(test_libs
     MLIRAffineTransformsTestPasses
+    MLIRLLVMTypeTestDialect
     MLIRSPIRVTestPasses
     MLIRTestDialect
     MLIRTestIR
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 620c5871a420c..05fba34092cba 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -31,6 +31,7 @@ namespace mlir {
 // Defined in the test directory, no public header.
 void registerConvertToTargetEnvPass();
 void registerInliner();
+void registerLLVMTypeTestDialect();
 void registerMemRefBoundCheck();
 void registerPassManagerTestPass();
 void registerPatternsTestPass();
@@ -39,10 +40,9 @@ void registerSideEffectTestPasses();
 void registerSimpleParametricTilingPass();
 void registerSymbolTestPasses();
 void registerTestAffineDataCopyPass();
-void registerTestAllReduceLoweringPass();
 void registerTestAffineLoopUnswitchingPass();
+void registerTestAllReduceLoweringPass();
 void registerTestBufferPlacementPreparationPass();
-void registerTestLoopPermutationPass();
 void registerTestCallGraphPass();
 void registerTestConstantFold();
 void registerTestConvertGPUKernelToCubinPass();
@@ -51,12 +51,14 @@ void registerTestDominancePass();
 void registerTestExpandTanhPass();
 void registerTestFunc();
 void registerTestGpuMemoryPromotionPass();
+void registerTestGpuParallelLoopMappingPass();
 void registerTestInterfaces();
 void registerTestLinalgHoisting();
 void registerTestLinalgTransforms();
 void registerTestLivenessPass();
 void registerTestLoopFusion();
 void registerTestLoopMappingPass();
+void registerTestLoopPermutationPass();
 void registerTestLoopUnrollingPass();
 void registerTestMatchers();
 void registerTestMemRefDependenceCheck();
@@ -65,7 +67,6 @@ void registerTestOpaqueLoc();
 void registerTestPreparationPassWithAllowedMemrefResults();
 void registerTestRecursiveTypesPass();
 void registerTestReducer();
-void registerTestGpuParallelLoopMappingPass();
 void registerTestSpirvEntryPointABIPass();
 void registerTestSCFUtilsPass();
 void registerTestVectorConversions();
@@ -104,6 +105,7 @@ static cl::opt<bool> allowUnregisteredDialects(
 void registerTestPasses() {
   registerConvertToTargetEnvPass();
   registerInliner();
+  registerLLVMTypeTestDialect();
  registerMemRefBoundCheck();
  registerPassManagerTestPass();
  registerPatternsTestPass();
From 2d8ca4ae2b1a512d31566e042a4bf4fa1043def9 Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Mon, 3 Aug 2020 22:04:33 +0800
Subject: [PATCH 192/600] [DWARFYAML] Offsets should be omitted when the
 OffsetEntryCount is 0.

The offsets field should be omitted when the 'OffsetEntryCount' entry is
specified to be 0.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D85006
---
 llvm/lib/ObjectYAML/DWARFEmitter.cpp          |  2 +-
 .../yaml2obj/ELF/DWARF/debug-rnglists.yaml    | 64 +++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index ab3cd05a6495d..030b0e01b6f17 100644
--- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -749,7 +749,7 @@ Error writeDWARFLists(raw_ostream &OS,
       EmitOffsets(ArrayRef<uint64_t>((const uint64_t *)Table.Offsets->data(),
                                      Table.Offsets->size()),
                   0);
-    else
+    else if (OffsetEntryCount != 0)
       EmitOffsets(Offsets, OffsetsSize);
 
     OS.write(ListBuffer.data(), ListBuffer.size());
diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-rnglists.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-rnglists.yaml
index 248cb190235b2..de35adec57a0b 100644
--- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-rnglists.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-rnglists.yaml
@@ -669,3 +669,67 @@ DWARF:
     - Lists:
         - Entries: []
       Content: ''
+
+## u) Test that when the "OffsetEntryCount" is specified to be 0 and "Offsets" is not specified,
+##    the offsets array is not emitted.
+
+# RUN: yaml2obj --docnum=19 -DENTRYCOUNT=0 %s -o %t19.o
+# RUN: llvm-readelf --hex-dump=.debug_rnglists %t19.o | \
+# RUN:   FileCheck %s --check-prefix=NO-OFFSETS
+
+# NO-OFFSETS:      Hex dump of section '.debug_rnglists':
+# NO-OFFSETS-NEXT: 0x00000000 0e000000 05000800 00000000 02010202 ................
+##                                              ^------- offset_entry_count (4-byte)
+##                                                       ^- DW_RLE_startx_endx
+##                                                         ^- operands[0] (ULEB128) 0x01
+##                                                           ^- operands[1] (ULEB128) 0x02
+##                                                             ^- DW_RLE_startx_endx
+# NO-OFFSETS-NEXT: 0x00000010 0102 ..
+##                            ^- operands[0] (ULEB128) 0x01
+##                              ^- operands[1] (ULEB128) 0x02
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_rnglists:
+    - OffsetEntryCount: [[ENTRYCOUNT=<none>]]
+      Offsets:          [[OFFSETS=<none>]]
+      Lists:
+        - Entries:
+            - Operator: DW_RLE_startx_endx
+              Values:   [ 0x01, 0x02 ]
+        - Entries:
+            - Operator: DW_RLE_startx_endx
+              Values:   [ 0x01, 0x02 ]
+
+## v) Test that when the "Offsets" entry is specified to be empty and the "OffsetEntryCount" is
+##    not specified, the offsets array will be omitted.
+
+# RUN: yaml2obj --docnum=19 -DOFFSETS=[] %s -o %t20.o
+# RUN: llvm-readelf --hex-dump=.debug_rnglists %t20.o | \
+# RUN:   FileCheck %s --check-prefix=NO-OFFSETS
+
+## w) Test that if "Offsets" is specified, the offsets array will be emitted accordingly, even
+##    when the "OffsetEntryCount" is specified to be 0.
+
+# RUN: yaml2obj --docnum=19 -DOFFSETS=[0x01,0x02,0x03] -DENTRYCOUNT=0 %s -o %t21.o
+# RUN: llvm-readelf --hex-dump=.debug_rnglists %t21.o | \
+# RUN:   FileCheck %s --check-prefix=OFFSETS
+
+# OFFSETS:      Hex dump of section '.debug_rnglists':
+# OFFSETS-NEXT: 0x00000000 1a000000 05000800 00000000 01000000 ................
+##                                           ^------- offset_entry_count (4-byte)
+##                                                    ^------- offsets[0] (4-byte)
+# OFFSETS-NEXT: 0x00000010 02000000 03000000 02010202 0102 ..............
+##                         ^------- offsets[1] (4-byte)
+##                                  ^------- offsets[2] (4-byte)
+##                                           ^- DW_RLE_startx_endx
+##                                             ^- operands[0] (ULEB128) 0x01
+##                                               ^- operands[1] (ULEB128) 0x02
+##                                                 ^- DW_RLE_startx_endx
+##                                                   ^- operands[0] (ULEB128) 0x01
+##                                                     ^- operands[1] (ULEB128) 0x02
From 23693ffc3ba6146a22cd1e9373e25dc1e1a41a17 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 3 Aug 2020 09:19:11 -0400
Subject: [PATCH 193/600] [InstCombine] reduce xor-of-or's bitwise logic
 (PR46955); 2nd try

The 1st try at this (rG2265d01f2a5b) exposed what looks like
unspecified behavior in C/C++ resulting in test variations.

The arguments to BinaryOperator::CreateAnd() were both IRBuilder
function calls, and the order in which they execute determines
the order of the new instructions in the IR. But the order of
function arg evaluation is not fixed by the rules of C/C++, so
depending on compiler config, the test would fail because the
test expected a single fixed ordering of instructions.

Original commit message:
I tried to use m_Deferred() on this, but didn't find a clean way
to do that.

http://bugs.llvm.org/PR46955
https://alive2.llvm.org/ce/z/2h6QTq
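A minimal sketch of the evaluation-order hazard (added for illustration,
assuming the same names as the diff below; not part of the original commit
message):

  // Both calls create an instruction as a side effect, and C++ leaves the
  // evaluation order of function arguments unspecified, so the two new
  // instructions may appear in either order in the emitted IR:
  //   return BinaryOperator::CreateAnd(Builder.CreateXor(B, C),
  //                                    Builder.CreateNot(A));
  // The relanded patch sequences the builder calls through a local instead:
  //   Value *NotA = Builder.CreateNot(A);
  //   return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);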
http://bugs.llvm.org/PR46955 https://alive2.llvm.org/ce/z/2h6QTq --- .../InstCombine/InstCombineAndOrXor.cpp | 15 ++++++++++++ llvm/test/Transforms/InstCombine/xor.ll | 24 +++++++++---------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 030d2f203ed6c..9d7effc724beb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3351,6 +3351,21 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { match(Op1, m_Not(m_Specific(A)))) return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); + // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants. + // TODO: Loosen one-use restriction if common operand is a constant. + Value *D; + if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) { + if (B == C || B == D) + std::swap(A, B); + if (A == C) + std::swap(C, D); + if (A == D) { + Value *NotA = Builder.CreateNot(A); + return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA); + } + } + if (auto *LHS = dyn_cast(I.getOperand(0))) if (auto *RHS = dyn_cast(I.getOperand(1))) if (Value *V = foldXorOfICmps(LHS, RHS, I)) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index a133f2a0e009b..363fa8ff8fdb5 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -915,9 +915,9 @@ define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) { define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -928,9 +928,9 @@ define i4 @or_or_xor(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute1( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Z]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %x, %z @@ -941,9 +941,9 @@ define i4 @or_or_xor_commute1(i4 %x, i4 %y, i4 %z) { define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { ; CHECK-LABEL: @or_or_xor_commute2( -; CHECK-NEXT: [[O1:%.*]] = or i4 [[Z:%.*]], [[X:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or i4 [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor i4 [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i4 [[Z:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i4 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = and i4 [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret i4 [[R]] ; %o1 = or i4 %z, %x @@ -954,9 +954,9 @@ define i4 @or_or_xor_commute2(i4 %x, i4 %y, i4 %z) { define <2 x i4> @or_or_xor_commute3(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) { ; CHECK-LABEL: @or_or_xor_commute3( -; CHECK-NEXT: [[O1:%.*]] = or <2 x i4> [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[O2:%.*]] = or <2 x i4> [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[R:%.*]] = xor <2 x i4> [[O1]], [[O2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i4> [[Z:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = 
xor <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = and <2 x i4> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x i4> [[R]]
 ;
   %o1 = or <2 x i4> %x, %z

From 1c2777f585fc0e5e8f853dab455c62ae50298f9a Mon Sep 17 00:00:00 2001
From: Shinji Okumura
Date: Mon, 3 Aug 2020 23:31:13 +0900
Subject: [PATCH 194/600] [NFC][APInt][DenseMapInfo] Move DenseMapAPIntKeyInfo
 into DenseMap.h as DenseMapInfo<APInt>

`DenseMapAPIntKeyInfo` was previously a private helper located in
`lib/IR/LLVMContextImpl.h`. Move it into `include/ADT/DenseMapInfo.h` as the
`DenseMapInfo<APInt>` specialization so that it can be used outside of
`LLVMContextImpl`.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D85131
---
 llvm/include/llvm/ADT/APInt.h        |  3 ++-
 llvm/include/llvm/ADT/DenseMapInfo.h | 24 ++++++++++++++++++++++++
 llvm/lib/IR/LLVMContextImpl.h        | 22 +---------------------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index f7df648d27ed6..715f7cd4fdf3f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -31,6 +31,7 @@ class raw_ostream;
 template <typename T> class SmallVectorImpl;
 template <typename T> class ArrayRef;
 template <typename T> class Optional;
+template <typename T> struct DenseMapInfo;

 class APInt;

@@ -96,7 +97,7 @@ class LLVM_NODISCARD APInt {
   unsigned BitWidth; ///< The number of bits in this APInt.

-  friend struct DenseMapAPIntKeyInfo;
+  friend struct DenseMapInfo<APInt>;

   friend class APSInt;

diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index e465331ac6f7b..1cace4b3192fb 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_ADT_DENSEMAPINFO_H
 #define LLVM_ADT_DENSEMAPINFO_H

+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringRef.h"
@@ -347,6 +348,29 @@ template <> struct DenseMapInfo<hash_code> {
   static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
 };

+/// Provide DenseMapInfo for APInt.
+template <> struct DenseMapInfo<APInt> {
+  static inline APInt getEmptyKey() {
+    APInt V(nullptr, 0);
+    V.U.VAL = 0;
+    return V;
+  }
+
+  static inline APInt getTombstoneKey() {
+    APInt V(nullptr, 0);
+    V.U.VAL = 1;
+    return V;
+  }
+
+  static unsigned getHashValue(const APInt &Key) {
+    return static_cast<unsigned>(hash_value(Key));
+  }
+
+  static bool isEqual(const APInt &LHS, const APInt &RHS) {
+    return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
+  }
+};
+
 } // end namespace llvm

 #endif // LLVM_ADT_DENSEMAPINFO_H
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index b97ac37c5fcfd..e8fdaa23761c3 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -57,27 +57,7 @@ class Type;
 class Value;
 class ValueHandleBase;

-struct DenseMapAPIntKeyInfo {
-  static inline APInt getEmptyKey() {
-    APInt V(nullptr, 0);
-    V.U.VAL = 0;
-    return V;
-  }
-
-  static inline APInt getTombstoneKey() {
-    APInt V(nullptr, 0);
-    V.U.VAL = 1;
-    return V;
-  }
-
-  static unsigned getHashValue(const APInt &Key) {
-    return static_cast<unsigned>(hash_value(Key));
-  }
-
-  static bool isEqual(const APInt &LHS, const APInt &RHS) {
-    return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
-  }
-};
+using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;

 struct DenseMapAPFloatKeyInfo {
   static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }

From 08649d4321bb73c888e03ac316f8ccab600a9533 Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Mon, 3 Aug 2020 23:19:42 +0800
Subject: [PATCH 195/600] [DWARFYAML] Implement the .debug_loclists section.

This patch implements the .debug_loclists section. Only two DWARF
expression operators are implemented in this patch (DW_OP_consts,
DW_OP_stack_value). We will implement more in the future.

The YAML description of the .debug_loclists section is:

```
debug_loclists:
  - Format:              DWARF32 ## Optional
    Length:              0x1234  ## Optional
    Version:             5       ## Optional (5 by default)
    AddressSize:         8       ## Optional
    SegmentSelectorSize: 0       ## Optional (0 by default)
    OffsetEntryCount:    1       ## Optional
    Offsets:             [ 1 ]   ## Optional
    Lists:
      - Entries:
          - Operator:           DW_LLE_startx_endx
            Values:             [ 0x1234, 0x4321 ]
            DescriptionsLength: 0x1234  ## Optional
            Descriptions:
              - Operator: DW_OP_consts
                Values:   [ 0x1234 ]
```

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D84234
---
 llvm/include/llvm/ObjectYAML/DWARFEmitter.h |   1 +
 llvm/include/llvm/ObjectYAML/DWARFYAML.h    |  46 +
 llvm/lib/ObjectYAML/DWARFEmitter.cpp        | 137 +++
 llvm/lib/ObjectYAML/DWARFYAML.cpp           |  17 +
 .../yaml2obj/ELF/DWARF/debug-loclists.yaml  | 914 ++++++++++++++++++
 5 files changed, 1115 insertions(+)
 create mode 100644 llvm/test/tools/yaml2obj/ELF/DWARF/debug-loclists.yaml

diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
index 89d01cecb9b7b..c7c3070651504 100644
--- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -42,6 +42,7 @@ Error emitDebugLine(raw_ostream &OS, const Data &DI);
 Error emitDebugAddr(raw_ostream &OS, const Data &DI);
 Error emitDebugStrOffsets(raw_ostream &OS, const Data &DI);
 Error emitDebugRnglists(raw_ostream &OS, const Data &DI);
+Error emitDebugLoclists(raw_ostream &OS, const Data &DI);

 std::function<Error(raw_ostream &, const Data &)>
 getDWARFEmitterByName(StringRef SecName);
diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h
index 127a529139786..ae3eff1fe8564 100644
--- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h
@@ -184,11 +184,23 @@ struct StringOffsetsTable {
   std::vector<yaml::Hex64> Offsets;
 };

+struct DWARFOperation {
+  dwarf::LocationAtom Operator;
+  std::vector<yaml::Hex64> Values;
+};
+
 struct RnglistEntry {
   dwarf::RnglistEntries Operator;
   std::vector<yaml::Hex64> Values;
 };

+struct LoclistEntry {
+  dwarf::LoclistEntries Operator;
+  std::vector<yaml::Hex64> Values;
+  Optional<yaml::Hex64> DescriptionsLength;
+  std::vector<DWARFOperation> Descriptions;
+};
+
 template <typename EntryType> struct ListEntries {
   Optional<std::vector<EntryType>> Entries;
   Optional<yaml::BinaryRef> Content;
@@ -224,6 +236,7 @@ struct Data {
   std::vector<LineTable> DebugLines;
   Optional<std::vector<ListTable<RnglistEntry>>> DebugRnglists;
+  Optional<std::vector<ListTable<LoclistEntry>>> DebugLoclists;

   bool isEmpty() const;

@@ -254,6 +267,12 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(
 LLVM_YAML_IS_SEQUENCE_VECTOR(
     llvm::DWARFYAML::ListEntries<llvm::DWARFYAML::RnglistEntry>)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::RnglistEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+    llvm::DWARFYAML::ListTable<llvm::DWARFYAML::LoclistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(
+    llvm::DWARFYAML::ListEntries<llvm::DWARFYAML::LoclistEntry>)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LoclistEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::DWARFOperation)

 namespace llvm {
 namespace yaml {
@@ -322,6 +341,10 @@ template <> struct MappingTraits<DWARFYAML::SegAddrPair> {
   static void mapping(IO &IO, DWARFYAML::SegAddrPair &SegAddrPair);
 };

+template <> struct MappingTraits<DWARFYAML::DWARFOperation> {
+  static void mapping(IO &IO, DWARFYAML::DWARFOperation &DWARFOperation);
+};
+
 template <typename EntryType>
 struct MappingTraits<DWARFYAML::ListTable<EntryType>> {
   static void mapping(IO &IO, DWARFYAML::ListTable<EntryType> &ListTable);
@@ -338,6 +361,10 @@ template <> struct MappingTraits<DWARFYAML::RnglistEntry> {
   static void mapping(IO &IO, DWARFYAML::RnglistEntry &RnglistEntry);
 };

+template <> struct MappingTraits<DWARFYAML::LoclistEntry> {
+  static void mapping(IO &IO, DWARFYAML::LoclistEntry &LoclistEntry);
+};
+
 template <> struct MappingTraits<DWARFYAML::AddrTableEntry> {
   static void mapping(IO &IO, DWARFYAML::AddrTableEntry &AddrTable);
 };
@@ -434,6 +461,25 @@ template <> struct ScalarEnumerationTraits<dwarf::RnglistEntries> {
   }
 };

+#define HANDLE_DW_LLE(unused, name) \
+  io.enumCase(value, "DW_LLE_" #name, dwarf::DW_LLE_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::LoclistEntries> {
+  static void enumeration(IO &io, dwarf::LoclistEntries &value) {
+#include "llvm/BinaryFormat/Dwarf.def"
+  }
+};
+
+#define HANDLE_DW_OP(id, name, version, vendor) \
+  io.enumCase(value, "DW_OP_" #name, dwarf::DW_OP_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::LocationAtom> {
+  static void enumeration(IO &io, dwarf::LocationAtom &value) {
+#include "llvm/BinaryFormat/Dwarf.def"
+    io.enumFallback<yaml::Hex8>(value);
+  }
+};
+
 } // end namespace yaml
 } // end namespace llvm

diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index 030b0e01b6f17..deff6a68363b2 100644
--- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -611,6 +611,39 @@ static Error writeListEntryAddress(StringRef EncodingName, raw_ostream &OS,
   return Error::success();
 }

+static Expected<uint64_t>
+writeDWARFExpression(raw_ostream &OS,
+                     const DWARFYAML::DWARFOperation &Operation,
+                     uint8_t AddrSize, bool IsLittleEndian) {
+  auto CheckOperands = [&](uint64_t ExpectedOperands) -> Error {
+    return checkOperandCount(dwarf::OperationEncodingString(Operation.Operator),
+                             Operation.Values, ExpectedOperands);
+  };
+
+  uint64_t ExpressionBegin = OS.tell();
+  writeInteger((uint8_t)Operation.Operator, OS, IsLittleEndian);
+  switch (Operation.Operator) {
+  case dwarf::DW_OP_consts:
+    if (Error Err = CheckOperands(1))
+      return std::move(Err);
+    encodeSLEB128(Operation.Values[0], OS);
+    break;
+  case dwarf::DW_OP_stack_value:
+    if (Error Err = CheckOperands(0))
+      return std::move(Err);
+    break;
+  default:
+    StringRef EncodingStr = dwarf::OperationEncodingString(Operation.Operator);
+    return createStringError(errc::not_supported,
+                             "DWARF expression: " +
+                                 (EncodingStr.empty()
+                                      ? "0x" + utohexstr(Operation.Operator)
"0x" + utohexstr(Operation.Operator) + : EncodingStr) + + " is not supported"); + } + return OS.tell() - ExpressionBegin; +} + static Expected writeListEntry(raw_ostream &OS, const DWARFYAML::RnglistEntry &Entry, uint8_t AddrSize, @@ -672,6 +705,103 @@ static Expected writeListEntry(raw_ostream &OS, return OS.tell() - BeginOffset; } +static Expected writeListEntry(raw_ostream &OS, + const DWARFYAML::LoclistEntry &Entry, + uint8_t AddrSize, + bool IsLittleEndian) { + uint64_t BeginOffset = OS.tell(); + writeInteger((uint8_t)Entry.Operator, OS, IsLittleEndian); + + StringRef EncodingName = dwarf::LocListEncodingString(Entry.Operator); + + auto CheckOperands = [&](uint64_t ExpectedOperands) -> Error { + return checkOperandCount(EncodingName, Entry.Values, ExpectedOperands); + }; + + auto WriteAddress = [&](uint64_t Addr) -> Error { + return writeListEntryAddress(EncodingName, OS, Addr, AddrSize, + IsLittleEndian); + }; + + auto WriteDWARFOperations = [&]() -> Error { + std::string OpBuffer; + raw_string_ostream OpBufferOS(OpBuffer); + uint64_t DescriptionsLength = 0; + + for (const DWARFYAML::DWARFOperation &Op : Entry.Descriptions) { + if (Expected OpSize = + writeDWARFExpression(OpBufferOS, Op, AddrSize, IsLittleEndian)) + DescriptionsLength += *OpSize; + else + return OpSize.takeError(); + } + + if (Entry.DescriptionsLength) + DescriptionsLength = *Entry.DescriptionsLength; + else + DescriptionsLength = OpBuffer.size(); + + encodeULEB128(DescriptionsLength, OS); + OS.write(OpBuffer.data(), OpBuffer.size()); + + return Error::success(); + }; + + switch (Entry.Operator) { + case dwarf::DW_LLE_end_of_list: + if (Error Err = CheckOperands(0)) + return std::move(Err); + break; + case dwarf::DW_LLE_base_addressx: + if (Error Err = CheckOperands(1)) + return std::move(Err); + encodeULEB128(Entry.Values[0], OS); + break; + case dwarf::DW_LLE_startx_endx: + case dwarf::DW_LLE_startx_length: + case dwarf::DW_LLE_offset_pair: + if (Error Err = CheckOperands(2)) + return std::move(Err); + encodeULEB128(Entry.Values[0], OS); + encodeULEB128(Entry.Values[1], OS); + if (Error Err = WriteDWARFOperations()) + return std::move(Err); + break; + case dwarf::DW_LLE_default_location: + if (Error Err = CheckOperands(0)) + return std::move(Err); + if (Error Err = WriteDWARFOperations()) + return std::move(Err); + break; + case dwarf::DW_LLE_base_address: + if (Error Err = CheckOperands(1)) + return std::move(Err); + if (Error Err = WriteAddress(Entry.Values[0])) + return std::move(Err); + break; + case dwarf::DW_LLE_start_end: + if (Error Err = CheckOperands(2)) + return std::move(Err); + if (Error Err = WriteAddress(Entry.Values[0])) + return std::move(Err); + cantFail(WriteAddress(Entry.Values[1])); + if (Error Err = WriteDWARFOperations()) + return std::move(Err); + break; + case dwarf::DW_LLE_start_length: + if (Error Err = CheckOperands(2)) + return std::move(Err); + if (Error Err = WriteAddress(Entry.Values[0])) + return std::move(Err); + encodeULEB128(Entry.Values[1], OS); + if (Error Err = WriteDWARFOperations()) + return std::move(Err); + break; + } + + return OS.tell() - BeginOffset; +} + template Error writeDWARFLists(raw_ostream &OS, ArrayRef> Tables, @@ -764,6 +894,12 @@ Error DWARFYAML::emitDebugRnglists(raw_ostream &OS, const Data &DI) { OS, *DI.DebugRnglists, DI.IsLittleEndian, DI.Is64BitAddrSize); } +Error DWARFYAML::emitDebugLoclists(raw_ostream &OS, const Data &DI) { + assert(DI.DebugLoclists && "unexpected emitDebugRnglists() call"); + return writeDWARFLists( + OS, *DI.DebugLoclists, 
DI.IsLittleEndian, DI.Is64BitAddrSize); +} + std::function DWARFYAML::getDWARFEmitterByName(StringRef SecName) { auto EmitFunc = @@ -776,6 +912,7 @@ DWARFYAML::getDWARFEmitterByName(StringRef SecName) { .Case("debug_gnu_pubtypes", DWARFYAML::emitDebugGNUPubtypes) .Case("debug_info", DWARFYAML::emitDebugInfo) .Case("debug_line", DWARFYAML::emitDebugLine) + .Case("debug_loclists", DWARFYAML::emitDebugLoclists) .Case("debug_pubnames", DWARFYAML::emitDebugPubnames) .Case("debug_pubtypes", DWARFYAML::emitDebugPubtypes) .Case("debug_ranges", DWARFYAML::emitDebugRanges) diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 37d45996786d3..a0caad10a36c1 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -50,6 +50,8 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_str_offsets"); if (DebugRnglists) SecNames.insert("debug_rnglists"); + if (DebugLoclists) + SecNames.insert("debug_loclists"); return SecNames; } @@ -74,6 +76,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.mapOptional("debug_addr", DWARF.DebugAddr); IO.mapOptional("debug_str_offsets", DWARF.DebugStrOffsets); IO.mapOptional("debug_rnglists", DWARF.DebugRnglists); + IO.mapOptional("debug_loclists", DWARF.DebugLoclists); IO.setContext(OldContext); } @@ -235,12 +238,26 @@ void MappingTraits::mapping( IO.mapOptional("Offsets", StrOffsetsTable.Offsets); } +void MappingTraits::mapping( + IO &IO, DWARFYAML::DWARFOperation &DWARFOperation) { + IO.mapRequired("Operator", DWARFOperation.Operator); + IO.mapOptional("Values", DWARFOperation.Values); +} + void MappingTraits::mapping( IO &IO, DWARFYAML::RnglistEntry &RnglistEntry) { IO.mapRequired("Operator", RnglistEntry.Operator); IO.mapOptional("Values", RnglistEntry.Values); } +void MappingTraits::mapping( + IO &IO, DWARFYAML::LoclistEntry &LoclistEntry) { + IO.mapRequired("Operator", LoclistEntry.Operator); + IO.mapOptional("Values", LoclistEntry.Values); + IO.mapOptional("DescriptionsLength", LoclistEntry.DescriptionsLength); + IO.mapOptional("Descriptions", LoclistEntry.Descriptions); +} + template void MappingTraits>::mapping( IO &IO, DWARFYAML::ListEntries &ListEntries) { diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-loclists.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-loclists.yaml new file mode 100644 index 0000000000000..4da5595827d1e --- /dev/null +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-loclists.yaml @@ -0,0 +1,914 @@ +## Test that yaml2obj emits a .debug_loclists section when requested. + +## a) Generate and verify a little endian DWARF32 .debug_loclists section in a 64-bit object file. 
+ +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2LSB %s -o %t1.dwarf32.le.o +# RUN: llvm-readobj --sections --section-data %t1.dwarf32.le.o | \ +# RUN: FileCheck -DSIZE=133 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-LE + +# SHDR: Index: 1 +# SHDR-NEXT: Name: .debug_loclists (1) +# SHDR-NEXT: Type: SHT_PROGBITS (0x1) +# SHDR-NEXT: Flags [ (0x0) +# SHDR-NEXT: ] +# SHDR-NEXT: Address: 0x0 +# SHDR-NEXT: Offset: 0x40 +# SHDR-NEXT: Size: [[SIZE]] +# SHDR-NEXT: Link: 0 +# SHDR-NEXT: Info: 0 +# SHDR-NEXT: AddressAlignment: [[ADDRALIGN]] +# SHDR-NEXT: EntrySize: 0 +# DWARF32-LE-NEXT: SectionData ( +# DWARF32-LE-NEXT: 0000: 3D000000 05000800 03000000 0C000000 |=...............| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) +## ^------- offsets[0] (4-byte) +# DWARF32-LE-NEXT: 0010: 1B000000 2F000000 01B42402 B424A186 |..../.....$..$..| +## ^------- offsets[1] (4-byte) +## ^------- offsets[2] (4-byte) +## ^- DW_LLE_base_addressx +## ^--- operands[0] (ULEB128) 0x1234 +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^--- operands[1] (ULEB128) 0x4321 +# DWARF32-LE-NEXT: 0020: 010411B4 249F0003 B424A186 0102117D |....$....$.....}| +## -- +## ^- location descriptions length (ULEB128) 0x04 +## ^- DW_OP_consts +## ^---- operands[0] (SLEB128) +0x1234 +## ^- DW_OP_stack_value +## ^- DW_LLE_end_of_list +## ^- DW_LLE_startx_length +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x02 +## ^- DW_OP_consts +## ^- operands[0] (SLEB128) -0x03 +# DWARF32-LE-NEXT: 0030: 04B424A1 86010311 B4240005 0311B424 |..$......$.....$| +## ^- DW_LLE_offset_pair +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +## ^- DW_LLE_default_location +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +# DWARF32-LE-NEXT: 0040: 00400000 00050008 00020000 00080000 |.@..............| +## ^- DW_LLE_end_of_list +## ^-------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^-------- offset_entry_count (4-byte) +## ^----- offsets[0] (4-byte) +# DWARF32-LE-NEXT: 0050: 00270000 00063412 00000000 00000734 |.'....4........4| +## -- +## ^-------- offsets[1] (4-byte) +## ^- DW_LLE_base_address +## ^----------------- operands[0] (8-byte) +## ^- DW_LLE_start_end +## ^- operands[0] (8-byte) +# DWARF32-LE-NEXT: 0060: 12000000 00000021 43000000 00000003 |.......!C.......| +## --------------- +## ^----------------- operands[1] (8-byte) +## ^- location descriptions length (ULEB128) 0x03 +# DWARF32-LE-NEXT: 0070: 11B42400 08341200 00000000 00A18601 |..$..4..........| +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +## ^- DW_LLE_start_length +## ^----------------- operands[0] (8-byte) +## ^----- operands[1] (LEB128) 0x4321 +# DWARF32-LE-NEXT: 0080: 0311B424 00 |...$.| +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +# DWARF32-LE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: [[ENDIAN]] + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_base_addressx + Values: [ 0x1234 
] + - Operator: DW_LLE_startx_endx + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Operator: DW_OP_stack_value + - Operator: DW_LLE_end_of_list + - Entries: + - Operator: DW_LLE_startx_length + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + ## Test a negative number (-3). + Values: [ 0xfffffffffffffffd ] + - Operator: DW_LLE_offset_pair + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Operator: DW_LLE_end_of_list + - Entries: + - Operator: DW_LLE_default_location + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Operator: DW_LLE_end_of_list + - Lists: + - Entries: + - Operator: DW_LLE_base_address + Values: [ 0x1234 ] + - Operator: DW_LLE_start_end + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Operator: DW_LLE_end_of_list + - Entries: + - Operator: DW_LLE_start_length + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Operator: DW_LLE_end_of_list + +## b) Generate and verify a big endian DWARF32 .debug_loclists section in a 64-bit object file. + +# RUN: yaml2obj --docnum=1 -DENDIAN=ELFDATA2MSB %s -o %t.dwarf32.be.o +# RUN: llvm-readobj --sections --section-data %t.dwarf32.be.o | \ +# RUN: FileCheck -DSIZE=133 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF32-BE + +# DWARF32-BE-NEXT: SectionData ( +# DWARF32-BE-NEXT: 0000: 0000003D 00050800 00000003 0000000C |...=............| +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) +## ^------- offsets[0] (4-byte) +# DWARF32-BE-NEXT: 0010: 0000001B 0000002F 01B42402 B424A186 |......./..$..$..| +## ^------- offsets[1] (4-byte) +## ^------- offsets[2] (4-byte) +## ^- DW_LLE_base_addressx +## ^--- operands[0] (ULEB128) 0x1234 +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^--- operands[1] (ULEB128) 0x4321 +# DWARF32-BE-NEXT: 0020: 010411B4 249F0003 B424A186 0102117D |....$....$.....}| +## -- +## ^- location descriptions length (ULEB128) 0x04 +## ^- DW_OP_consts +## ^---- operands[0] (SLEB128) +0x1234 +## ^- DW_OP_stack_value +## ^- DW_LLE_end_of_list +## ^- DW_LLE_startx_length +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x02 +## ^- DW_OP_consts +## ^- operands[0] (SLEB128) -0x03 +# DWARF32-BE-NEXT: 0030: 04B424A1 86010311 B4240005 0311B424 |..$......$.....$| +## ^- DW_LLE_offset_pair +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +## ^- DW_LLE_default_location +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +# DWARF32-BE-NEXT: 0040: 00000000 40000508 00000000 02000000 |....@...........| +## ^- DW_LLE_end_of_list +## ^-------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^-------- offset_entry_count (4-byte) +## ^----- offsets[0] (4-byte) +# DWARF32-BE-NEXT: 0050: 08000000 27060000 00000000 12340700 |....'........4..| +## -- +## ^-------- offsets[1] (4-byte) +## ^- DW_LLE_base_address +## ^----------------- operands[0] (8-byte) +## ^- DW_LLE_start_end +## ^- operands[0] (8-byte) +# DWARF32-BE-NEXT: 0060: 
00000000 00123400 00000000 00432103 |......4......C!.| +## --------------- +## ^----------------- operands[1] (8-byte) +## ^- location descriptions length (ULEB128) 0x03 +# DWARF32-BE-NEXT: 0070: 11B42400 08000000 00000012 34A18601 |..$.........4...| +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +## ^- DW_LLE_start_length +## ^----------------- operands[0] (8-byte) +## ^----- operands[1] (LEB128) 0x4321 +# DWARF32-BE-NEXT: 0080: 0311B424 00 |...$.| +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +# DWARF32-BE-NEXT: ) + +## c) Generate and verify a little endian DWARF64 .debug_loclists section in a 64-bit object file. + +# RUN: yaml2obj --docnum=2 -DENDIAN=ELFDATA2LSB %s -o %t2.dwarf64.le.o +# RUN: llvm-readobj --sections --section-data %t2.dwarf64.le.o | \ +# RUN: FileCheck -DSIZE=47 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF64-LE + +# DWARF64-LE-NEXT: SectionData ( +# DWARF64-LE-NEXT: 0000: FFFFFFFF 23000000 00000000 05000800 |....#...........| +## ^------------------------- unit_length (12-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +# DWARF64-LE-NEXT: 0010: 02000000 10000000 00000000 1A000000 |................| +## ^------- offset_entry_count (4-byte) +## ^---------------- offsets[0] (8-byte) +## ^------- offsets[1] (8-byte) +# DWARF64-LE-NEXT: 0020: 00000000 02B424A1 86010311 B42400 |......$......$.| +## -------- +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +# DWARF64-LE-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: [[ENDIAN]] + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Format: DWARF64 + Lists: + - Entries: + - Operator: DW_LLE_startx_endx + Values: [ 0x1234, 0x4321 ] + Descriptions: + - Operator: DW_OP_consts + Values: [ 0x1234 ] + - Entries: + - Operator: DW_LLE_end_of_list + +## d) Generate and verify a big endian DWARF64 .debug_loclists section in a 64-bit object file. + +# RUN: yaml2obj --docnum=2 -DENDIAN=ELFDATA2MSB %s -o %t2.dwarf64.be.o +# RUN: llvm-readobj --sections --section-data %t2.dwarf64.be.o | \ +# RUN: FileCheck -DSIZE=47 -DADDRALIGN=1 %s --check-prefixes=SHDR,DWARF64-BE + +# DWARF64-BE-NEXT: SectionData ( +# DWARF64-BE-NEXT: 0000: FFFFFFFF 00000000 00000023 00050800 |...........#....| +## ^------------------------- unit_length (12-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +# DWARF64-BE-NEXT: 0010: 00000002 00000000 00000010 00000000 |................| +## ^------- offset_entry_count (4-byte) +## ^---------------- offsets[0] (8-byte) +## ^------- offsets[1] (8-byte) +# DWARF64-BE-NEXT: 0020: 0000001A 02B424A1 86010311 B42400 |......$......$.| +## -------- +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x03 +## ^- DW_OP_consts +## ^--- operands[0] (SLEB128) +0x1234 +## ^- DW_LLE_end_of_list +# DWARF64-BE-NEXT: ) + +## e) Test that the length, version, segment_selector_size, address_size, offset_entry_count, +## offsets and location descriptions length fields can be specified manually. 
+ +# RUN: yaml2obj --docnum=3 %s -o %t3.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t3.o | \ +# RUN: FileCheck %s --check-prefix=OVERWRITE + +# OVERWRITE: Hex dump of section '.debug_loclists': +# OVERWRITE-NEXT: 0x00000000 34120000 06000303 04000000 01000000 4............... +## ^------- unit_length (4-byte) 0x1234 +## ^--- version (2-byte) 0x06 +## ^- address_size (1-byte) 0x03 +## ^- segment_selector_size (1-byte) 0x03 +## ^------- offset_entry_count (4-byte) 0x04 +## ^------- offsets[0] (4-byte) 0x01 +# OVERWRITE-NEXT: 0x00000010 02b424a1 8601a186 019f00 ..$........ +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^------ location descriptions length (ULEB128) 0x4321 +## ^- DW_OP_stack_value +## ^- DW_LLE_end_of_list + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Length: 0x1234 + Version: 6 + AddressSize: 3 + SegmentSelectorSize: 3 + OffsetEntryCount: 4 + Offsets: [ 0x01 ] + Lists: + - Entries: + - Operator: DW_LLE_startx_endx + Values: [ 0x1234, 0x4321 ] + DescriptionsLength: 0x4321 + Descriptions: + - Operator: DW_OP_stack_value + - Entries: + - Operator: DW_LLE_end_of_list + +## f) Test that location descriptions can be omitted from the YAML description. + +# RUN: yaml2obj --docnum=4 %s -o %t4.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t4.o | \ +# RUN: FileCheck %s --check-prefix=OMIT-DESCRIPTIONS + +# OMIT-DESCRIPTIONS: Hex dump of section '.debug_loclists': +# OMIT-DESCRIPTIONS-NEXT: 0x00000000 42000000 05000800 01000000 04000000 B............... +# OMIT-DESCRIPTIONS-NEXT: 0x00000010 02b424a1 86010003 b424a186 010004b4 ..$......$...... +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x00 +## ^- DW_LLE_startx_length +## ^--- operands[0] (ULEB128) 0x1234 +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x00 +## ^- DW_LLE_offset_pair +## ^- operands[0] (ULEB128) 0x1234 +# OMIT-DESCRIPTIONS-NEXT: 0x00000020 24a18601 00050007 34120000 00000000 $.......4....... +## -- +## ^----- operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x00 +## ^- DW_LLE_default_location +## ^- location descriptions length (ULEB128) 0x00 +## ^- DW_LLE_start_end +## ^---------------- operands[0] (8-byte) +# OMIT-DESCRIPTIONS-NEXT: 0x00000030 21430000 00000000 00083412 00000000 !C........4..... +## ^---------------- operands[1] (8-byte) +## ^- location descriptions length (ULEB128) 0x00 +## ^- DW_LLE_start_length +## ^------------ operands[0] (8-byte) +# OMIT-DESCRIPTIONS-NEXT: 0x00000040 0000a186 0100 ...... +## ---- +## ^------ operands[1] (ULEB128) 0x4321 +## ^- location descriptions length (ULEB128) 0x00 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_startx_endx + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_startx_length + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_offset_pair + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_default_location + - Operator: DW_LLE_start_end + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_start_length + Values: [ 0x1234, 0x4321 ] + +## g) Test that the default value of the address_size field in a 32-bit object file is 4. 
+ +# RUN: yaml2obj --docnum=5 %s -o %t5.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t5.o | \ +# RUN: FileCheck %s --check-prefix=ADDRSIZE32 + +# ADDRSIZE32: Hex dump of section '.debug_loclists': +# ADDRSIZE32-NEXT: 0x00000000 24000000 05000400 01000000 04000000 $............... +## ^- address_size (1-byte) 0x04 +# ADDRSIZE32-NEXT: 0x00000010 06341200 00073412 00002143 00000008 .4....4...!C.... +## ^- DW_LLE_base_address +## ^-------- operands[0] (4-byte) +## ^- DW_LLE_start_end +## ^-------- operands[0] (4-byte) +## ^-------- operands[1] (4-byte) +## ^- counted location description +## ^- DW_LLE_start_length +# ADDRSIZE32-NEXT: 0x00000020 34120000 a1860100 4....... +## ^------- operands[0] (4-byte) +## ^----- operands[1] (ULEB128) 0x4321 +## ^- counted location description + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_base_address + Values: [ 0x1234 ] + - Operator: DW_LLE_start_end + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_start_length + Values: [ 0x1234, 0x4321 ] + +## h) Test that the address_size field can be specified manually and the size of +## corresponding operands will be changed accordingly. + +# RUN: yaml2obj --docnum=6 %s -o %t6.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t6.o | \ +# RUN: FileCheck %s --check-prefix=ADDRSIZE32 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - AddressSize: 4 + Lists: + - Entries: + - Operator: DW_LLE_base_address + Values: [ 0x1234 ] + - Operator: DW_LLE_start_end + Values: [ 0x1234, 0x4321 ] + - Operator: DW_LLE_start_length + Values: [ 0x1234, 0x4321 ] + +## i) Test that yaml2obj emits an error message if we try to assign an invalid value to +## 'AddressSize' when there is an entry whose operands contain address. + +# RUN: not yaml2obj -DOPERATOR=base_address -DVALUES=[0x01] --docnum=7 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=base_address %s --check-prefix=INVALID-ADDRSIZE + +# RUN: not yaml2obj -DOPERATOR=start_end -DVALUES=[0x01,0x02] --docnum=7 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=start_end %s --check-prefix=INVALID-ADDRSIZE + +# RUN: not yaml2obj -DOPERATOR=start_length -DVALUES=[0x01,0x02] --docnum=7 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=start_length %s --check-prefix=INVALID-ADDRSIZE + +# INVALID-ADDRSIZE: yaml2obj: error: unable to write address for the operator DW_LLE_[[OPERATOR]]: invalid integer write size: 3 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - AddressSize: 3 + Lists: + - Entries: + - Operator: DW_LLE_[[OPERATOR]] + Values: [[VALUES]] + +## j) Test that yaml2obj emits an error message if we specify invalid numbers of operands +## for a location list encoding. 
+ +# RUN: not yaml2obj -DOPERATOR=end_of_list -DVALUES=[0x01] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=end_of_list -DACTUAL=1 -DEXPECTED=0 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=base_addressx -DVALUES=[] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=base_addressx -DACTUAL=0 -DEXPECTED=1 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=startx_endx -DVALUES=[0x01] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=startx_endx -DACTUAL=1 -DEXPECTED=2 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=startx_length -DVALUES=[0x01] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=startx_length -DACTUAL=1 -DEXPECTED=2 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=offset_pair -DVALUES=[] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=offset_pair -DACTUAL=0 -DEXPECTED=2 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=default_location -DVALUES=[0x01] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=default_location -DACTUAL=1 -DEXPECTED=0 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=base_address -DVALUES=[0x01,0x02] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=base_address -DACTUAL=2 -DEXPECTED=1 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=start_end -DVALUES=[0x01,0x02,0x03] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=start_end -DACTUAL=3 -DEXPECTED=2 %s --check-prefix=INVALID-LLE-OPERANDS + +# RUN: not yaml2obj -DOPERATOR=start_length -DVALUES=[0x01] --docnum=8 %s 2>&1 | \ +# RUN: FileCheck -DOPERATOR=start_length -DACTUAL=1 -DEXPECTED=2 %s --check-prefix=INVALID-LLE-OPERANDS + +# INVALID-LLE-OPERANDS: yaml2obj: error: invalid number ([[ACTUAL]]) of operands for the operator: DW_LLE_[[OPERATOR]], [[EXPECTED]] expected + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_[[OPERATOR]] + Values: [[VALUES]] + +## k) Test that yaml2obj emits an error message if we specify invalid numbers of operands +## for a DWARF expression operator. + +# RUN: not yaml2obj --docnum=9 -DOPERATOR=consts -DVALUES=[0x01,0x02] %s 2>&1 | \ +# RUN: FileCheck -DACTUAL=2 -DEXPECTED=1 -DOPERATOR=consts %s --check-prefix=INVALID-OP-OPERANDS + +# RUN: not yaml2obj --docnum=9 -DOPERATOR=stack_value -DVALUES=[0x01] %s 2>&1 | \ +# RUN: FileCheck -DACTUAL=1 -DEXPECTED=0 -DOPERATOR=stack_value %s --check-prefix=INVALID-OP-OPERANDS + +# INVALID-OP-OPERANDS: yaml2obj: error: invalid number ([[ACTUAL]]) of operands for the operator: DW_OP_[[OPERATOR]], [[EXPECTED]] expected + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_startx_endx + Values: [ 0x01, 0x02 ] + Descriptions: + - Operator: DW_OP_[[OPERATOR]] + Values: [[VALUES]] + +## l) Test that an empty list is allowed for a location list table. + +# RUN: yaml2obj --docnum=10 %s -o %t10.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t10.o | \ +# RUN: FileCheck %s --check-prefix=EMPTY-LIST + +# EMPTY-LIST: Hex dump of section '.debug_loclists': +# EMPTY-LIST-NEXT: 0x00000000 08000000 05000800 00000000 ............ 
+## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: [] + +## m) Generate the .debug_loclists section from raw section content. + +# RUN: yaml2obj --docnum=11 %s -o %t11.o +# RUN: llvm-readobj --sections --section-data %t11.o | \ +# RUN: FileCheck %s -DSIZE=3 -DADDRALIGN=0 --check-prefixes=SHDR,ARBITRARY-CONTENT + +# ARBITRARY-CONTENT: SectionData ( +# ARBITRARY-CONTENT-NEXT: 0000: 112233 +# ARBITRARY-CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_PROGBITS + Content: "112233" + +## n) Generate the .debug_loclists section when the "Size" is specified. + +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t12.o | \ +# RUN: FileCheck %s --check-prefix=SIZE + +# SIZE: Hex dump of section '.debug_loclists': +# SIZE-NEXT: 0x00000000 00000000 00000000 00000000 00000000 ................ +# SIZE-EMPTY: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_PROGBITS + Size: 0x10 + +## o) Test that yaml2obj emits an error message when both the "Size" and the +## "debug_loclists" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=13 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR: yaml2obj: error: cannot specify section '.debug_loclists' contents in the 'DWARF' entry and the 'Content' or 'Size' in the 'Sections' entry at the same time + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_PROGBITS + Size: 0x10 +DWARF: + debug_loclists: + - Lists: [] + +## p) Test that yaml2obj emits an error message when both the "Content" and the +## "debug_loclists" entry are specified at the same time. + +# RUN: not yaml2obj --docnum=14 %s 2>&1 | FileCheck %s --check-prefix=ERROR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_PROGBITS + Content: "00" +DWARF: + debug_loclists: + - Lists: [] + +## q) Test that all the properties can be overridden by the section header when +## the "debug_loclists" entry doesn't exist. + +# RUN: yaml2obj --docnum=15 %s -o %t15.o +# RUN: llvm-readelf --sections %t15.o | FileCheck %s --check-prefix=OVERRIDDEN + +# OVERRIDDEN: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# OVERRIDDEN: [ 1] .debug_loclists STRTAB 0000000000002020 000050 00000c 01 A 2 1 2 +# OVERRIDDEN-NEXT: [ 2] .sec STRTAB 0000000000000000 00005c 000000 00 0 0 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 0 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + Size: 0x0c ## Set the "Size" so that we can reuse the check tag "OVERRIDDEN". + - Name: .sec ## Linked by .debug_loclists. 
+ Type: SHT_STRTAB + +## r) Test that all the properties can be overridden by the section header when +## the "debug_loclists" entry exists. + +# RUN: yaml2obj --docnum=16 %s -o %t16.o +# RUN: llvm-readelf --sections %t16.o | FileCheck %s --check-prefix=OVERRIDDEN + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_loclists + Type: SHT_STRTAB ## SHT_PROGBITS by default. + Flags: [SHF_ALLOC] ## 0 by default. + Link: .sec ## 0 by default. + EntSize: 1 ## 0 by default. + Info: 1 ## 0 by default. + AddressAlign: 2 ## 1 by default. + Address: 0x2020 ## 0x00 by default. + Offset: 0x50 ## 0x40 for the first section. + - Name: .sec ## Linked by .debug_loclists. + Type: SHT_STRTAB +DWARF: + debug_loclists: + - Lists: [] + +## s) Test that the .debug_loclists section header is emitted if the "debug_loclists" +## entry is empty. + +# RUN: yaml2obj --docnum=17 %s -o %t17.o +# RUN: llvm-readobj --sections --section-data %t17.o | \ +# RUN: FileCheck -DSIZE=0 -DADDRALIGN=1 %s --check-prefixes=SHDR,EMPTY-CONTENT + +# EMPTY-CONTENT-NEXT: SectionData ( +# EMPTY-CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: [] + +## t) Test that yaml2obj emits an error message if we use an unimplemented DWARF expression +## operator. + +# RUN: not yaml2obj --docnum=18 -DOP=0x01 %s 2>&1 | \ +# RUN: FileCheck -DOP=0x1 %s --check-prefix=UNSUPPORTED-OP + +# UNSUPPORTED-OP: yaml2obj: error: DWARF expression: [[OP]] is not supported + +# RUN: not yaml2obj --docnum=18 -DOP=DW_OP_entry_value %s 2>&1 | \ +# RUN: FileCheck -DOP=DW_OP_entry_value %s --check-prefix=UNSUPPORTED-OP + + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_default_location + Descriptions: + - Operator: [[OP]] + +## u) Test that we are able to generate a location list via raw binary data. + +# RUN: yaml2obj --docnum=19 %s -o %t19.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t19.o | \ +# RUN: FileCheck %s --check-prefix=CUSTOM-LIST + +# CUSTOM-LIST: Hex dump of section '.debug_loclists': +# CUSTOM-LIST-NEXT: 0x00000000 2a000000 05000800 03000000 0c000000 *............... +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) +## ^------- offsets[0] (4-byte) +# CUSTOM-LIST-NEXT: 0x00000010 12000000 1a000000 02b424b4 24001234 ..........$.$..4 +## ^------- offsets[1] (4-byte) +## ^------- offsets[2] (4-byte) +## ^- DW_LLE_startx_endx +## ^--- operands[0] (ULEB128) 0x1234 +## ^---- operands[1] (ULEB128) 0x1234 +## ^- location descriptions length (ULEB128) 0x00 +## ^--- custom list content +# CUSTOM-LIST-NEXT: 0x00000020 567890ab cdefabcd ef123456 7890 Vx........4Vx. +## ------------- +## ^----------------- custom list content + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: + - Operator: DW_LLE_startx_endx + Values: [ 0x1234, 0x1234 ] + - Content: '1234567890abcdef' + - Content: 'abcdef1234567890' + +## v) Test that yaml2obj emits an error message when 'Content' and 'Entries' are specified +## at the same time. 
+ +# RUN: not yaml2obj --docnum=20 %s 2>&1 | FileCheck %s --check-prefix=ERR + +# ERR: YAML:{{.*}}: error: Entries and Content can't be used together +# ERR-NEXT: - Entries: [] +# ERR-NEXT: ^ + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - Lists: + - Entries: [] + Content: '' + +## w) Test that when the "OffsetEntryCount" is specified to be 0 and "Offsets" is not specified, +## the offsets array is not emitted. + +# RUN: yaml2obj --docnum=21 -DOFFSETENTRIES=0 %s -o %t21.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t21.o | \ +# RUN: FileCheck %s --check-prefix=NO-OFFSETS + +# NO-OFFSETS: Hex dump of section '.debug_loclists': +# NO-OFFSETS-NEXT: 0x00000000 0e000000 05000800 00000000 01010201 ................ +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) +## ^- DW_LLE_base_addressx +## ^- operands[0] (ULEB128) 0x01 +## ^- DW_LLE_startx_endx +## ^- operands[0] (ULEB128) 0x01 +# NO-OFFSETS-NEXT: 0x00000010 0200 .. +## ^- operands[1] (ULEB128) 0x02 +## ^- location descriptions length (ULEB128) 0x00 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_loclists: + - OffsetEntryCount: [[OFFSETENTRIES=]] + Offsets: [[OFFSETS=]] + Lists: + - Entries: + - Operator: DW_LLE_base_addressx + Values: [ 0x01 ] + - Operator: DW_LLE_startx_endx + Values: [ 0x01, 0x02 ] + +## x) Test that when the "Offsets" entry is specified to be empty and the "OffsetEntryCount" is not specified, +## the offsets array will be omitted. + +# RUN: yaml2obj --docnum=21 -DOFFSETS=[] %s -o %t22.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t22.o | \ +# RUN: FileCheck %s --check-prefix=NO-OFFSETS + +## y) Test that if "Offsets" is specified, the offsets array will be emitted accordingly, even when +## the "OffsetEntryCount" is specified to be 0. + +# RUN: yaml2obj --docnum=21 -DOFFSETENTRIES=0 -DOFFSETS=[0x01,0x02,0x03] %s -o %t23.o +# RUN: llvm-readelf --hex-dump=.debug_loclists %t23.o | \ +# RUN: FileCheck %s --check-prefix=OFFSETS + +# OFFSETS: Hex dump of section '.debug_loclists': +# OFFSETS-NEXT: 0x00000000 0e000000 05000800 00000000 01000000 ................ +## ^------- unit_length (4-byte) +## ^--- version (2-byte) +## ^- address_size (1-byte) +## ^- segment_selector_size (1-byte) +## ^------- offset_entry_count (4-byte) +## ^------- offsets[0] (4-byte) +# OFFSETS-NEXT: 0x00000010 02000000 03000000 01010201 0200 .............. +## ^------- offsets[1] (4-byte) +## ^------- offsets[2] (4-byte) +## ^- DW_LLE_base_addressx +## ^- operands[0] (ULEB128) 0x01 +## ^- DW_LLE_startx_endx +## ^- operands[0] (ULEB128) 0x01 +## ^- operands[1] (ULEB128) 0x02 +## ^- location descriptions length (ULEB128) 0x00 From 10851f9db5f7d163135374b8dfc945e1b4a9c7d6 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Wed, 22 Jul 2020 13:36:13 +0300 Subject: [PATCH 196/600] [analyzer][tests] Fix SATest update functionality Summary: Not all projects in the project map file might have newer results for updating, we should handle this situation gracefully. Additionally, not every user of the test system would want storing reference results in git. For this reason, git functionality is now optional. 
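With this change, both modes are driven from the command line; roughly (a
usage sketch based on the `update` subcommand and the `--git` flag added
below, not output from a real run):

```
# Refresh the reference results without touching the git index:
./SATest.py update

# Refresh the reference results and stage them with git:
./SATest.py update --git
```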
Differential Revision: https://reviews.llvm.org/D84303 --- clang/utils/analyzer/SATest.py | 5 ++-- clang/utils/analyzer/SATestUpdateDiffs.py | 33 +++++++++-------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/clang/utils/analyzer/SATest.py b/clang/utils/analyzer/SATest.py index 46e636ad2895e..86571902502f9 100755 --- a/clang/utils/analyzer/SATest.py +++ b/clang/utils/analyzer/SATest.py @@ -78,7 +78,7 @@ def update(parser, args): project_map = ProjectMap() for project in project_map.projects: - SATestUpdateDiffs.update_reference_results(project) + SATestUpdateDiffs.update_reference_results(project, args.git) def benchmark(parser, args): @@ -277,7 +277,8 @@ def main(): "update", help="Update static analyzer reference results based on the previous " "run of SATest build. Assumes that SATest build was just run.") - # TODO: add option to decide whether we should use git + upd_parser.add_argument("--git", action="store_true", + help="Stage updated results using git.") upd_parser.set_defaults(func=update) # docker subcommand diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py index 920fa15e4c6f5..69b3383beaf17 100644 --- a/clang/utils/analyzer/SATestUpdateDiffs.py +++ b/clang/utils/analyzer/SATestUpdateDiffs.py @@ -15,7 +15,7 @@ Verbose = 0 -def update_reference_results(project: ProjectInfo): +def update_reference_results(project: ProjectInfo, git: bool = False): test_info = SATestBuild.TestInfo(project) tester = SATestBuild.ProjectTester(test_info) project_dir = tester.get_project_dir() @@ -27,9 +27,10 @@ def update_reference_results(project: ProjectInfo): created_results_path = tester.get_output_dir() if not os.path.exists(created_results_path): - print("New results not found, was SATestBuild.py previously run?", + print(f"Skipping project '{project.name}', " + f"it doesn't have newer results.", file=sys.stderr) - sys.exit(1) + return build_log_path = SATestBuild.get_build_log_path(ref_results_path) build_log_dir = os.path.dirname(os.path.abspath(build_log_path)) @@ -45,7 +46,8 @@ def run_cmd(command: str): # Remove reference results: in git, and then again for a good measure # with rm, as git might not remove things fully if there are empty # directories involved. - run_cmd(f"git rm -r -q '{ref_results_path}'") + if git: + run_cmd(f"git rm -r -q '{ref_results_path}'") shutil.rmtree(ref_results_path) # Replace reference results with a freshly computed once. @@ -60,22 +62,11 @@ def run_cmd(command: str): # Clean up the generated difference results. SATestBuild.cleanup_reference_results(ref_results_path) - run_cmd(f"git add '{ref_results_path}'") + if git: + run_cmd(f"git add '{ref_results_path}'") -# TODO: use argparse -def main(argv): - if len(argv) == 2 and argv[1] in ("-h", "--help"): - print("Update static analyzer reference results based " - "\non the previous run of SATestBuild.py.\n" - "\nN.B.: Assumes that SATestBuild.py was just run", - file=sys.stderr) - sys.exit(1) - - project_map = ProjectMap() - for project in project_map.projects: - update_reference_results(project) - - -if __name__ == '__main__': - main(sys.argv) +if __name__ == "__main__": + print("SATestUpdateDiffs.py should not be used on its own.") + print("Please use 'SATest.py update' instead") + sys.exit(1) From 31c7a2fd5c9a5b980768bd06354517d99ac8fe87 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 3 Aug 2020 10:19:33 -0500 Subject: [PATCH 197/600] [FPEnv] Don't transform FSUB(-0,X)->FNEG(X) in SelectionDAGBuilder. 
This patch stops unconditionally transforming FSUB(-0,X) into an FNEG(X) while building the DAG. There is also one small change to handle the new FSUB(-0,X) similarly to FNEG(X) in the AMDGPU backend. Differential Revision: https://reviews.llvm.org/D84056 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 14 -- .../SelectionDAG/SelectionDAGBuilder.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 +- .../AMDGPU/fcanonicalize-elimination.ll | 4 +- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 12 +- llvm/test/CodeGen/AMDGPU/fcanonicalize.ll | 8 +- .../CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll | 2 +- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll | 10 +- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll | 10 +- llvm/test/CodeGen/AMDGPU/fneg-combines.ll | 228 +++++++++--------- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll | 4 +- llvm/test/CodeGen/AMDGPU/set-dx10.ll | 12 +- 12 files changed, 155 insertions(+), 162 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c2a284af592d7..9c1517ea74140 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3005,20 +3005,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f0b7fb0d52299..7bad055198140 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -692,7 +692,7 @@ class SelectionDAGBuilder { void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } void visitSub(const User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(const User &I); + void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } void visitMul(const User &I) { visitBinary(I, ISD::MUL); } void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } void visitURem(const User &I) { visitBinary(I, ISD::UREM); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3ffd9b79864a2..1f5d83d379495 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3795,8 +3795,15 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); if (Res.getOpcode() != AMDGPUISD::FMED3) return SDValue(); // Op got folded away. 
- if (!N0.hasOneUse()) - DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); + + if (!N0.hasOneUse()) { + SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res); + DAG.ReplaceAllUsesWith(N0, Neg); + + for (SDNode *U : Neg->uses()) + DCI.AddToWorklist(U); + } + return Res; } case ISD::FP_EXTEND: diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll index f7f075b095317..0ba4d8d82bdf8 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -311,7 +311,7 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fneg_value_f32(float addrsp %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %load = load float, float addrspace(1)* %gep, align 4 - %v = fsub float -0.0, %load + %v = fneg float %load %canonicalized = tail call float @llvm.canonicalize.f32(float %v) store float %canonicalized, float addrspace(1)* %gep, align 4 ret void @@ -327,7 +327,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fneg_value_f32(float addrspace %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %load = load float, float addrspace(1)* %gep, align 4 %v0 = fadd float %load, 0.0 - %v = fsub float -0.0, %v0 + %v = fneg float %v0 %canonicalized = tail call float @llvm.canonicalize.f32(float %v) store float %canonicalized, float addrspace(1)* %gep, align 4 ret void diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 7a44d11ad091d..c6af4baee6509 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -77,7 +77,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f16(half addrspace(1)* % define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 { %val = load half, half addrspace(1)* %out %val.fabs = call half @llvm.fabs.f16(half %val) - %val.fabs.fneg = fsub half -0.0, %val.fabs + %val.fabs.fneg = fneg half %val.fabs %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -91,7 +91,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace( ; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}} define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 { %val = load half, half addrspace(1)* %out - %val.fneg = fsub half -0.0, %val + %val.fneg = fneg half %val %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -103,7 +103,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* % ; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]] define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 { %val = load half, half addrspace(1)* %out - %val.fneg = fsub half -0.0, %val + %val.fneg = fneg half %val %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -120,7 +120,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half ad define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 { %val = load half, half addrspace(1)* %out %val.fabs = call half @llvm.fabs.f16(half %val) - %val.fabs.fneg = fsub half 
-0.0, %val.fabs + %val.fabs.fneg = fneg half %val.fabs %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -323,7 +323,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid %val = load <2 x half>, <2 x half> addrspace(1)* %gep %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val) - %val.fabs.fneg = fsub <2 x half> , %val.fabs + %val.fabs.fneg = fneg <2 x half> %val.fabs %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg) store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out ret void @@ -340,7 +340,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid %val = load <2 x half>, <2 x half> addrspace(1)* %gep - %fneg.val = fsub <2 x half> , %val + %fneg.val = fneg <2 x half> %val %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val) store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll index e302d59b1ac74..d95194a8716df 100644 --- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f32(float addrspace(1)* define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 { %val = load float, float addrspace(1)* %out %val.fabs = call float @llvm.fabs.f32(float %val) - %val.fabs.fneg = fsub float -0.0, %val.fabs + %val.fabs.fneg = fneg float %val.fabs %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg) store float %canonicalized, float addrspace(1)* %out ret void @@ -69,7 +69,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REG]] define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 { %val = load float, float addrspace(1)* %out - %val.fneg = fsub float -0.0, %val + %val.fneg = fneg float %val %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg) store float %canonicalized, float addrspace(1)* %out ret void @@ -264,7 +264,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_f64(double addrspace(1)* define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 { %val = load double, double addrspace(1)* %out %val.fabs = call double @llvm.fabs.f64(double %val) - %val.fabs.fneg = fsub double -0.0, %val.fabs + %val.fabs.fneg = fneg double %val.fabs %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg) store double %canonicalized, double addrspace(1)* %out ret void @@ -275,7 +275,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspac ; GCN: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[REG]] define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 { %val = load double, double addrspace(1)* %out - %val.fneg = fsub double -0.0, %val + %val.fneg = fneg double %val %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg) store double %canonicalized, double addrspace(1)* %out ret void diff --git 
a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll index 197bdd77d2544..d62155c1220dc 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll @@ -311,7 +311,7 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) { ; GCN: global_store_dwordx4 define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) { %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16 - %neg = fsub <4 x float> , %load + %neg = fneg <4 x float> %load %div = fdiv <4 x float> , %neg, !fpmath !0 store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16 ret void diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll index cd19e237be709..293f59275372d 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -256,7 +256,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f16(half addrspace(1)* %out, %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -287,7 +287,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f16(half addrspace(1)* %out, half %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -312,7 +312,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f16(half addrspace(1)* %out, half %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r2.fneg = fsub half -0.000000e+00, %r2 + %r2.fneg = fneg half %r2 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg) store half %r3, half addrspace(1)* %gep.out @@ -494,8 +494,8 @@ define amdgpu_kernel void @neg_neg_mad_f16(half addrspace(1)* noalias nocapture %a = load volatile half, half addrspace(1)* %gep0, align 2 %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 - %nega = fsub half -0.000000e+00, %a - %negb = fsub half -0.000000e+00, %b + %nega = fneg half %a + %negb = fneg half %b %mul = fmul half %nega, %negb %sub = fadd half %mul, %c store half %sub, half addrspace(1)* %outgep, align 2 diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll index 19862fbf13fe6..b97d36c046aff 100644 --- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -274,7 +274,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -307,7 +307,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 
= tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -339,7 +339,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r2.fneg = fsub float -0.000000e+00, %r2 + %r2.fneg = fneg float %r2 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg) store float %r3, float addrspace(1)* %gep.out @@ -517,8 +517,8 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %a = load volatile float, float addrspace(1)* %gep0, align 4 %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 - %nega = fsub float -0.000000e+00, %a - %negb = fsub float -0.000000e+00, %b + %nega = fneg float %a + %negb = fneg float %b %mul = fmul float %nega, %negb %sub = fadd float %mul, %c store float %sub, float addrspace(1)* %outgep, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll index 54ccc8fd870d4..01b0e6d17d1ac 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -26,7 +26,7 @@ define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrsp %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store float %fneg, float addrspace(1)* %out.gep ret void } @@ -47,7 +47,7 @@ define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %add, float addrspace(1)* %out ret void @@ -75,7 +75,7 @@ define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %add, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -100,9 +100,9 @@ define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -124,9 +124,9 @@ define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %add = fadd float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -148,10 +148,10 @@ define 
amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, fl %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %add = fadd float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -177,9 +177,9 @@ define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* % %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -205,9 +205,9 @@ define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* % %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -226,7 +226,7 @@ define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -249,7 +249,7 @@ define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -274,7 +274,7 @@ define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrsp %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -295,7 +295,7 @@ define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -318,7 +318,7 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float 
addrspace(1)* %out %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %mul, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -338,9 +338,9 @@ define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -358,9 +358,9 @@ define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = fmul float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -378,10 +378,10 @@ define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, fl %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = fmul float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -402,9 +402,9 @@ define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* % %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -425,9 +425,9 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* % %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -454,7 +454,7 @@ define amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, floa %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -466,7 +466,7 @@ define 
amdgpu_kernel void @v_fneg_minnum_f32_ieee(float addrspace(1)* %out, floa ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -509,7 +509,7 @@ define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -520,7 +520,7 @@ define amdgpu_kernel void @v_fneg_posk_minnum_f32_ieee(float addrspace(1)* %out, ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -536,7 +536,7 @@ define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -547,7 +547,7 @@ define amdgpu_kernel void @v_fneg_negk_minnum_f32_ieee(float addrspace(1)* %out, ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -562,7 +562,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float a %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -579,7 +579,7 @@ define amdgpu_kernel void @v_fneg_neg0_minnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -602,7 +602,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_f32(float addrspace(1)* %out, fl %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -624,7 +624,7 @@ define amdgpu_kernel void @v_fneg_neg_inv2pi_minnum_f32(float addrspace(1)* %out %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -730,7 +730,7 @@ define amdgpu_kernel void 
@v_fneg_neg_inv2pi_minnum_f64(double addrspace(1)* %ou ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -750,7 +750,7 @@ define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32_ieee(float addrspace %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -779,7 +779,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -793,7 +793,7 @@ define amdgpu_kernel void @v_fneg_inv2pi_minnum_foldable_use_f32(float addrspace ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b ret float %mul } @@ -816,7 +816,7 @@ define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspa %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -831,7 +831,7 @@ define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32_ieee(float addrspa ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -859,7 +859,7 @@ define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, floa %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -871,7 +871,7 @@ define amdgpu_kernel void @v_fneg_maxnum_f32_ieee(float addrspace(1)* %out, floa ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -914,7 +914,7 @@ define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store 
float %fneg, float addrspace(1)* %out.gep ret void } @@ -925,7 +925,7 @@ define amdgpu_kernel void @v_fneg_posk_maxnum_f32_ieee(float addrspace(1)* %out, ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -941,7 +941,7 @@ define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -952,7 +952,7 @@ define amdgpu_kernel void @v_fneg_negk_maxnum_f32_ieee(float addrspace(1)* %out, ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -967,7 +967,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float a %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -984,7 +984,7 @@ define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -995,7 +995,7 @@ define amdgpu_kernel void @v_fneg_neg0_maxnum_f32_ieee(float addrspace(1)* %out, ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -1015,7 +1015,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -1029,7 +1029,7 @@ define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32_ieee(float addrspace ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b ret float %mul } @@ -1052,7 +1052,7 @@ define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspa %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float 
addrspace(1)* %out @@ -1067,7 +1067,7 @@ define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float addrspa ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -1099,7 +1099,7 @@ define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float addrsp %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1123,7 +1123,7 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fma, float addrspace(1)* %out ret void @@ -1154,7 +1154,7 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1182,9 +1182,9 @@ define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, flo %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1210,9 +1210,9 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1238,10 +1238,10 @@ define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1267,10 +1267,10 @@ define amdgpu_kernel void 
@v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.c = fsub float -0.000000e+00, %c + %fneg.a = fneg float %a + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1296,9 +1296,9 @@ define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, flo %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.c = fsub float -0.000000e+00, %c + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1330,9 +1330,9 @@ define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1360,9 +1360,9 @@ define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fneg.a, %d store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1394,7 +1394,7 @@ define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrs %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1446,7 +1446,7 @@ define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %o %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1483,7 +1483,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to 
double %fneg = fsub double -0.000000e+00, %fpext store double %fneg, double addrspace(1)* %out.gep @@ -1502,7 +1502,7 @@ define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double add %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store volatile double %fneg, double addrspace(1)* %out.gep @@ -1559,7 +1559,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addr %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpext, float addrspace(1)* %out.gep ret void @@ -1573,7 +1573,7 @@ define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(f %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext %mul = fmul float %fpext, 4.0 store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %mul, float addrspace(1)* %out.gep @@ -1595,7 +1595,7 @@ define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1612,7 +1612,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* % %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1631,7 +1631,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrs %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %fneg.a, double addrspace(1)* undef ret void @@ -1652,7 +1652,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrs %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround %use1 = fmul double %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %use1, double addrspace(1)* undef @@ -1685,7 +1685,7 @@ define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %o %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float 
addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store half %fneg, half addrspace(1)* %out.gep @@ -1705,7 +1705,7 @@ define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrs %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpround, float addrspace(1)* %out.gep ret void @@ -1723,7 +1723,7 @@ define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrsp %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store volatile half %fneg, half addrspace(1)* %out.gep @@ -1743,7 +1743,7 @@ define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrsp %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround %use1 = fmul float %fneg.a, %c @@ -1767,7 +1767,7 @@ define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrsp %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rcp = call float @llvm.amdgcn.rcp.f32(float %a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1782,9 +1782,9 @@ define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float a %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1801,9 +1801,9 @@ define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %ou %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fneg.a, float addrspace(1)* undef ret void @@ -1821,9 +1821,9 @@ define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %ou %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* 
%out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %use1, float addrspace(1)* undef @@ -1848,7 +1848,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1869,7 +1869,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addr %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -1891,7 +1891,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addr %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1911,9 +1911,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1931,9 +1931,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1951,10 +1951,10 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* % %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1974,9 +1974,9 @@ define amdgpu_kernel void 
@v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspac %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1997,9 +1997,9 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspac %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -2023,7 +2023,7 @@ define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrsp %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.sin.f32(float %a) - %fneg = fsub float -0.000000e+00, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll index fe7a350a1d9f3..733399dd09096 100644 --- a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -11,7 +11,7 @@ define amdgpu_kernel void @test_a(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 @@ -39,7 +39,7 @@ define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) { entry: %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 diff --git a/llvm/test/CodeGen/AMDGPU/set-dx10.ll b/llvm/test/CodeGen/AMDGPU/set-dx10.ll index 6867c6394937c..9a317a87540f4 100644 --- a/llvm/test/CodeGen/AMDGPU/set-dx10.ll +++ b/llvm/test/CodeGen/AMDGPU/set-dx10.ll @@ -12,7 +12,7 @@ define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp une float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -38,7 +38,7 @@ define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -64,7 +64,7 @@ define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 
1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -90,7 +90,7 @@ define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -116,7 +116,7 @@ define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -142,7 +142,7 @@ define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float entry: %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void From 7ba82a7320df82d07d3d5679bce89b14526b536c Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Aug 2020 16:16:47 +0000 Subject: [PATCH 198/600] Revert "[mlir][Vector] Add transformation + pattern to split vector.transfer_read into full and partial copies." This reverts commit 35b65be041127db9fe23d3128a004c888893cbae. Build is broken with -DBUILD_SHARED_LIBS=ON with some undefined references like: VectorTransforms.cpp:(.text._ZN4llvm12function_refIFvllEE11callback_fnIZL24createScopedInBoundsCondN4mlir25VectorTransferOpInterfaceEE3$_8EEvlll+0xa5): undefined reference to `mlir::edsc::op::operator+(mlir::Value, mlir::Value)' --- .../mlir/Dialect/Vector/VectorTransforms.h | 64 ----- .../mlir/Interfaces/VectorInterfaces.td | 13 - mlir/lib/Dialect/Vector/VectorTransforms.cpp | 234 ------------------ .../vector-transfer-full-partial-split.mlir | 102 -------- .../lib/Transforms/TestVectorTransforms.cpp | 16 -- 5 files changed, 429 deletions(-) delete mode 100644 mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h index 835ad18a79ad2..0d18c5aa782d1 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -17,11 +17,6 @@ namespace mlir { class MLIRContext; class OwningRewritePatternList; -class VectorTransferOpInterface; - -namespace scf { -class IfOp; -} // namespace scf /// Collect a set of patterns to convert from the Vector dialect to itself. /// Should be merged with populateVectorToSCFLoweringPattern. @@ -109,65 +104,6 @@ struct UnrollVectorPattern : public OpRewritePattern { FilterConstraintType filter; }; -/// Split a vector.transfer operation into an unmasked fastpath vector.transfer -/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result -/// is `success, the `ifOp` points to the newly created conditional upon -/// function return. To accomodate for the fact that the original -/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0] -/// in the temporary buffer, the scf.if op returns a view and values of type -/// index. At this time, only vector.transfer_read is implemented. 
-/// -/// Example (a 2-D vector.transfer_read): -/// ``` -/// %1 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// ``` -/// is transformed into: -/// ``` -/// %1:3 = scf.if (%inBounds) { -/// scf.yield %0 : memref, index, index -/// } else { -/// %2 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// %3 = vector.type_cast %extra_alloc : memref<...> to -/// memref> store %2, %3[] : memref> %4 = -/// memref_cast %extra_alloc: memref to memref scf.yield %4 : -/// memref, index, index -// } -/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} -/// ``` -/// where `extra_alloc` is a top of the function alloca'ed buffer of one vector. -/// -/// Preconditions: -/// 1. `xferOp.permutation_map()` must be a minor identity map -/// 2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()` -/// must be equal. This will be relaxed in the future but requires -/// rank-reducing subviews. -LogicalResult -splitFullAndPartialTransferPrecondition(VectorTransferOpInterface xferOp); -LogicalResult splitFullAndPartialTransfer(OpBuilder &b, - VectorTransferOpInterface xferOp, - scf::IfOp *ifOp = nullptr); - -/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern -/// may take an extra filter to perform selection at a finer granularity. -struct VectorTransferFullPartialRewriter : public RewritePattern { - using FilterConstraintType = - std::function; - - explicit VectorTransferFullPartialRewriter( - MLIRContext *context, - FilterConstraintType filter = - [](VectorTransferOpInterface op) { return success(); }, - PatternBenefit benefit = 1) - : RewritePattern(benefit, MatchAnyOpTypeTag()), filter(filter) {} - - /// Performs the rewrite. - LogicalResult matchAndRewrite(Operation *op, - PatternRewriter &rewriter) const override; - -private: - FilterConstraintType filter; -}; - } // namespace vector //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td index 218715318a867..aefbb7d471172 100644 --- a/mlir/include/mlir/Interfaces/VectorInterfaces.td +++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td @@ -160,19 +160,6 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { /*defaultImplementation=*/ "return $_op.getMemRefType().getRank() - $_op.getTransferRank();" >, - InterfaceMethod< - /*desc=*/[{ Returns true if at least one of the dimensions is masked.}], - /*retTy=*/"bool", - /*methodName=*/"hasMaskedDim", - /*args=*/(ins), - /*methodBody=*/"", - /*defaultImplementation=*/[{ - for (unsigned idx = 0, e = $_op.getTransferRank(); idx < e; ++idx) - if ($_op.isMaskedDim(idx)) - return true; - return false; - }] - >, InterfaceMethod< /*desc=*/[{ Helper function to account for the fact that `permutationMap` results and diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 573b822503f3a..197b1c62274b2 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -12,13 +12,9 @@ #include -#include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/SCF/EDSC/Intrinsics.h" -#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" -#include "mlir/Dialect/Vector/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/VectorOps.h" 
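The hasMaskedDim interface method deleted from VectorInterfaces.td above had a one-loop default implementation; restated as a free-standing C++ function for clarity (equivalent to the removed tablegen body; the header choice is an assumption, any header declaring VectorTransferOpInterface works):

  #include "mlir/Dialect/Vector/VectorOps.h"

  // True iff at least one transfer dimension is masked.
  static bool hasMaskedDim(mlir::VectorTransferOpInterface op) {
    for (unsigned idx = 0, e = op.getTransferRank(); idx < e; ++idx)
      if (op.isMaskedDim(idx))
        return true;
    return false;
  }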
#include "mlir/Dialect/Vector/VectorTransforms.h" #include "mlir/Dialect/Vector/VectorUtils.h" @@ -1989,236 +1985,6 @@ Value ContractionOpLowering::lowerReduction(vector::ContractionOp op, } // namespace mlir -static Optional extractConstantIndex(Value v) { - if (auto cstOp = v.getDefiningOp()) - return cstOp.getValue(); - if (auto affineApplyOp = v.getDefiningOp()) - if (affineApplyOp.getAffineMap().isSingleConstant()) - return affineApplyOp.getAffineMap().getSingleConstantResult(); - return None; -} - -// Missing foldings of scf.if make it necessary to perform poor man's folding -// eagerly, especially in the case of unrolling. In the future, this should go -// away once scf.if folds properly. -static Value createScopedFoldedSLE(Value v, Value ub) { - using namespace edsc::op; - auto maybeCstV = extractConstantIndex(v); - auto maybeCstUb = extractConstantIndex(ub); - if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb) - return Value(); - return sle(v, ub); -} - -// Operates under a scoped context to build the condition to ensure that a -// particular VectorTransferOpInterface is unmasked. -static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) { - assert(xferOp.permutation_map().isMinorIdentity() && - "Expected minor identity map"); - Value inBoundsCond; - xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) { - // Zip over the resulting vector shape and memref indices. - // If the dimension is known to be unmasked, it does not participate in the - // construction of `inBoundsCond`. - if (!xferOp.isMaskedDim(resultIdx)) - return; - int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx); - using namespace edsc::op; - using namespace edsc::intrinsics; - // Fold or create the check that `index + vector_size` <= `memref_size`. - Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize); - Value cond = - createScopedFoldedSLE(sum, std_dim(xferOp.memref(), indicesIdx)); - if (!cond) - return; - // Conjunction over all dims for which we are in-bounds. - inBoundsCond = inBoundsCond ? inBoundsCond && cond : cond; - }); - return inBoundsCond; -} - -LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition( - VectorTransferOpInterface xferOp) { - // TODO: expand support to these 2 cases. - if (!xferOp.permutation_map().isMinorIdentity()) - return failure(); - // TODO: relax this precondition. This will require rank-reducing subviews. - if (xferOp.getMemRefType().getRank() != xferOp.getTransferRank()) - return failure(); - // Must have some masked dimension to be a candidate for splitting. - if (!xferOp.hasMaskedDim()) - return failure(); - // Don't split transfer operations under IfOp, this avoids applying the - // pattern recursively. - // TODO: improve the condition to make it more applicable. 
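The createScopedInBoundsCond helper deleted above encodes a simple predicate: the fast path is legal only if, for every masked dimension, the transfer's start index plus the vector size stays within the memref extent, with the per-dimension checks conjoined. A self-contained plain-C++ illustration of that predicate shape (not MLIR code; all names are illustrative):

  #include <cstdint>
  #include <vector>

  // In-bounds iff indices[d] + vectorShape[d] <= memrefShape[d]
  // holds in every masked dimension d.
  bool inBounds(const std::vector<int64_t> &indices,
                const std::vector<int64_t> &vectorShape,
                const std::vector<int64_t> &memrefShape,
                const std::vector<bool> &masked) {
    bool ok = true;
    for (size_t d = 0; d < indices.size(); ++d)
      if (masked[d])
        ok = ok && (indices[d] + vectorShape[d] <= memrefShape[d]);
    return ok;
  }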
- if (xferOp.getParentOfType()) - return failure(); - return success(); -} - -MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { - if (MemRefCastOp::areCastCompatible(aT, bT)) - return aT; - if (aT.getRank() != bT.getRank()) - return MemRefType(); - int64_t aOffset, bOffset; - SmallVector aStrides, bStrides; - if (failed(getStridesAndOffset(aT, aStrides, aOffset)) || - failed(getStridesAndOffset(bT, bStrides, bOffset)) || - aStrides.size() != bStrides.size()) - return MemRefType(); - - ArrayRef aShape = aT.getShape(), bShape = bT.getShape(); - int64_t resOffset; - SmallVector resShape(aT.getRank(), 0), - resStrides(bT.getRank(), 0); - for (int64_t idx = 0, e = aT.getRank(); idx < e; ++idx) { - resShape[idx] = - (aShape[idx] == bShape[idx]) ? aShape[idx] : MemRefType::kDynamicSize; - resStrides[idx] = (aStrides[idx] == bStrides[idx]) - ? aStrides[idx] - : MemRefType::kDynamicStrideOrOffset; - } - resOffset = - (aOffset == bOffset) ? aOffset : MemRefType::kDynamicStrideOrOffset; - return MemRefType::get( - resShape, aT.getElementType(), - makeStridedLinearLayoutMap(resStrides, resOffset, aT.getContext())); -} - -/// Split a vector.transfer operation into an unmasked fastpath vector.transfer -/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result -/// is `success, the `ifOp` points to the newly created conditional upon -/// function return. To accomodate for the fact that the original -/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0] -/// in the temporary buffer, the scf.if op returns a view and values of type -/// index. At this time, only vector.transfer_read is implemented. -/// -/// Example (a 2-D vector.transfer_read): -/// ``` -/// %1 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// ``` -/// is transformed into: -/// ``` -/// %1:3 = scf.if (%inBounds) { -/// scf.yield %0 : memref, index, index -/// } else { -/// %2 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// %3 = vector.type_cast %extra_alloc : memref<...> to -/// memref> store %2, %3[] : memref> %4 = -/// memref_cast %extra_alloc: memref to memref scf.yield %4 : -/// memref, index, index -// } -/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} -/// ``` -/// where `extra_alloc` is a top of the function alloca'ed buffer of one vector. -/// -/// Preconditions: -/// 1. `xferOp.permutation_map()` must be a minor identity map -/// 2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()` -/// must be equal. This will be relaxed in the future but requires -/// rank-reducing subviews. -LogicalResult mlir::vector::splitFullAndPartialTransfer( - OpBuilder &b, VectorTransferOpInterface xferOp, scf::IfOp *ifOp) { - using namespace edsc; - using namespace edsc::intrinsics; - - assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) && - "Expected splitFullAndPartialTransferPrecondition to hold"); - auto xferReadOp = dyn_cast(xferOp.getOperation()); - - // TODO: add support for write case. - if (!xferReadOp) - return failure(); - - OpBuilder::InsertionGuard guard(b); - if (xferOp.memref().getDefiningOp()) - b.setInsertionPointAfter(xferOp.memref().getDefiningOp()); - else - b.setInsertionPoint(xferOp); - ScopedContext scope(b, xferOp.getLoc()); - Value inBoundsCond = createScopedInBoundsCond( - cast(xferOp.getOperation())); - if (!inBoundsCond) - return failure(); - - // Top of the function `alloc` for transient storage. 
- Value alloc; - { - FuncOp funcOp = xferOp.getParentOfType(); - OpBuilder::InsertionGuard guard(b); - b.setInsertionPointToStart(&funcOp.getRegion().front()); - auto shape = xferOp.getVectorType().getShape(); - Type elementType = xferOp.getVectorType().getElementType(); - alloc = std_alloca(MemRefType::get(shape, elementType), ValueRange{}, - b.getI64IntegerAttr(32)); - } - - Value memref = xferOp.memref(); - SmallVector bools(xferOp.getTransferRank(), false); - auto unmaskedAttr = b.getBoolArrayAttr(bools); - - MemRefType compatibleMemRefType = getCastCompatibleMemRefType( - xferOp.getMemRefType(), alloc.getType().cast()); - - // Read case: full fill + partial copy -> unmasked vector.xfer_read. - Value zero = std_constant_index(0); - SmallVector returnTypes(1 + xferOp.getTransferRank(), - b.getIndexType()); - returnTypes[0] = compatibleMemRefType; - scf::IfOp fullPartialIfOp; - conditionBuilder( - returnTypes, inBoundsCond, - [&]() -> scf::ValueVector { - Value res = memref; - if (compatibleMemRefType != xferOp.getMemRefType()) - res = std_memref_cast(memref, compatibleMemRefType); - scf::ValueVector viewAndIndices{res}; - viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), - xferOp.indices().end()); - return viewAndIndices; - }, - [&]() -> scf::ValueVector { - Operation *newXfer = - ScopedContext::getBuilderRef().clone(*xferOp.getOperation()); - Value vector = cast(newXfer).vector(); - std_store(vector, vector_type_cast( - MemRefType::get({}, vector.getType()), alloc)); - - Value casted = std_memref_cast(alloc, compatibleMemRefType); - scf::ValueVector viewAndIndices{casted}; - viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), - zero); - - return viewAndIndices; - }, - &fullPartialIfOp); - if (ifOp) - *ifOp = fullPartialIfOp; - - // Unmask the existing read op, it always reads from a full buffer. - for (unsigned i = 0, e = returnTypes.size(); i != e; ++i) - xferReadOp.setOperand(i, fullPartialIfOp.getResult(i)); - xferOp.setAttr(vector::TransferReadOp::getMaskedAttrName(), unmaskedAttr); - - return success(); -} - -LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( - Operation *op, PatternRewriter &rewriter) const { - auto xferOp = dyn_cast(op); - if (!xferOp || failed(splitFullAndPartialTransferPrecondition(xferOp)) || - failed(filter(xferOp))) - return failure(); - rewriter.startRootUpdate(xferOp); - if (succeeded(splitFullAndPartialTransfer(rewriter, xferOp))) { - rewriter.finalizeRootUpdate(xferOp); - return success(); - } - rewriter.cancelRootUpdate(xferOp); - return failure(); -} - // TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp). // TODO: Add this as DRR pattern. 
void mlir::vector::populateVectorToVectorTransformationPatterns( diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir deleted file mode 100644 index ef76247ee9d4b..0000000000000 --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: mlir-opt %s -test-vector-transfer-full-partial-split | FileCheck %s - -// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)> -// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)> -// CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> - -// CHECK-LABEL: split_vector_transfer_read_2d( -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref -// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index -// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index -func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: index) -> vector<4x8xf32> { - %c0 = constant 0 : index - %f0 = constant 0.0 : f32 - - // CHECK-DAG: %[[c0:.*]] = constant 0 : index - // CHECK-DAG: %[[c8:.*]] = constant 8 : index - // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 - // alloca for boundary full tile - // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> - // %i + 4 <= dim(%A, 0) - // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] - // CHECK: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref - // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[d0]] : index - // %j + 8 <= dim(%A, 1) - // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] - // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index - // are both conds true - // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 - // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { - // inBounds, just yield %A - // CHECK: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index - // CHECK: } else { - // slow path, fill tmp alloc and yield a memref_casted version of it - // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst : - // CHECK-SAME: memref, vector<4x8xf32> - // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] : - // CHECK-SAME: memref<4x8xf32> to memref> - // CHECK: store %[[slow]], %[[cast_alloc]][] : memref> - // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : - // CHECK-SAME: memref<4x8xf32> to memref - // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : - // CHECK-SAME: memref, index, index - // CHECK: } - // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %[[cst]] - // CHECK_SAME: {masked = [false, false]} : memref, vector<4x8xf32> - %1 = vector.transfer_read %A[%i, %j], %f0 : memref, vector<4x8xf32> - - // CHECK: return %[[res]] : vector<4x8xf32> - return %1: vector<4x8xf32> -} - -// CHECK-LABEL: split_vector_transfer_read_strided_2d( -// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref -// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index -// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index -func @split_vector_transfer_read_strided_2d( - %A: memref<7x8xf32, offset:?, strides:[?, 1]>, - %i: index, %j: index) -> vector<4x8xf32> { - %c0 = constant 0 : index - %f0 = constant 0.0 : f32 - - // CHECK-DAG: %[[c0:.*]] = constant 0 : index - // CHECK-DAG: %[[c7:.*]] = constant 7 : index - // CHECK-DAG: %[[c8:.*]] = constant 8 : index - // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 - // alloca for boundary full tile - // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> - // %i + 4 <= dim(%A, 0) - // CHECK: %[[idx0:.*]] = affine.apply 
#[[$map_p4]]()[%[[i]]] - // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[c7]] : index - // %j + 8 <= dim(%A, 1) - // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] - // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index - // are both conds true - // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 - // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { - // inBounds but not cast-compatible: yield a memref_casted form of %A - // CHECK: %[[casted:.*]] = memref_cast %arg0 : - // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref - // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] : - // CHECK-SAME: memref, index, index - // CHECK: } else { - // slow path, fill tmp alloc and yield a memref_casted version of it - // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst : - // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32> - // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] : - // CHECK-SAME: memref<4x8xf32> to memref> - // CHECK: store %[[slow]], %[[cast_alloc]][] : - // CHECK-SAME: memref> - // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : - // CHECK-SAME: memref<4x8xf32> to memref - // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : - // CHECK-SAME: memref, index, index - // CHECK: } - // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {masked = [false, false]} : - // CHECK-SAME: memref, vector<4x8xf32> - %1 = vector.transfer_read %A[%i, %j], %f0 : - memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32> - - // CHECK: return %[[res]] : vector<4x8xf32> - return %1 : vector<4x8xf32> -} diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index 0bba74e76385e..2058706dcbdd3 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -122,17 +122,6 @@ struct TestVectorUnrollingPatterns } }; -struct TestVectorTransferFullPartialSplitPatterns - : public PassWrapper { - void runOnFunction() override { - MLIRContext *ctx = &getContext(); - OwningRewritePatternList patterns; - patterns.insert(ctx); - applyPatternsAndFoldGreedily(getFunction(), patterns); - } -}; - } // end anonymous namespace namespace mlir { @@ -152,10 +141,5 @@ void registerTestVectorConversions() { PassRegistration contractionUnrollingPass( "test-vector-unrolling-patterns", "Test conversion patterns to unroll contract ops in the vector dialect"); - - PassRegistration - vectorTransformFullPartialPass("test-vector-transfer-full-partial-split", - "Test conversion patterns to split " - "transfer ops via scf.if + linalg ops"); } } // namespace mlir From ee1c12708a4519361729205168dedb2b61bc2638 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 3 Aug 2020 16:19:06 +0100 Subject: [PATCH 199/600] [SCEV] If Start>=RHS, simplify (Start smin RHS) = RHS for trip counts. In some cases, it seems like we can get rid of unnecessary s/umins by using information from the loop guards (unless I am missing something). One place where this seems to be helpful in practice is when computing loop trip counts. This patch just changes howManyGreaterThans for now. Note that this requires a loop for which we can check 'is guarded'. On SPEC2000/SPEC2006/MultiSource, there are some notable changes for some programs in the number of loops unrolled and trip counts computed. 
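For intuition, a hedged sketch of the kind of source loop this affects (plain C++; `consume` is a hypothetical sink, and the corresponding IR pattern is in the pr46939 test below):

```cpp
void consume(int);

// A guarded count-down loop: howManyGreaterThans computes its trip count.
// The `n <= 0` early exit is the loop guard; with this patch, SCEV can use
// it to prove Start (= n) >= RHS (= 0) and simplify (0 smin n) to 0, so the
// backedge-taken count is expressed directly in terms of n rather than the
// smin expression.
void reverse_loop(int n) {
  if (n <= 0)
    return;
  for (int i = n; i > 0; --i)
    consume(i);
}
```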
``` Same hash: 179 (filtered out) Remaining: 58 Metric: scalar-evolution.NumTripCountsComputed Program base patch diff test-suite...langs-C/compiler/compiler.test 25.00 31.00 24.0% test-suite.../Applications/SPASS/SPASS.test 2020.00 2323.00 15.0% test-suite...langs-C/allroots/allroots.test 29.00 32.00 10.3% test-suite.../Prolangs-C/loader/loader.test 17.00 18.00 5.9% test-suite...fice-ispell/office-ispell.test 253.00 265.00 4.7% test-suite...006/450.soplex/450.soplex.test 3552.00 3692.00 3.9% test-suite...chmarks/MallocBench/gs/gs.test 453.00 470.00 3.8% test-suite...ngs-C/assembler/assembler.test 29.00 30.00 3.4% test-suite.../Benchmarks/Ptrdist/bc/bc.test 263.00 270.00 2.7% test-suite...rks/FreeBench/pifft/pifft.test 722.00 741.00 2.6% test-suite...count/automotive-bitcount.test 41.00 42.00 2.4% test-suite...0/253.perlbmk/253.perlbmk.test 1417.00 1451.00 2.4% test-suite...000/197.parser/197.parser.test 387.00 396.00 2.3% test-suite...lications/sqlite3/sqlite3.test 1168.00 1189.00 1.8% test-suite...000/255.vortex/255.vortex.test 173.00 176.00 1.7% Metric: loop-unroll.NumUnrolled Program base patch diff test-suite...langs-C/compiler/compiler.test 1.00 3.00 200.0% test-suite.../Applications/SPASS/SPASS.test 134.00 234.00 74.6% test-suite...count/automotive-bitcount.test 3.00 4.00 33.3% test-suite.../Prolangs-C/loader/loader.test 3.00 4.00 33.3% test-suite...langs-C/allroots/allroots.test 3.00 4.00 33.3% test-suite...Source/Benchmarks/sim/sim.test 10.00 12.00 20.0% test-suite...fice-ispell/office-ispell.test 21.00 25.00 19.0% test-suite.../Benchmarks/Ptrdist/bc/bc.test 32.00 38.00 18.8% test-suite...006/450.soplex/450.soplex.test 300.00 352.00 17.3% test-suite...rks/FreeBench/pifft/pifft.test 60.00 69.00 15.0% test-suite...chmarks/MallocBench/gs/gs.test 57.00 63.00 10.5% test-suite...ngs-C/assembler/assembler.test 10.00 11.00 10.0% test-suite...0/253.perlbmk/253.perlbmk.test 145.00 157.00 8.3% test-suite...000/197.parser/197.parser.test 43.00 46.00 7.0% test-suite...TimberWolfMC/timberwolfmc.test 205.00 214.00 4.4% Geomean difference 7.6% ``` Fixes https://bugs.llvm.org/show_bug.cgi?id=46939 Fixes https://bugs.llvm.org/show_bug.cgi?id=46924 on X86. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D85046 --- llvm/lib/Analysis/ScalarEvolution.cpp | 11 +++++++++-- .../ScalarEvolution/pr46939-trip-count-count-down.ll | 8 ++++---- llvm/test/Transforms/HardwareLoops/scalar-while.ll | 8 ++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index a961be8cc35e1..23ad77ff60540 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10628,8 +10628,15 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = IV->getStart(); const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) - End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { + // If we know that Start >= RHS in the context of loop, then we know that + // min(RHS, Start) = RHS at this point. + if (isLoopEntryGuardedByCond( + L, IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, Start, RHS)) + End = RHS; + else + End = IsSigned ? 
getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); + } const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); diff --git a/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll b/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll index f1001ecec0fde..803652e47cf62 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll @@ -6,13 +6,13 @@ define void @reverse_loop(i32 %n) { ; CHECK-LABEL: 'reverse_loop' ; CHECK-NEXT: Classifying expressions for: @reverse_loop ; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] -; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: (0 smin %n) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 -; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: (-1 + (0 smin %n)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: -1 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @reverse_loop -; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: backedge-taken count is %n ; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 -; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is %n ; CHECK-NEXT: Predicates: ; CHECK: Loop %for.body: Trip multiple is 1 ; diff --git a/llvm/test/Transforms/HardwareLoops/scalar-while.ll b/llvm/test/Transforms/HardwareLoops/scalar-while.ll index aac94ecb86264..0b9847b33c714 100644 --- a/llvm/test/Transforms/HardwareLoops/scalar-while.ll +++ b/llvm/test/Transforms/HardwareLoops/scalar-while.ll @@ -76,18 +76,14 @@ while.end: ; CHECK-GUARD: br i1 %cmp4, label %while.end, label %while.body.preheader ; CHECK-GUARD: while.body.preheader: ; CHECK-GUARD: [[ADD:%[^ ]+]] = add i32 %i, 1 -; CHECK-GUARD: [[SEL:%[^ ]+]] = icmp slt i32 %N, %i -; CHECK-GUARD: [[MIN:%[^ ]+]] = select i1 [[SEL]], i32 %N, i32 %i -; CHECK-GUARD: [[COUNT:%[^ ]+]] = sub i32 [[ADD]], [[MIN]] +; CHECK-GUARD: [[COUNT:%[^ ]+]] = sub i32 [[ADD]], %N ; CHECK-GUARD: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK-GUARD: br label %while.body ; CHECK-LABEL: while_gte ; CHECK: while.body.preheader: ; CHECK: [[ADD:%[^ ]+]] = add i32 %i, 1 -; CHECK: [[SEL:%[^ ]+]] = icmp slt i32 %N, %i -; CHECK: [[MIN:%[^ ]+]] = select i1 [[SEL]], i32 %N, i32 %i -; CHECK: [[COUNT:%[^ ]+]] = sub i32 [[ADD]], [[MIN]] +; CHECK: [[COUNT:%[^ ]+]] = sub i32 [[ADD]], %N ; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]]) ; CHECK: br label %while.body From 3ebd1ba64f3d6f1e75f43213c50f0d1bd3902228 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Fri, 31 Jul 2020 18:53:15 +0000 Subject: [PATCH 200/600] [MSAN] Instrument freeze instruction by clearing shadow Freeze always returns a defined value. This also prevents msan from checking the input shadow, which happened because freeze wasn't explicitly visited. 
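As intuition for the rule, a toy model of the shadow semantics (a sketch only, not MSan's real API; all names here are hypothetical):

```cpp
#include <cstdint>

// Toy model of MSan shadow propagation: a value plus a shadow word whose
// set bits mark uninitialized bits of the value.
struct Shadowed {
  uint32_t value;
  uint32_t shadow;
};

// freeze: the result is always fully defined, so its shadow is all zeros;
// the operand's shadow is neither checked nor propagated at the freeze.
Shadowed freezeModel(Shadowed in) {
  return {in.value, /*shadow=*/0};
}
```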
Differential Revision: https://reviews.llvm.org/D85040 --- .../Instrumentation/MemorySanitizer.cpp | 6 +++++ .../Instrumentation/MemorySanitizer/freeze.ll | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/freeze.ll diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index edde80ce0ee8f..0f354c1da490a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4068,6 +4068,12 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); } + void visitFreezeInst(FreezeInst &I) { + // Freeze always returns a fully defined value. + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + void visitInstruction(Instruction &I) { // Everything else: stop propagating and check for poisoned shadow. if (ClDumpStrictInstructions) diff --git a/llvm/test/Instrumentation/MemorySanitizer/freeze.ll b/llvm/test/Instrumentation/MemorySanitizer/freeze.ll new file mode 100644 index 0000000000000..4be8d9efd6319 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/freeze.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck %s +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=2 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-ORIGIN +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @nofreeze(i32* %ptr) sanitize_memory { + ; CHECK-LABEL: @nofreeze + %val = load i32, i32* %ptr + ; CHECK: [[SHADOW_PTR:%.*]] = inttoptr + ; CHECK: [[SHADOW:%.*]] = load i32, i32* [[SHADOW_PTR]] + ; CHECK: store i32 [[SHADOW]], {{.*}} @__msan_retval_tls + ret i32 %val +} + +define i32 @freeze_inst(i32* %ptr) sanitize_memory { + ; CHECK-LABEL: @freeze_inst + %val = load i32, i32* %ptr + %freeze_val = freeze i32 %val + ; CHECK-NOT: __msan_warning + ; CHECK: store i32 0, {{.*}} @__msan_retval_tls + ret i32 %freeze_val +} From caf002c7be44cb6c54de5a1b19aa177f18b6b0c1 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Mon, 3 Aug 2020 16:45:35 +0000 Subject: [PATCH 201/600] [Utils] Add noundef attribute to vim/emacs/vscode syntax scripts Differential Revision: https://reviews.llvm.org/D84553 --- llvm/utils/emacs/llvm-mode.el | 2 +- llvm/utils/vim/syntax/llvm.vim | 1 + llvm/utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/utils/emacs/llvm-mode.el b/llvm/utils/emacs/llvm-mode.el index 73b02763d0166..1d0b97c5cd094 100644 --- a/llvm/utils/emacs/llvm-mode.el +++ b/llvm/utils/emacs/llvm-mode.el @@ -25,7 +25,7 @@ '("alwaysinline" "argmemonly" "builtin" "cold" "convergent" "inaccessiblememonly" "inaccessiblemem_or_argmemonly" "inlinehint" "jumptable" "minsize" "naked" "nobuiltin" "noduplicate" "noimplicitfloat" "noinline" "nonlazybind" "noredzone" "noreturn" - "norecurse" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice" + "norecurse" "noundef" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice" "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress" "sanitize_memtag" "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" 
"writeonly" "immarg") 'symbols) . font-lock-constant-face) ;; Variables diff --git a/llvm/utils/vim/syntax/llvm.vim b/llvm/utils/vim/syntax/llvm.vim index ce36b761d5daa..ebdd8faae3ef9 100644 --- a/llvm/utils/vim/syntax/llvm.vim +++ b/llvm/utils/vim/syntax/llvm.vim @@ -119,6 +119,7 @@ syn keyword llvmKeyword \ norecurse \ noredzone \ noreturn + \ noundef \ nounwind \ optnone \ optsize diff --git a/llvm/utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml b/llvm/utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml index 117ec134d5738..e6c574444321c 100644 --- a/llvm/utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml +++ b/llvm/utils/vscode/llvm/syntaxes/ll.tmLanguage.yaml @@ -216,6 +216,7 @@ patterns: \\bnorecurse\\b|\ \\bnoredzone\\b|\ \\bnoreturn\\b|\ + \\bnoundef\\b|\ \\bnounwind\\b|\ \\boptnone\\b|\ \\boptsize\\b|\ From 4b1b109c5126efc963cc19949df5201e40f1bcc1 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 30 Jul 2020 12:44:07 -0700 Subject: [PATCH 202/600] [llvm] Add a parser from JSON to TensorSpec A JSON->TensorSpec utility we will use subsequently to specify additional outputs needed for certain training scenarios. Differential Revision: https://reviews.llvm.org/D84976 --- llvm/include/llvm/Analysis/Utils/TFUtils.h | 44 ++++++++++++---- llvm/lib/Analysis/TFUtils.cpp | 59 +++++++++++++++------- llvm/unittests/Analysis/TFUtilsTest.cpp | 29 +++++++++++ 3 files changed, 103 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index 512f45bb5671a..d4450276a22ee 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -13,6 +13,7 @@ #ifdef LLVM_HAVE_TF_API #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/JSON.h" #include #include @@ -58,6 +59,13 @@ class TensorSpec final { int typeIndex() const { return TypeIndex; } const std::vector &shape() const { return Shape; } + bool operator==(const TensorSpec &Other) const { + return Name == Other.Name && Port == Other.Port && + TypeIndex == Other.TypeIndex && Shape == Other.Shape; + } + + bool operator!=(const TensorSpec &Other) const { return !(*this == Other); } + private: TensorSpec(const std::string &Name, int Port, int TypeIndex, const std::vector &Shape) @@ -73,6 +81,9 @@ class TensorSpec final { std::vector Shape; }; +Optional getTensorSpecFromJSON(LLVMContext &Ctx, + const json::Value &Value); + class TFModelEvaluator final { public: /// The result of a model evaluation. 
Handles the lifetime of the output @@ -124,17 +135,28 @@ class TFModelEvaluator final { std::unique_ptr Impl; }; -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); -template <> int TensorSpec::getDataType(); - +/// List of supported types, as a triple: +/// C++ type +/// short name (for strings, for instance) +/// capitalized short name (for enums, for instance) +#define TFUTILS_SUPPORTED_TYPES(M) \ + M(float, float, FLOAT) \ + M(double, double, DOUBLE) \ + M(int8_t, int8, INT8) \ + M(uint8_t, uint8, UINT8) \ + M(int16_t, int16, INT16) \ + M(uint16_t, uint16, UINT16) \ + M(int32_t, int32, INT32) \ + M(uint32_t, uint32, UINT32) \ + M(int64_t, int64, INT64) \ + M(uint64_t, uint64, UINT64) + +#define TFUTILS_GETDATATYPE_DEF(T, S, C) \ + template <> int TensorSpec::getDataType(); + +TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF) + +#undef TFUTILS_GETDATATYPE_DEF } // namespace llvm #endif // LLVM_HAVE_TF_API diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp index b0ff19857963b..8fd4011e6cd42 100644 --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -13,9 +13,10 @@ #include "llvm/Config/config.h" #if defined(LLVM_HAVE_TF_API) -#include "llvm/Analysis/Utils/TFUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/Utils/TFUtils.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" @@ -83,6 +84,41 @@ class EvaluationResultImpl { std::vector Output; }; +Optional getTensorSpecFromJSON(LLVMContext &Ctx, + const json::Value &Value) { + auto EmitError = [&](const llvm::Twine &Message) -> Optional { + std::string S; + llvm::raw_string_ostream OS(S); + OS << Value; + Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S); + return None; + }; + json::ObjectMapper Mapper(Value); + if (!Mapper) + return EmitError("Value is not a dict"); + + std::string TensorName; + int TensorPort = -1; + std::string TensorType; + std::vector TensorShape; + + if (!Mapper.map("name", TensorName)) + return EmitError("'name' property not present or not a string"); + if (!Mapper.map("type", TensorType)) + return EmitError("'type' property not present or not a string"); + if (!Mapper.map("port", TensorPort)) + return EmitError("'port' property not present or not an int"); + if (!Mapper.map>("shape", TensorShape)) + return EmitError("'shape' property not present or not an int array"); + +#define PARSE_TYPE(T, S, E) \ + if (TensorType == #S) \ + return TensorSpec::createSpec(TensorName, TensorShape, TensorPort); + TFUTILS_SUPPORTED_TYPES(PARSE_TYPE) +#undef PARSE_TYPE + return None; +} + class TFModelEvaluatorImpl { public: TFModelEvaluatorImpl(StringRef SavedModelPath, @@ -249,25 +285,12 @@ void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) { return TF_TensorData(Impl->getOutput()[Index]); } -template <> int TensorSpec::getDataType() { return TF_FLOAT; } - -template <> int TensorSpec::getDataType() { return TF_DOUBLE; } - -template <> int TensorSpec::getDataType() { return TF_INT8; } - -template <> int TensorSpec::getDataType() { return TF_UINT8; } - -template <> int TensorSpec::getDataType() { 
return TF_INT16; }
-
-template <> int TensorSpec::getDataType<uint16_t>() { return TF_UINT16; }
-
-template <> int TensorSpec::getDataType<int32_t>() { return TF_INT32; }
-
-template <> int TensorSpec::getDataType<uint32_t>() { return TF_UINT32; }
+#define TFUTILS_GETDATATYPE_IMPL(T, S, E)                                     \
+  template <> int TensorSpec::getDataType<T>() { return TF_##E; }

-template <> int TensorSpec::getDataType<int64_t>() { return TF_INT64; }
+TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)

-template <> int TensorSpec::getDataType<uint64_t>() { return TF_UINT64; }
+#undef TFUTILS_GETDATATYPE_IMPL

 TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
 TFModelEvaluator::~TFModelEvaluator() {}
diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp
index e96d34092c7e5..abdf2b2b97844 100644
--- a/llvm/unittests/Analysis/TFUtilsTest.cpp
+++ b/llvm/unittests/Analysis/TFUtilsTest.cpp
@@ -94,3 +94,32 @@ TEST(TFUtilsTest, EvalError) {
   EXPECT_FALSE(ER.hasValue());
   EXPECT_FALSE(Evaluator.isValid());
 }
+
+TEST(TFUtilsTest, JSONParsing) {
+  auto Value = json::parse(
+      R"({"name": "tensor_name",
+        "port": 2,
+        "type": "int32",
+        "shape":[1,4]
+        })");
+  EXPECT_TRUE(!!Value);
+  LLVMContext Ctx;
+  Optional<TensorSpec> Spec = getTensorSpecFromJSON(Ctx, *Value);
+  EXPECT_TRUE(Spec.hasValue());
+  EXPECT_EQ(*Spec, TensorSpec::createSpec<int32_t>("tensor_name", {1, 4}, 2));
+}
+
+TEST(TFUtilsTest, JSONParsingInvalidTensorType) {
+  auto Value = json::parse(
+      R"(
+        {"name": "tensor_name",
+        "port": 2,
+        "type": "no such type",
+        "shape":[1,4]
+        }
+      )");
+  EXPECT_TRUE(!!Value);
+  LLVMContext Ctx;
+  auto Spec = getTensorSpecFromJSON(Ctx, *Value);
+  EXPECT_FALSE(Spec.hasValue());
+}

From d313e9c12ed3541f63a36e3b0d59e9e1185603d2 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 3 Aug 2020 12:24:53 -0400
Subject: [PATCH 203/600] [mlir][Vector] Add transformation + pattern to split
 vector.transfer_read into full and partial copies.

This revision adds a transformation and a pattern that rewrites a
"maybe masked" `vector.transfer_read %view[...], %pad` into a pattern
resembling:

```
   %1:3 = scf.if (%inBounds) {
      scf.yield %view : memref<A...>, index, index
    } else {
      %2 = vector.transfer_read %view[...], %pad : memref<A...>, vector<...>
      %3 = vector.type_cast %extra_alloc : memref<...> to memref<vector<...>>
      store %2, %3[] : memref<vector<...>>
      %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
      scf.yield %4 : memref<A...>, index, index
   }
   %res = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
```

where `extra_alloc` is a buffer of one vector, alloca'ed at the top of the
function.

This rewrite makes it possible to realize the "always full tile" abstraction
where vector.transfer_read operations are guaranteed to read from a padded
full buffer. The extra work only occurs on the boundary tiles.
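A minimal sketch of driving the new pattern over a function, mirroring the test pass added in this patch (the wrapper function name is hypothetical and the include paths are indicative only):

```cpp
#include "mlir/Dialect/Vector/VectorTransforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Populate and greedily apply the full/partial split pattern; this is the
// same recipe TestVectorTransferFullPartialSplitPatterns uses below.
void splitTransfers(FuncOp f, MLIRContext *ctx) {
  OwningRewritePatternList patterns;
  patterns.insert<vector::VectorTransferFullPartialRewriter>(ctx);
  applyPatternsAndFoldGreedily(f, patterns);
}
```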
Differential Revision: https://reviews.llvm.org/D84631
---
 .../mlir/Dialect/Vector/VectorTransforms.h    |  64 +++++
 .../mlir/Interfaces/VectorInterfaces.td       |  13 +
 mlir/lib/Dialect/Vector/CMakeLists.txt        |   1 +
 mlir/lib/Dialect/Vector/VectorTransforms.cpp  | 234 ++++++++++++++++++
 .../vector-transfer-full-partial-split.mlir   | 102 ++++++++
 .../lib/Transforms/TestVectorTransforms.cpp   |  16 ++
 6 files changed, 430 insertions(+)
 create mode 100644 mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir

diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
index 0d18c5aa782d1..835ad18a79ad2 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
@@ -17,6 +17,11 @@ namespace mlir {
 class MLIRContext;
 class OwningRewritePatternList;
+class VectorTransferOpInterface;
+
+namespace scf {
+class IfOp;
+} // namespace scf

 /// Collect a set of patterns to convert from the Vector dialect to itself.
 /// Should be merged with populateVectorToSCFLoweringPattern.
@@ -104,6 +109,65 @@ struct UnrollVectorPattern : public OpRewritePattern {
   FilterConstraintType filter;
 };

+/// Split a vector.transfer operation into an unmasked fastpath vector.transfer
+/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result
+/// is `success`, the `ifOp` points to the newly created conditional upon
+/// function return. To accommodate the fact that the original
+/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0]
+/// in the temporary buffer, the scf.if op returns a view and values of type
+/// index. At this time, only vector.transfer_read is implemented.
+///
+/// Example (a 2-D vector.transfer_read):
+/// ```
+///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///       scf.yield %0 : memref<A...>, index, index
+///     } else {
+///       %2 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+///       %3 = vector.type_cast %extra_alloc : memref<...> to memref<vector<...>>
+///       store %2, %3[] : memref<vector<...>>
+///       %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
+///       scf.yield %4 : memref<A...>, index, index
+///    }
+///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
+/// ```
+/// where `extra_alloc` is a buffer of one vector, alloca'ed at the top of the
+/// function.
+///
+/// Preconditions:
+///  1. `xferOp.permutation_map()` must be a minor identity map
+///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
+///  must be equal. This will be relaxed in the future but requires
+///  rank-reducing subviews.
+LogicalResult
+splitFullAndPartialTransferPrecondition(VectorTransferOpInterface xferOp);
+LogicalResult splitFullAndPartialTransfer(OpBuilder &b,
+                                          VectorTransferOpInterface xferOp,
+                                          scf::IfOp *ifOp = nullptr);
+
+/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern
+/// may take an extra filter to perform selection at a finer granularity.
+struct VectorTransferFullPartialRewriter : public RewritePattern {
+  using FilterConstraintType =
+      std::function<LogicalResult(VectorTransferOpInterface op)>;
+
+  explicit VectorTransferFullPartialRewriter(
+      MLIRContext *context,
+      FilterConstraintType filter =
+          [](VectorTransferOpInterface op) { return success(); },
+      PatternBenefit benefit = 1)
+      : RewritePattern(benefit, MatchAnyOpTypeTag()), filter(filter) {}
+
+  /// Performs the rewrite.
+ LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + FilterConstraintType filter; +}; + } // namespace vector //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td index aefbb7d471172..218715318a867 100644 --- a/mlir/include/mlir/Interfaces/VectorInterfaces.td +++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td @@ -160,6 +160,19 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> { /*defaultImplementation=*/ "return $_op.getMemRefType().getRank() - $_op.getTransferRank();" >, + InterfaceMethod< + /*desc=*/[{ Returns true if at least one of the dimensions is masked.}], + /*retTy=*/"bool", + /*methodName=*/"hasMaskedDim", + /*args=*/(ins), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + for (unsigned idx = 0, e = $_op.getTransferRank(); idx < e; ++idx) + if ($_op.isMaskedDim(idx)) + return true; + return false; + }] + >, InterfaceMethod< /*desc=*/[{ Helper function to account for the fact that `permutationMap` results and diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt index 7b34f1933c42c..13dbf6da73fa2 100644 --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_dialect_library(MLIRVector MLIRVectorOpsIncGen LINK_LIBS PUBLIC + MLIRAffineEDSC MLIREDSC MLIRIR MLIRStandardOps diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 197b1c62274b2..573b822503f3a 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -12,9 +12,13 @@ #include +#include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/SCF/EDSC/Intrinsics.h" +#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/EDSC/Intrinsics.h" #include "mlir/Dialect/Vector/VectorOps.h" #include "mlir/Dialect/Vector/VectorTransforms.h" #include "mlir/Dialect/Vector/VectorUtils.h" @@ -1985,6 +1989,236 @@ Value ContractionOpLowering::lowerReduction(vector::ContractionOp op, } // namespace mlir +static Optional extractConstantIndex(Value v) { + if (auto cstOp = v.getDefiningOp()) + return cstOp.getValue(); + if (auto affineApplyOp = v.getDefiningOp()) + if (affineApplyOp.getAffineMap().isSingleConstant()) + return affineApplyOp.getAffineMap().getSingleConstantResult(); + return None; +} + +// Missing foldings of scf.if make it necessary to perform poor man's folding +// eagerly, especially in the case of unrolling. In the future, this should go +// away once scf.if folds properly. +static Value createScopedFoldedSLE(Value v, Value ub) { + using namespace edsc::op; + auto maybeCstV = extractConstantIndex(v); + auto maybeCstUb = extractConstantIndex(ub); + if (maybeCstV && maybeCstUb && *maybeCstV < *maybeCstUb) + return Value(); + return sle(v, ub); +} + +// Operates under a scoped context to build the condition to ensure that a +// particular VectorTransferOpInterface is unmasked. 
+static Value createScopedInBoundsCond(VectorTransferOpInterface xferOp) {
+  assert(xferOp.permutation_map().isMinorIdentity() &&
+         "Expected minor identity map");
+  Value inBoundsCond;
+  xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) {
+    // Zip over the resulting vector shape and memref indices.
+    // If the dimension is known to be unmasked, it does not participate in the
+    // construction of `inBoundsCond`.
+    if (!xferOp.isMaskedDim(resultIdx))
+      return;
+    int64_t vectorSize = xferOp.getVectorType().getDimSize(resultIdx);
+    using namespace edsc::op;
+    using namespace edsc::intrinsics;
+    // Fold or create the check that `index + vector_size` <= `memref_size`.
+    Value sum = xferOp.indices()[indicesIdx] + std_constant_index(vectorSize);
+    Value cond =
+        createScopedFoldedSLE(sum, std_dim(xferOp.memref(), indicesIdx));
+    if (!cond)
+      return;
+    // Conjunction over all dims for which we are in-bounds.
+    inBoundsCond = inBoundsCond ? inBoundsCond && cond : cond;
+  });
+  return inBoundsCond;
+}
+
+LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition(
+    VectorTransferOpInterface xferOp) {
+  // TODO: expand support to these 2 cases.
+  if (!xferOp.permutation_map().isMinorIdentity())
+    return failure();
+  // TODO: relax this precondition. This will require rank-reducing subviews.
+  if (xferOp.getMemRefType().getRank() != xferOp.getTransferRank())
+    return failure();
+  // Must have some masked dimension to be a candidate for splitting.
+  if (!xferOp.hasMaskedDim())
+    return failure();
+  // Don't split transfer operations under IfOp, this avoids applying the
+  // pattern recursively.
+  // TODO: improve the condition to make it more applicable.
+  if (xferOp.getParentOfType<scf::IfOp>())
+    return failure();
+  return success();
+}
+
+MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) {
+  if (MemRefCastOp::areCastCompatible(aT, bT))
+    return aT;
+  if (aT.getRank() != bT.getRank())
+    return MemRefType();
+  int64_t aOffset, bOffset;
+  SmallVector<int64_t, 4> aStrides, bStrides;
+  if (failed(getStridesAndOffset(aT, aStrides, aOffset)) ||
+      failed(getStridesAndOffset(bT, bStrides, bOffset)) ||
+      aStrides.size() != bStrides.size())
+    return MemRefType();
+
+  ArrayRef<int64_t> aShape = aT.getShape(), bShape = bT.getShape();
+  int64_t resOffset;
+  SmallVector<int64_t, 4> resShape(aT.getRank(), 0),
+      resStrides(bT.getRank(), 0);
+  for (int64_t idx = 0, e = aT.getRank(); idx < e; ++idx) {
+    resShape[idx] =
+        (aShape[idx] == bShape[idx]) ? aShape[idx] : MemRefType::kDynamicSize;
+    resStrides[idx] = (aStrides[idx] == bStrides[idx])
+                          ? aStrides[idx]
+                          : MemRefType::kDynamicStrideOrOffset;
+  }
+  resOffset =
+      (aOffset == bOffset) ? aOffset : MemRefType::kDynamicStrideOrOffset;
+  return MemRefType::get(
+      resShape, aT.getElementType(),
+      makeStridedLinearLayoutMap(resStrides, resOffset, aT.getContext()));
+}
+
+/// Split a vector.transfer operation into an unmasked fastpath vector.transfer
+/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result
+/// is `success`, the `ifOp` points to the newly created conditional upon
+/// function return. To accommodate the fact that the original
+/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0]
+/// in the temporary buffer, the scf.if op returns a view and values of type
+/// index. At this time, only vector.transfer_read is implemented.
+///
+/// Example (a 2-D vector.transfer_read):
+/// ```
+///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///       scf.yield %0 : memref<A...>, index, index
+///     } else {
+///       %2 = vector.transfer_read %0[...], %pad : memref<A...>, vector<...>
+///       %3 = vector.type_cast %extra_alloc : memref<...> to memref<vector<...>>
+///       store %2, %3[] : memref<vector<...>>
+///       %4 = memref_cast %extra_alloc : memref<B...> to memref<A...>
+///       scf.yield %4 : memref<A...>, index, index
+///    }
+///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]}
+/// ```
+/// where `extra_alloc` is a buffer of one vector, alloca'ed at the top of the
+/// function.
+///
+/// Preconditions:
+///  1. `xferOp.permutation_map()` must be a minor identity map
+///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
+///  must be equal. This will be relaxed in the future but requires
+///  rank-reducing subviews.
+LogicalResult mlir::vector::splitFullAndPartialTransfer(
+    OpBuilder &b, VectorTransferOpInterface xferOp, scf::IfOp *ifOp) {
+  using namespace edsc;
+  using namespace edsc::intrinsics;
+
+  assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) &&
+         "Expected splitFullAndPartialTransferPrecondition to hold");
+  auto xferReadOp = dyn_cast<vector::TransferReadOp>(xferOp.getOperation());
+
+  // TODO: add support for write case.
+  if (!xferReadOp)
+    return failure();
+
+  OpBuilder::InsertionGuard guard(b);
+  if (xferOp.memref().getDefiningOp())
+    b.setInsertionPointAfter(xferOp.memref().getDefiningOp());
+  else
+    b.setInsertionPoint(xferOp);
+  ScopedContext scope(b, xferOp.getLoc());
+  Value inBoundsCond = createScopedInBoundsCond(
+      cast<VectorTransferOpInterface>(xferOp.getOperation()));
+  if (!inBoundsCond)
+    return failure();
+
+  // Top of the function `alloc` for transient storage.
+  Value alloc;
+  {
+    FuncOp funcOp = xferOp.getParentOfType<FuncOp>();
+    OpBuilder::InsertionGuard guard(b);
+    b.setInsertionPointToStart(&funcOp.getRegion().front());
+    auto shape = xferOp.getVectorType().getShape();
+    Type elementType = xferOp.getVectorType().getElementType();
+    alloc = std_alloca(MemRefType::get(shape, elementType), ValueRange{},
+                       b.getI64IntegerAttr(32));
+  }
+
+  Value memref = xferOp.memref();
+  SmallVector<bool, 4> bools(xferOp.getTransferRank(), false);
+  auto unmaskedAttr = b.getBoolArrayAttr(bools);
+
+  MemRefType compatibleMemRefType = getCastCompatibleMemRefType(
+      xferOp.getMemRefType(), alloc.getType().cast<MemRefType>());
+
+  // Read case: full fill + partial copy -> unmasked vector.xfer_read.
+ Value zero = std_constant_index(0); + SmallVector returnTypes(1 + xferOp.getTransferRank(), + b.getIndexType()); + returnTypes[0] = compatibleMemRefType; + scf::IfOp fullPartialIfOp; + conditionBuilder( + returnTypes, inBoundsCond, + [&]() -> scf::ValueVector { + Value res = memref; + if (compatibleMemRefType != xferOp.getMemRefType()) + res = std_memref_cast(memref, compatibleMemRefType); + scf::ValueVector viewAndIndices{res}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), + xferOp.indices().end()); + return viewAndIndices; + }, + [&]() -> scf::ValueVector { + Operation *newXfer = + ScopedContext::getBuilderRef().clone(*xferOp.getOperation()); + Value vector = cast(newXfer).vector(); + std_store(vector, vector_type_cast( + MemRefType::get({}, vector.getType()), alloc)); + + Value casted = std_memref_cast(alloc, compatibleMemRefType); + scf::ValueVector viewAndIndices{casted}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), + zero); + + return viewAndIndices; + }, + &fullPartialIfOp); + if (ifOp) + *ifOp = fullPartialIfOp; + + // Unmask the existing read op, it always reads from a full buffer. + for (unsigned i = 0, e = returnTypes.size(); i != e; ++i) + xferReadOp.setOperand(i, fullPartialIfOp.getResult(i)); + xferOp.setAttr(vector::TransferReadOp::getMaskedAttrName(), unmaskedAttr); + + return success(); +} + +LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( + Operation *op, PatternRewriter &rewriter) const { + auto xferOp = dyn_cast(op); + if (!xferOp || failed(splitFullAndPartialTransferPrecondition(xferOp)) || + failed(filter(xferOp))) + return failure(); + rewriter.startRootUpdate(xferOp); + if (succeeded(splitFullAndPartialTransfer(rewriter, xferOp))) { + rewriter.finalizeRootUpdate(xferOp); + return success(); + } + rewriter.cancelRootUpdate(xferOp); + return failure(); +} + // TODO: Add pattern to rewrite ExtractSlices(ConstantMaskOp). // TODO: Add this as DRR pattern. 
void mlir::vector::populateVectorToVectorTransformationPatterns( diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir new file mode 100644 index 0000000000000..ef76247ee9d4b --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -0,0 +1,102 @@ +// RUN: mlir-opt %s -test-vector-transfer-full-partial-split | FileCheck %s + +// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)> +// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)> +// CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> + +// CHECK-LABEL: split_vector_transfer_read_2d( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index +// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index +func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: index) -> vector<4x8xf32> { + %c0 = constant 0 : index + %f0 = constant 0.0 : f32 + + // CHECK-DAG: %[[c0:.*]] = constant 0 : index + // CHECK-DAG: %[[c8:.*]] = constant 8 : index + // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 + // alloca for boundary full tile + // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // %i + 4 <= dim(%A, 0) + // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] + // CHECK: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[d0]] : index + // %j + 8 <= dim(%A, 1) + // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] + // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index + // are both conds true + // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 + // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { + // inBounds, just yield %A + // CHECK: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index + // CHECK: } else { + // slow path, fill tmp alloc and yield a memref_casted version of it + // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst : + // CHECK-SAME: memref, vector<4x8xf32> + // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] : + // CHECK-SAME: memref<4x8xf32> to memref> + // CHECK: store %[[slow]], %[[cast_alloc]][] : memref> + // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : + // CHECK-SAME: memref<4x8xf32> to memref + // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : + // CHECK-SAME: memref, index, index + // CHECK: } + // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %[[cst]] + // CHECK_SAME: {masked = [false, false]} : memref, vector<4x8xf32> + %1 = vector.transfer_read %A[%i, %j], %f0 : memref, vector<4x8xf32> + + // CHECK: return %[[res]] : vector<4x8xf32> + return %1: vector<4x8xf32> +} + +// CHECK-LABEL: split_vector_transfer_read_strided_2d( +// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index +// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index +func @split_vector_transfer_read_strided_2d( + %A: memref<7x8xf32, offset:?, strides:[?, 1]>, + %i: index, %j: index) -> vector<4x8xf32> { + %c0 = constant 0 : index + %f0 = constant 0.0 : f32 + + // CHECK-DAG: %[[c0:.*]] = constant 0 : index + // CHECK-DAG: %[[c7:.*]] = constant 7 : index + // CHECK-DAG: %[[c8:.*]] = constant 8 : index + // CHECK-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 + // alloca for boundary full tile + // CHECK: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // %i + 4 <= dim(%A, 0) + // CHECK: %[[idx0:.*]] = affine.apply 
#[[$map_p4]]()[%[[i]]] + // CHECK: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[c7]] : index + // %j + 8 <= dim(%A, 1) + // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] + // CHECK: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index + // are both conds true + // CHECK: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 + // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { + // inBounds but not cast-compatible: yield a memref_casted form of %A + // CHECK: %[[casted:.*]] = memref_cast %arg0 : + // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref + // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] : + // CHECK-SAME: memref, index, index + // CHECK: } else { + // slow path, fill tmp alloc and yield a memref_casted version of it + // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst : + // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32> + // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] : + // CHECK-SAME: memref<4x8xf32> to memref> + // CHECK: store %[[slow]], %[[cast_alloc]][] : + // CHECK-SAME: memref> + // CHECK: %[[yielded:.*]] = memref_cast %[[alloc]] : + // CHECK-SAME: memref<4x8xf32> to memref + // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] : + // CHECK-SAME: memref, index, index + // CHECK: } + // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {masked = [false, false]} : + // CHECK-SAME: memref, vector<4x8xf32> + %1 = vector.transfer_read %A[%i, %j], %f0 : + memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32> + + // CHECK: return %[[res]] : vector<4x8xf32> + return %1 : vector<4x8xf32> +} diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index 2058706dcbdd3..0bba74e76385e 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -122,6 +122,17 @@ struct TestVectorUnrollingPatterns } }; +struct TestVectorTransferFullPartialSplitPatterns + : public PassWrapper { + void runOnFunction() override { + MLIRContext *ctx = &getContext(); + OwningRewritePatternList patterns; + patterns.insert(ctx); + applyPatternsAndFoldGreedily(getFunction(), patterns); + } +}; + } // end anonymous namespace namespace mlir { @@ -141,5 +152,10 @@ void registerTestVectorConversions() { PassRegistration contractionUnrollingPass( "test-vector-unrolling-patterns", "Test conversion patterns to unroll contract ops in the vector dialect"); + + PassRegistration + vectorTransformFullPartialPass("test-vector-transfer-full-partial-split", + "Test conversion patterns to split " + "transfer ops via scf.if + linalg ops"); } } // namespace mlir From e888886cc3daf2c2d6c20cad51cd5ec2ffc24789 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Mon, 3 Aug 2020 10:04:16 -0700 Subject: [PATCH 204/600] [mlir][DialectConversion] Add support for mergeBlocks in ConversionPatternRewriter. 
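A hedged sketch of the now-supported usage from within a conversion pattern, modeled on the TestMergeBlock pattern added below (`MyTwoBlockOp` is a hypothetical op with a two-block region):

```cpp
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

// Merge the second block of the op's region into the first during dialect
// conversion; the merge is now recorded as an undoable BlockAction::Merge.
struct MergeTwoBlocks : public OpConversionPattern<MyTwoBlockOp> {
  using OpConversionPattern<MyTwoBlockOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(MyTwoBlockOp op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const final {
    Block &firstBlock = op.body().front();
    Operation *branchOp = firstBlock.getTerminator();
    Block *secondBlock = &*std::next(op.body().begin());
    // Forward the branch's operands into the merged block's arguments.
    SmallVector<Value, 4> replacements(branchOp->getOperands().begin(),
                                       branchOp->getOperands().end());
    rewriter.eraseOp(branchOp);
    rewriter.mergeBlocks(secondBlock, &firstBlock, replacements);
    rewriter.updateRootInPlace(op, [] {});
    return success();
  }
};
```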
Differential Revision: https://reviews.llvm.org/D84795 --- mlir/lib/Transforms/DialectConversion.cpp | 61 +++++++++- mlir/test/Transforms/test-merge-blocks.mlir | 59 ++++++++++ mlir/test/lib/Dialect/Test/TestOps.td | 10 ++ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 116 ++++++++++++++++++++ 4 files changed, 242 insertions(+), 4 deletions(-) create mode 100644 mlir/test/Transforms/test-merge-blocks.mlir diff --git a/mlir/lib/Transforms/DialectConversion.cpp b/mlir/lib/Transforms/DialectConversion.cpp index 9778958a45885..713f0b73dfe04 100644 --- a/mlir/lib/Transforms/DialectConversion.cpp +++ b/mlir/lib/Transforms/DialectConversion.cpp @@ -602,7 +602,14 @@ struct OpReplacement { /// The kind of the block action performed during the rewrite. Actions can be /// undone if the conversion fails. -enum class BlockActionKind { Create, Erase, Move, Split, TypeConversion }; +enum class BlockActionKind { + Create, + Erase, + Merge, + Move, + Split, + TypeConversion +}; /// Original position of the given block in its parent region. We cannot use /// a region iterator because it could have been invalidated by other region @@ -612,6 +619,15 @@ struct BlockPosition { Region::iterator::difference_type position; }; +/// Information needed to undo the merge actions. +/// - the source block, and +/// - the Operation that was the last operation in the dest block before the +/// merge (could be null if the dest block was empty). +struct MergeInfo { + Block *sourceBlock; + Operation *destBlockLastInst; +}; + /// The storage class for an undoable block action (one of BlockActionKind), /// contains the information necessary to undo this action. struct BlockAction { @@ -621,6 +637,11 @@ struct BlockAction { static BlockAction getErase(Block *block, BlockPosition originalPos) { return {BlockActionKind::Erase, block, {originalPos}}; } + static BlockAction getMerge(Block *block, Block *sourceBlock) { + BlockAction action{BlockActionKind::Merge, block, {}}; + action.mergeInfo = {sourceBlock, block->empty() ? nullptr : &block->back()}; + return action; + } static BlockAction getMove(Block *block, BlockPosition originalPos) { return {BlockActionKind::Move, block, {originalPos}}; } @@ -647,6 +668,9 @@ struct BlockAction { // In use if kind == BlockActionKind::Split and contains a pointer to the // block that was split into two parts. Block *originalBlock; + // In use if kind == BlockActionKind::Merge, and contains the information + // needed to undo the merge. + MergeInfo mergeInfo; }; }; } // end anonymous namespace @@ -738,6 +762,9 @@ struct ConversionPatternRewriterImpl { /// Notifies that a block was split. void notifySplitBlock(Block *block, Block *continuation); + /// Notifies that `block` is being merged with `srcBlock`. + void notifyBlocksBeingMerged(Block *block, Block *srcBlock); + /// Notifies that the blocks of a region are about to be moved. void notifyRegionIsBeingInlinedBefore(Region ®ion, Region &parent, Region::iterator before); @@ -966,6 +993,20 @@ void ConversionPatternRewriterImpl::undoBlockActions( action.block); break; } + // Split the block at the position which was originally the end of the + // destination block (owned by action), and put the instructions back into + // the block used before the merge. + case BlockActionKind::Merge: { + Block *sourceBlock = action.mergeInfo.sourceBlock; + Block::iterator splitPoint = + (action.mergeInfo.destBlockLastInst + ? 
++Block::iterator(action.mergeInfo.destBlockLastInst)
+               : action.block->begin());
+      sourceBlock->getOperations().splice(sourceBlock->begin(),
+                                          action.block->getOperations(),
+                                          splitPoint, action.block->end());
+      break;
+    }
     // Move the block back to its original position.
     case BlockActionKind::Move: {
       Region *originalRegion = action.originalPosition.region;
@@ -1161,6 +1202,11 @@ void ConversionPatternRewriterImpl::notifySplitBlock(Block *block,
   blockActions.push_back(BlockAction::getSplit(continuation, block));
 }

+void ConversionPatternRewriterImpl::notifyBlocksBeingMerged(Block *block,
+                                                            Block *srcBlock) {
+  blockActions.push_back(BlockAction::getMerge(block, srcBlock));
+}
+
 void ConversionPatternRewriterImpl::notifyRegionIsBeingInlinedBefore(
     Region &region, Region &parent, Region::iterator before) {
   for (auto &pair : llvm::enumerate(region)) {
@@ -1283,9 +1329,16 @@ Block *ConversionPatternRewriter::splitBlock(Block *block,
 /// PatternRewriter hook for merging a block into another.
 void ConversionPatternRewriter::mergeBlocks(Block *source, Block *dest,
                                             ValueRange argValues) {
-  // TODO: This requires fixing the implementation of
-  // 'replaceUsesOfBlockArgument', which currently isn't undoable.
-  llvm_unreachable("block merging updates are currently not supported");
+  impl->notifyBlocksBeingMerged(dest, source);
+  assert(llvm::all_of(source->getPredecessors(),
+                      [dest](Block *succ) { return succ == dest; }) &&
+         "expected 'source' to have no predecessors or only 'dest'");
+  assert(argValues.size() == source->getNumArguments() &&
+         "incorrect # of argument replacement values");
+  for (auto it : llvm::zip(source->getArguments(), argValues))
+    replaceUsesOfBlockArgument(std::get<0>(it), std::get<1>(it));
+  dest->getOperations().splice(dest->end(), source->getOperations());
+  eraseBlock(source);
 }

 /// PatternRewriter hook for moving blocks out of a region.
diff --git a/mlir/test/Transforms/test-merge-blocks.mlir b/mlir/test/Transforms/test-merge-blocks.mlir
new file mode 100644
index 0000000000000..65dd50569416e
--- /dev/null
+++ b/mlir/test/Transforms/test-merge-blocks.mlir
@@ -0,0 +1,59 @@
+// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-merge-blocks -verify-diagnostics %s | FileCheck %s
+
+// CHECK-LABEL: @merge_blocks
+func @merge_blocks(%arg0: i32, %arg1 : i32) -> () {
+  // CHECK: "test.merge_blocks"() ( {
+  // CHECK-NEXT: "test.return"
+  // CHECK-NEXT: })
+  // CHECK-NEXT: "test.return"
+  %0:2 = "test.merge_blocks"() ({
+  ^bb0:
+    "test.br"(%arg0, %arg1)[^bb1] : (i32, i32) -> ()
+  ^bb1(%arg3 : i32, %arg4 : i32):
+    "test.return"(%arg3, %arg4) : (i32, i32) -> ()
+  }) : () -> (i32, i32)
+  "test.return"(%0#0, %0#1) : (i32, i32) -> ()
+}
+
+// -----
+
+// The op in this function is rewritten to itself (and thus remains
+// illegal) by a pattern that merges the second block with the first
+// after adding an operation into it. Check that we can undo block
+// removal successfully.
+// CHECK-LABEL: @undo_blocks_merge
+func @undo_blocks_merge(%arg0: i32) {
+  "test.undo_blocks_merge"() ({
+  // expected-remark@-1 {{op 'test.undo_blocks_merge' is not legalizable}}
+    // CHECK: "unregistered.return"(%{{.*}})[^[[BB:.*]]]
+    "unregistered.return"(%arg0)[^bb1] : (i32) -> ()
+    // expected-remark@-1 {{op 'unregistered.return' is not legalizable}}
+  // CHECK: ^[[BB]]
+  ^bb1(%arg1 : i32):
+    // CHECK: "unregistered.return"
+    "unregistered.return"(%arg1) : (i32) -> ()
+    // expected-remark@-1 {{op 'unregistered.return' is not legalizable}}
+  }) : () -> ()
+}
+
+// -----
+
+// CHECK-LABEL: @inline_regions()
+func @inline_regions() -> ()
+{
+  // CHECK: test.SingleBlockImplicitTerminator
+  // CHECK-NEXT: %[[T0:.*]] = "test.type_producer"
+  // CHECK-NEXT: "test.type_consumer"(%[[T0]])
+  // CHECK-NEXT: "test.finish"
+  "test.SingleBlockImplicitTerminator"() ({
+  ^bb0:
+    %0 = "test.type_producer"() : () -> i32
+    "test.SingleBlockImplicitTerminator"() ({
+    ^bb1:
+      "test.type_consumer"(%0) : (i32) -> ()
+      "test.finish"() : () -> ()
+    }) : () -> ()
+    "test.finish"() : () -> ()
+  }) : () -> ()
+  "test.return"() : () -> ()
+}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index af20034abd9ba..0c26f8a719c09 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -1163,6 +1163,16 @@ def TestTypeConsumerOp : TEST_Op<"type_consumer">,
 def TestValidOp : TEST_Op<"valid", [Terminator]>,
                   Arguments<(ins Variadic<AnyType>)>;
 
+def TestMergeBlocksOp : TEST_Op<"merge_blocks"> {
+  let summary = "merge_blocks operation";
+  let description = [{
+    Test op with multiple blocks that are merged with Dialect Conversion
+  }];
+
+  let regions = (region AnyRegion:$body);
+  let results = (outs Variadic<AnyType>:$result);
+}
+
 //===----------------------------------------------------------------------===//
 // Test parser.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index 5bc947fc8c916..f6607a5f55246 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -834,6 +834,118 @@ struct TestTypeConversionDriver
 };
 } // end anonymous namespace
 
+namespace {
+/// A rewriter pattern that tests that blocks can be merged.
+struct TestMergeBlock : public OpConversionPattern<TestMergeBlocksOp> {
+  using OpConversionPattern<TestMergeBlocksOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(TestMergeBlocksOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Block &firstBlock = op.body().front();
+    Operation *branchOp = firstBlock.getTerminator();
+    Block *secondBlock = &*(std::next(op.body().begin()));
+    auto succOperands = branchOp->getOperands();
+    SmallVector<Value, 2> replacements(succOperands);
+    rewriter.eraseOp(branchOp);
+    rewriter.mergeBlocks(secondBlock, &firstBlock, replacements);
+    rewriter.updateRootInPlace(op, [] {});
+    return success();
+  }
+};
+
+/// A rewrite pattern to test the undo mechanism of blocks being merged.
+struct TestUndoBlocksMerge : public ConversionPattern {
+  TestUndoBlocksMerge(MLIRContext *ctx)
+      : ConversionPattern("test.undo_blocks_merge", /*benefit=*/1, ctx) {}
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    Block &firstBlock = op->getRegion(0).front();
+    Operation *branchOp = firstBlock.getTerminator();
+    Block *secondBlock = &*(std::next(op->getRegion(0).begin()));
+    rewriter.setInsertionPointToStart(secondBlock);
+    rewriter.create<ILLegalOpF>(op->getLoc(), rewriter.getF32Type());
+    auto succOperands = branchOp->getOperands();
+    SmallVector<Value, 2> replacements(succOperands);
+    rewriter.eraseOp(branchOp);
+    rewriter.mergeBlocks(secondBlock, &firstBlock, replacements);
+    rewriter.updateRootInPlace(op, [] {});
+    return success();
+  }
+};
+
+/// A rewrite mechanism to inline the body of the op into its parent, when both
+/// ops can have a single block.
+struct TestMergeSingleBlockOps
+    : public OpConversionPattern<SingleBlockImplicitTerminatorOp> {
+  using OpConversionPattern<
+      SingleBlockImplicitTerminatorOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(SingleBlockImplicitTerminatorOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const final {
+    SingleBlockImplicitTerminatorOp parentOp =
+        op.getParentOfType<SingleBlockImplicitTerminatorOp>();
+    if (!parentOp)
+      return failure();
+    Block &parentBlock = parentOp.region().front();
+    Block &innerBlock = op.region().front();
+    TerminatorOp innerTerminator =
+        cast<TerminatorOp>(innerBlock.getTerminator());
+    Block *parentPrologue =
+        rewriter.splitBlock(&parentBlock, Block::iterator(op));
+    rewriter.eraseOp(innerTerminator);
+    rewriter.mergeBlocks(&innerBlock, &parentBlock, {});
+    rewriter.eraseOp(op);
+    rewriter.mergeBlocks(parentPrologue, &parentBlock, {});
+    rewriter.updateRootInPlace(op, [] {});
+    return success();
+  }
+};
+
+struct TestMergeBlocksPatternDriver
+    : public PassWrapper<TestMergeBlocksPatternDriver,
+                         OperationPass<ModuleOp>> {
+  void runOnOperation() override {
+    mlir::OwningRewritePatternList patterns;
+    MLIRContext *context = &getContext();
+    patterns
+        .insert<TestMergeBlock, TestUndoBlocksMerge, TestMergeSingleBlockOps>(
+            context);
+    ConversionTarget target(*context);
+    target.addLegalOp<FuncOp, ModuleOp, ModuleTerminatorOp, TerminatorOp>();
+    target.addIllegalOp<ILLegalOpF>();
+
+    /// Expect the op to have a single block after legalization.
+    target.addDynamicallyLegalOp<TestMergeBlocksOp>(
+        [&](TestMergeBlocksOp op) -> bool {
+          return llvm::hasSingleElement(op.body());
+        });
+
+    /// Only allow `test.br` within test.merge_blocks op.
+    target.addDynamicallyLegalOp<TestBranchOp>([&](TestBranchOp op) -> bool {
+      return op.getParentOfType<TestMergeBlocksOp>();
+    });
+
+    /// Expect that all nested test.SingleBlockImplicitTerminator ops are
+    /// inlined.
+    target.addDynamicallyLegalOp<SingleBlockImplicitTerminatorOp>(
+        [&](SingleBlockImplicitTerminatorOp op) -> bool {
+          return !op.getParentOfType<SingleBlockImplicitTerminatorOp>();
+        });
+
+    DenseSet<Operation *> unlegalizedOps;
+    (void)applyPartialConversion(getOperation(), target, patterns,
+                                 &unlegalizedOps);
+    for (auto *op : unlegalizedOps)
+      op->emitRemark() << "op '" << op->getName() << "' is not legalizable";
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // PassRegistration
 //===----------------------------------------------------------------------===//
@@ -866,5 +978,9 @@ void registerPatternsTestPass() {
   PassRegistration<TestTypeConversionDriver>(
       "test-legalize-type-conversion",
       "Test various type conversion functionalities in DialectConversion");
+
+  PassRegistration<TestMergeBlocksPatternDriver>{
+      "test-merge-blocks",
+      "Test Merging operation in ConversionPatternRewriter"};
 }
 } // namespace mlir

From eef1bfb2d219191cee16ee24efbf2d204488696c Mon Sep 17 00:00:00 2001
From: MaheshRavishankar
Date: Mon, 3 Aug 2020 10:04:48 -0700
Subject: [PATCH 205/600] [mlir][DialectConversion] Remove usage of
 std::distance to track position.

Remove use of iterator::difference_type to know where to insert a
moved or erased block during undo actions.

Differential Revision: https://reviews.llvm.org/D85066
---
 mlir/lib/Transforms/DialectConversion.cpp | 37 +++++++++++++----------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/mlir/lib/Transforms/DialectConversion.cpp b/mlir/lib/Transforms/DialectConversion.cpp
index 713f0b73dfe04..a8d7fe71f75fb 100644
--- a/mlir/lib/Transforms/DialectConversion.cpp
+++ b/mlir/lib/Transforms/DialectConversion.cpp
@@ -611,12 +611,11 @@ enum class BlockActionKind {
   TypeConversion
 };
 
-/// Original position of the given block in its parent region. We cannot use
-/// a region iterator because it could have been invalidated by other region
-/// operations since the position was stored.
+/// Original position of the given block in its parent region. During undo
+/// actions, the block needs to be placed after `insertAfterBlock`.
 struct BlockPosition {
   Region *region;
-  Region::iterator::difference_type position;
+  Block *insertAfterBlock;
 };
 
 /// Information needed to undo the merge actions.
@@ -634,16 +633,16 @@ struct BlockAction {
   static BlockAction getCreate(Block *block) {
     return {BlockActionKind::Create, block, {}};
   }
-  static BlockAction getErase(Block *block, BlockPosition originalPos) {
-    return {BlockActionKind::Erase, block, {originalPos}};
+  static BlockAction getErase(Block *block, BlockPosition originalPosition) {
+    return {BlockActionKind::Erase, block, {originalPosition}};
   }
   static BlockAction getMerge(Block *block, Block *sourceBlock) {
     BlockAction action{BlockActionKind::Merge, block, {}};
     action.mergeInfo = {sourceBlock, block->empty() ? nullptr : &block->back()};
     return action;
   }
-  static BlockAction getMove(Block *block, BlockPosition originalPos) {
-    return {BlockActionKind::Move, block, {originalPos}};
+  static BlockAction getMove(Block *block, BlockPosition originalPosition) {
+    return {BlockActionKind::Move, block, {originalPosition}};
   }
   static BlockAction getSplit(Block *block, Block *originalBlock) {
     BlockAction action{BlockActionKind::Split, block, {}};
@@ -988,9 +987,11 @@ void ConversionPatternRewriterImpl::undoBlockActions(
     // Put the block (owned by action) back into its original position.
    case BlockActionKind::Erase: {
       auto &blockList = action.originalPosition.region->getBlocks();
-      blockList.insert(
-          std::next(blockList.begin(), action.originalPosition.position),
-          action.block);
+      Block *insertAfterBlock = action.originalPosition.insertAfterBlock;
+      blockList.insert((insertAfterBlock
+                            ? std::next(Region::iterator(insertAfterBlock))
+                            : blockList.end()),
+                       action.block);
       break;
     }
     // Split the block at the position which was originally the end of the
@@ -1010,8 +1011,10 @@
     // Move the block back to its original position.
     case BlockActionKind::Move: {
       Region *originalRegion = action.originalPosition.region;
+      Block *insertAfterBlock = action.originalPosition.insertAfterBlock;
       originalRegion->getBlocks().splice(
-          std::next(originalRegion->begin(), action.originalPosition.position),
+          (insertAfterBlock ? std::next(Region::iterator(insertAfterBlock))
+                            : originalRegion->end()),
           action.block->getParent()->getBlocks(), action.block);
       break;
     }
@@ -1189,8 +1192,8 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op,
 
 void ConversionPatternRewriterImpl::notifyBlockIsBeingErased(Block *block) {
   Region *region = block->getParent();
-  auto position = std::distance(region->begin(), Region::iterator(block));
-  blockActions.push_back(BlockAction::getErase(block, {region, position}));
+  Block *origPrevBlock = block->getPrevNode();
+  blockActions.push_back(BlockAction::getErase(block, {region, origPrevBlock}));
 }
 
 void ConversionPatternRewriterImpl::notifyCreatedBlock(Block *block) {
@@ -1209,10 +1212,12 @@
 void ConversionPatternRewriterImpl::notifyRegionIsBeingInlinedBefore(
     Region &region, Region &parent, Region::iterator before) {
+  Block *origPrevBlock = nullptr;
   for (auto &pair : llvm::enumerate(region)) {
     Block &block = pair.value();
-    Region::iterator::difference_type position = pair.index();
-    blockActions.push_back(BlockAction::getMove(&block, {&region, position}));
+    blockActions.push_back(
+        BlockAction::getMove(&block, {&region, origPrevBlock}));
+    origPrevBlock = &block;
   }
 }

From 32f3a9a9d68eea7d40a19767b591622b4b737990 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 3 Aug 2020 10:09:57 -0700
Subject: [PATCH 206/600] [X86] Use h-register for final XOR of
 __builtin_parity on 64-bit targets.

This adds an isel pattern and special XOR8rr_NOREX instruction to enable
the use of h-registers for __builtin_parity. This avoids a copy and a
shift instruction. The NOREX instruction is in case register allocation
doesn't use the matching l-register for some reason. If an R8-R15
register gets picked instead, we won't be able to encode the instruction
since an h-register can't be used with a REX prefix.

Fixes PR46954
---
 llvm/lib/Target/X86/X86InstrArithmetic.td     |   9 +
 llvm/lib/Target/X86/X86InstrCompiler.td       |  10 +
 llvm/test/CodeGen/X86/parity.ll               |  16 +-
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 196 +++++-------------
 4 files changed, 72 insertions(+), 159 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index f7f22285bd15d..e83e1e74ff526 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1182,6 +1182,15 @@ defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
                          X86sub_flag, sub, 0, 1, 0>;
 }
 
+// Version of XOR8rr that uses GR8_NOREX.
This is used by the handling of +// __builtin_parity where the last step xors an h-register with an l-register. +let isCodeGenOnly = 1, hasSideEffects = 0, Constraints = "$src1 = $dst", + Defs = [EFLAGS], isCommutable = 1 in +def XOR8rr_NOREX : I<0x30, MRMDestReg, (outs GR8_NOREX:$dst), + (ins GR8_NOREX:$src1, GR8_NOREX:$src2), + "xor{b}\t{$src2, $dst|$dst, $src2}", []>, + Sched<[WriteALU]>; + // Arithmetic. defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag, 1, 0>; diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 4df93fb2ed60b..d78d9f7c80c76 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1698,6 +1698,16 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, Requires<[In64BitMode]>; +// Special pattern to catch the last step of __builtin_parity handling. Our +// goal is to use an xor of an h-register with the corresponding l-register. +// The above patterns would handle this on non 64-bit targets, but for 64-bit +// we need to be more careful. We're using a NOREX instruction here in case +// register allocation fails to keep the two registers together. So we need to +// make sure we can't accidentally mix R8-R15 with an h-register. +def : Pat<(X86xor_flag (i8 (trunc GR32:$src)), + (i8 (trunc (srl_su GR32:$src, (i8 8))))), + (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit), + (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; // (shl x, 1) ==> (add x, x) // Note that if x is undef (immediate or otherwise), we could theoretically diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll index 869ee552c67da..6289ab482426c 100644 --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -21,10 +21,8 @@ define i32 @parity_32(i32 %x) { ; X64-NOPOPCNT-NEXT: movl %edi, %ecx ; X64-NOPOPCNT-NEXT: shrl $16, %ecx ; X64-NOPOPCNT-NEXT: xorl %edi, %ecx -; X64-NOPOPCNT-NEXT: movl %ecx, %edx -; X64-NOPOPCNT-NEXT: shrl $8, %edx ; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb %cl, %dl +; X64-NOPOPCNT-NEXT: xorb %ch, %cl ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; @@ -66,10 +64,8 @@ define i64 @parity_64(i64 %x) { ; X64-NOPOPCNT-NEXT: movl %eax, %ecx ; X64-NOPOPCNT-NEXT: shrl $16, %ecx ; X64-NOPOPCNT-NEXT: xorl %eax, %ecx -; X64-NOPOPCNT-NEXT: movl %ecx, %edx -; X64-NOPOPCNT-NEXT: shrl $8, %edx ; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb %cl, %dl +; X64-NOPOPCNT-NEXT: xorb %ch, %cl ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; @@ -113,10 +109,8 @@ define i32 @parity_64_trunc(i64 %x) { ; X64-NOPOPCNT-NEXT: movl %eax, %ecx ; X64-NOPOPCNT-NEXT: shrl $16, %ecx ; X64-NOPOPCNT-NEXT: xorl %eax, %ecx -; X64-NOPOPCNT-NEXT: movl %ecx, %edx -; X64-NOPOPCNT-NEXT: shrl $8, %edx ; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb %cl, %dl +; X64-NOPOPCNT-NEXT: xorb %ch, %cl ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; @@ -156,9 +150,7 @@ define i8 @parity_32_trunc(i32 %x) { ; X64-NOPOPCNT-NEXT: movl %edi, %eax ; X64-NOPOPCNT-NEXT: shrl $16, %eax ; X64-NOPOPCNT-NEXT: xorl %edi, %eax -; X64-NOPOPCNT-NEXT: movl %eax, %ecx -; X64-NOPOPCNT-NEXT: shrl $8, %ecx -; X64-NOPOPCNT-NEXT: xorb %al, %cl +; X64-NOPOPCNT-NEXT: xorb %ah, %al ; X64-NOPOPCNT-NEXT: setnp %al ; X64-NOPOPCNT-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll index 
8e50cfc4e4b29..b28aa43affb74 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll @@ -181,9 +181,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) { ; SSE: # %bb.0: ; SSE-NEXT: psllw $7, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -191,9 +189,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) { ; AVX: # %bb.0: ; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: xorb %al, %cl +; AVX-NEXT: xorb %ah, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -201,9 +197,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) { ; AVX512: # %bb.0: ; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX512-NEXT: vpmovmskb %xmm0, %eax -; AVX512-NEXT: movl %eax, %ecx -; AVX512-NEXT: shrl $8, %ecx -; AVX512-NEXT: xorb %al, %cl +; AVX512-NEXT: xorb %ah, %al ; AVX512-NEXT: setnp %al ; AVX512-NEXT: retq %a = trunc <16 x i8> %0 to <16 x i1> @@ -364,9 +358,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: psllw $7, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -377,9 +369,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -391,9 +381,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $8, %ecx -; AVX2-NEXT: xorb %al, %cl +; AVX2-NEXT: xorb %ah, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -445,9 +433,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; SSE-NEXT: pxor %xmm1, %xmm0 ; SSE-NEXT: psllw $7, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -457,9 +443,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -471,9 +455,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -505,9 +487,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX512BW-NEXT: movl %eax, %ecx ; AVX512BW-NEXT: shrl $16, %ecx ; AVX512BW-NEXT: xorl %eax, %ecx -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: xorb %ch, %cl ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -519,9 +499,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: movl 
%eax, %ecx ; AVX512VL-NEXT: shrl $16, %ecx ; AVX512VL-NEXT: xorl %eax, %ecx -; AVX512VL-NEXT: movl %ecx, %eax -; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: xorb %ch, %cl ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -648,9 +626,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: psllw $7, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $8, %ecx -; SSE2-NEXT: xorb %al, %cl +; SSE2-NEXT: xorb %ah, %al ; SSE2-NEXT: setnp %al ; SSE2-NEXT: retq ; @@ -666,9 +642,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; SSE41-NEXT: packuswb %xmm2, %xmm0 ; SSE41-NEXT: psllw $7, %xmm0 ; SSE41-NEXT: pmovmskb %xmm0, %eax -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: shrl $8, %ecx -; SSE41-NEXT: xorb %al, %cl +; SSE41-NEXT: xorb %ah, %al ; SSE41-NEXT: setnp %al ; SSE41-NEXT: retq ; @@ -684,9 +658,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -704,9 +676,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) { ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $8, %ecx -; AVX2-NEXT: xorb %al, %cl +; AVX2-NEXT: xorb %ah, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -764,9 +734,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: psllw $7, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -778,9 +746,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -798,9 +764,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -833,9 +797,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX512BW-NEXT: movl %eax, %ecx ; AVX512BW-NEXT: shrl $16, %ecx ; AVX512BW-NEXT: xorl %eax, %ecx -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: xorb %ch, %cl ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -848,9 +810,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: movl %eax, %ecx ; AVX512VL-NEXT: shrl $16, %ecx ; AVX512VL-NEXT: xorl %eax, %ecx -; AVX512VL-NEXT: movl %ecx, %eax -; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: xorb %ch, %cl ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -867,9 +827,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; SSE-NEXT: pxor %xmm0, %xmm1 ; SSE-NEXT: psllw $7, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: movl %eax, 
%ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -880,9 +838,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -895,9 +851,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -937,9 +891,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX512BW-NEXT: movl %ecx, %eax ; AVX512BW-NEXT: shrl $16, %eax ; AVX512BW-NEXT: xorl %ecx, %eax -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: xorb %al, %cl +; AVX512BW-NEXT: xorb %ah, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -955,9 +907,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: movl %ecx, %eax ; AVX512VL-NEXT: shrl $16, %eax ; AVX512VL-NEXT: xorl %ecx, %eax -; AVX512VL-NEXT: movl %eax, %ecx -; AVX512VL-NEXT: shrl $8, %ecx -; AVX512VL-NEXT: xorb %al, %cl +; AVX512VL-NEXT: xorb %ah, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1157,9 +1107,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1168,9 +1116,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: shrl $8, %ecx -; AVX-NEXT: xorb %al, %cl +; AVX-NEXT: xorb %ah, %al ; AVX-NEXT: setnp %al ; AVX-NEXT: retq ; @@ -1179,9 +1125,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) { ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpmovmskb %xmm0, %eax -; AVX512F-NEXT: movl %eax, %ecx -; AVX512F-NEXT: shrl $8, %ecx -; AVX512F-NEXT: xorb %al, %cl +; AVX512F-NEXT: xorb %ah, %al ; AVX512F-NEXT: setnp %al ; AVX512F-NEXT: retq ; @@ -1371,9 +1315,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; SSE-NEXT: pcmpeqw %xmm2, %xmm0 ; SSE-NEXT: packsswb %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1385,9 +1327,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1399,9 +1339,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $8, %ecx -; AVX2-NEXT: xorb %al, %cl +; AVX2-NEXT: xorb %ah, %al ; AVX2-NEXT: setnp %al ; 
AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1455,9 +1393,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; SSE-NEXT: pcmpeqb %xmm2, %xmm0 ; SSE-NEXT: pxor %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1469,9 +1405,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1484,9 +1418,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1520,9 +1452,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX512BW-NEXT: movl %eax, %ecx ; AVX512BW-NEXT: shrl $16, %ecx ; AVX512BW-NEXT: xorl %eax, %ecx -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: xorb %ch, %cl ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1534,9 +1464,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) { ; AVX512VL-NEXT: movl %eax, %ecx ; AVX512VL-NEXT: shrl $16, %ecx ; AVX512VL-NEXT: xorl %eax, %ecx -; AVX512VL-NEXT: movl %ecx, %eax -; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: xorb %ch, %cl ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1660,9 +1588,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: packsswb %xmm2, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1679,9 +1605,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1696,9 +1620,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) { ; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] ; AVX2-NEXT: vpmovmskb %xmm0, %eax -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: shrl $8, %ecx -; AVX2-NEXT: xorb %al, %cl +; AVX2-NEXT: xorb %ah, %al ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1752,9 +1674,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; SSE-NEXT: packsswb %xmm3, %xmm2 ; SSE-NEXT: pxor %xmm0, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1771,9 +1691,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1789,9 +1707,7 @@ define i1 
@icmp_v32i16_v32i1(<32 x i16>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1825,9 +1741,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX512BW-NEXT: movl %eax, %ecx ; AVX512BW-NEXT: shrl $16, %ecx ; AVX512BW-NEXT: xorl %eax, %ecx -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: shrl $8, %eax -; AVX512BW-NEXT: xorb %cl, %al +; AVX512BW-NEXT: xorb %ch, %cl ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1839,9 +1753,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) { ; AVX512VL-NEXT: movl %eax, %ecx ; AVX512VL-NEXT: shrl $16, %ecx ; AVX512VL-NEXT: xorl %eax, %ecx -; AVX512VL-NEXT: movl %ecx, %eax -; AVX512VL-NEXT: shrl $8, %eax -; AVX512VL-NEXT: xorb %cl, %al +; AVX512VL-NEXT: xorb %ch, %cl ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1862,9 +1774,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: pxor %xmm0, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: movl %eax, %ecx -; SSE-NEXT: shrl $8, %ecx -; SSE-NEXT: xorb %al, %cl +; SSE-NEXT: xorb %ah, %al ; SSE-NEXT: setnp %al ; SSE-NEXT: retq ; @@ -1881,9 +1791,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX1-NEXT: vpxor %xmm0, %xmm3, %xmm0 ; AVX1-NEXT: vpxor %xmm0, %xmm4, %xmm0 ; AVX1-NEXT: vpmovmskb %xmm0, %eax -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: shrl $8, %ecx -; AVX1-NEXT: xorb %al, %cl +; AVX1-NEXT: xorb %ah, %al ; AVX1-NEXT: setnp %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1898,9 +1806,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrl $16, %ecx ; AVX2-NEXT: xorl %eax, %ecx -; AVX2-NEXT: movl %ecx, %eax -; AVX2-NEXT: shrl $8, %eax -; AVX2-NEXT: xorb %cl, %al +; AVX2-NEXT: xorb %ch, %cl ; AVX2-NEXT: setnp %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1942,9 +1848,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX512BW-NEXT: movl %ecx, %eax ; AVX512BW-NEXT: shrl $16, %eax ; AVX512BW-NEXT: xorl %ecx, %eax -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: shrl $8, %ecx -; AVX512BW-NEXT: xorb %al, %cl +; AVX512BW-NEXT: xorb %ah, %al ; AVX512BW-NEXT: setnp %al ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1959,9 +1863,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) { ; AVX512VL-NEXT: movl %ecx, %eax ; AVX512VL-NEXT: shrl $16, %eax ; AVX512VL-NEXT: xorl %ecx, %eax -; AVX512VL-NEXT: movl %eax, %ecx -; AVX512VL-NEXT: shrl $8, %ecx -; AVX512VL-NEXT: xorb %al, %cl +; AVX512VL-NEXT: xorb %ah, %al ; AVX512VL-NEXT: setnp %al ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq From 317e00dc54c74a2e0fd0c62bdc6a6d68b0d2ca7e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 10:09:51 -0700 Subject: [PATCH 207/600] [PGO] Change a `NumVSites == 0` workaround to assert The root cause was fixed by 3d6f53018f845e893ad34f64ff2851a2e5c3ba1d. The workaround added in 99ad956fdaee5398fdcf46fa49cb433cf52dc461 can be changed to an assert now. (In case the fix regresses, there will be a heap-use-after-free.) 
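
As background for the change below, a minimal standalone C sketch of the calloc edge case the new assert guards against (illustrative only; the function and variable names here are hypothetical and not part of this patch):

    #include <assert.h>
    #include <stdlib.h>

    /* calloc(0, size) may legally return either NULL or a unique non-null
     * pointer, so a zero-element request is indistinguishable from an
     * allocation failure at the call site. Asserting a non-zero count keeps
     * a NULL return as an unambiguous out-of-memory signal. */
    static void **alloc_value_nodes(size_t num_sites) {
      assert(num_sites > 0 && "number of value sites can't be zero");
      return (void **)calloc(num_sites, sizeof(void *));
    }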
---
 compiler-rt/lib/profile/InstrProfilingValue.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/profile/InstrProfilingValue.c b/compiler-rt/lib/profile/InstrProfilingValue.c
index 29b9e628a9c95..4b4081bd21b70 100644
--- a/compiler-rt/lib/profile/InstrProfilingValue.c
+++ b/compiler-rt/lib/profile/InstrProfilingValue.c
@@ -6,6 +6,7 @@
 |*
 \*===----------------------------------------------------------------------===*/
 
+#include <assert.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -93,8 +94,8 @@ static int allocateValueProfileCounters(__llvm_profile_data *Data) {
   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
     NumVSites += Data->NumValueSites[VKI];
 
-  if (NumVSites == 0)
-    return 0;
+  // If NumVSites = 0, calloc is allowed to return a non-null pointer.
+  assert(NumVSites > 0 && "NumVSites can't be zero");
   ValueProfNode **Mem =
       (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
   if (!Mem)

From d535a91d13b88b547ba24ec50337aa0715d74d4d Mon Sep 17 00:00:00 2001
From: "Kevin P. Neal"
Date: Wed, 29 Jul 2020 10:33:01 -0400
Subject: [PATCH 208/600] [FPEnv] IRBuilder fails to add strictfp attribute

The strictfp attribute is required on all function calls in a function
that is itself marked with the strictfp attribute. The IRBuilder knows
this and has a method for adding the attribute to function call
instructions. If a function being called has the strictfp attribute
itself then the IRBuilder will refuse to add the attribute to the
calling instruction despite being asked to add it. Eliminate this error.

Differential Revision: https://reviews.llvm.org/D84878
---
 llvm/include/llvm/IR/IRBuilder.h    |  3 +--
 llvm/unittests/IR/IRBuilderTest.cpp | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index d467789132aca..aa0286ab11f94 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -294,8 +294,7 @@ class IRBuilderBase {
   }
 
   void setConstrainedFPCallAttr(CallInst *I) {
-    if (!I->hasFnAttr(Attribute::StrictFP))
-      I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+    I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
   }
 
   void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp
index fa0e33a07bb40..204e476b8632f 100644
--- a/llvm/unittests/IR/IRBuilderTest.cpp
+++ b/llvm/unittests/IR/IRBuilderTest.cpp
@@ -332,6 +332,33 @@ TEST_F(IRBuilderTest, ConstrainedFPIntrinsics) {
   EXPECT_EQ(fp::ebStrict, CII->getExceptionBehavior());
 }
 
+TEST_F(IRBuilderTest, ConstrainedFPFunctionCall) {
+  IRBuilder<> Builder(BB);
+
+  // Create an empty constrained FP function.
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+                                        /*isVarArg=*/false);
+  Function *Callee =
+      Function::Create(FTy, Function::ExternalLinkage, "", M.get());
+  BasicBlock *CalleeBB = BasicBlock::Create(Ctx, "", Callee);
+  IRBuilder<> CalleeBuilder(CalleeBB);
+  CalleeBuilder.setIsFPConstrained(true);
+  CalleeBuilder.setConstrainedFPFunctionAttr();
+  CalleeBuilder.CreateRetVoid();
+
+  // Now call the empty constrained FP function.
+  Builder.setIsFPConstrained(true);
+  Builder.setConstrainedFPFunctionAttr();
+  CallInst *FCall = Builder.CreateCall(Callee, None);
+
+  // Check the attributes to verify the strictfp attribute is on the call.
+ EXPECT_TRUE(FCall->getAttributes().getFnAttributes().hasAttribute( + Attribute::StrictFP)); + + Builder.CreateRetVoid(); + EXPECT_FALSE(verifyModule(*M)); +} + TEST_F(IRBuilderTest, Lifetime) { IRBuilder<> Builder(BB); AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty()); From 7c19c89dd5c532fef533e008fb5911d20992d2ac Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 31 Jul 2020 17:30:30 -0700 Subject: [PATCH 209/600] [NewPM][LoopVersioning] Port LoopVersioning to NPM Reviewed By: ychen, fhahn Differential Revision: https://reviews.llvm.org/D85063 --- llvm/include/llvm/InitializePasses.h | 2 +- .../llvm/Transforms/Utils/LoopVersioning.h | 9 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/Scalar/Scalar.cpp | 2 +- llvm/lib/Transforms/Utils/LoopVersioning.cpp | 107 ++++++++++++------ llvm/test/Transforms/LoopVersioning/basic.ll | 1 + 7 files changed, 86 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index d23ecfb98ce92..cc8397a36651c 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -254,7 +254,7 @@ void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopVectorizePass(PassRegistry&); void initializeLoopVersioningLICMPass(PassRegistry&); -void initializeLoopVersioningPassPass(PassRegistry&); +void initializeLoopVersioningLegacyPassPass(PassRegistry &); void initializeLowerAtomicLegacyPassPass(PassRegistry&); void initializeLowerConstantIntrinsicsPass(PassRegistry&); void initializeLowerEmuTLSPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h index 1efdcc65b39a8..ac6cee637a46d 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -148,6 +149,14 @@ class LoopVersioning { DominatorTree *DT; ScalarEvolution *SE; }; + +/// Expose LoopVersioning as a pass. Currently this is only used for +/// unit-testing. It adds all memchecks necessary to remove all may-aliasing +/// array accesses from the loop. 
+class LoopVersioningPass : public PassInfoMixin<LoopVersioningPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
 }
 
 #endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index eb9041311e1bf..b9d1e76fcf198 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -186,6 +186,7 @@
 #include "llvm/Transforms/Utils/LCSSA.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/Transforms/Utils/LowerInvoke.h"
 #include "llvm/Transforms/Utils/Mem2Reg.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index e77182031f108..7490ebfb29d56 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -225,6 +225,7 @@ FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
 FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
 FUNCTION_PASS("loop-fusion", LoopFusePass())
 FUNCTION_PASS("loop-distribute", LoopDistributePass())
+FUNCTION_PASS("loop-versioning", LoopVersioningPass())
 FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
 FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
 FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index a0598447030c0..8bc35d5228df8 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -110,7 +110,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLoopDistributeLegacyPass(Registry);
   initializeLoopLoadEliminationPass(Registry);
   initializeLoopSimplifyCFGLegacyPassPass(Registry);
-  initializeLoopVersioningPassPass(Registry);
+  initializeLoopVersioningLegacyPassPass(Registry);
   initializeEntryExitInstrumenterPass(Registry);
   initializePostInlineEntryExitInstrumenterPass(Registry);
 }
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 16bd08c704eeb..c8fa337ed04f7 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -16,10 +16,13 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
@@ -253,47 +256,55 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,
 }
 
 namespace {
+bool runImpl(LoopInfo *LI, function_ref<const LoopAccessInfo &(Loop &)> GetLAA,
+             DominatorTree *DT, ScalarEvolution *SE) {
+  // Build up a worklist of inner-loops to version. This is necessary as the
+  // act of versioning a loop creates new loops and can invalidate iterators
+  // across the loops.
+  SmallVector<Loop *, 8> Worklist;
+
+  for (Loop *TopLevelLoop : *LI)
+    for (Loop *L : depth_first(TopLevelLoop))
+      // We only handle inner-most loops.
+      if (L->empty())
+        Worklist.push_back(L);
+
+  // Now walk the identified inner loops.
+  bool Changed = false;
+  for (Loop *L : Worklist) {
+    const LoopAccessInfo &LAI = GetLAA(*L);
+    if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
+        (LAI.getNumRuntimePointerChecks() ||
+         !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
+      LoopVersioning LVer(LAI, L, LI, DT, SE);
+      LVer.versionLoop();
+      LVer.annotateLoopWithNoAlias();
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
 /// Also expose this as a pass. Currently this is only used for
 /// unit-testing. It adds all memchecks necessary to remove all may-aliasing
 /// array accesses from the loop.
-class LoopVersioningPass : public FunctionPass {
+class LoopVersioningLegacyPass : public FunctionPass {
 public:
-  LoopVersioningPass() : FunctionPass(ID) {
-    initializeLoopVersioningPassPass(*PassRegistry::getPassRegistry());
+  LoopVersioningLegacyPass() : FunctionPass(ID) {
+    initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
+    auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+      return getAnalysis<LoopAccessLegacyAnalysis>().getInfo(&L);
+    };
+
     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
 
-    // Build up a worklist of inner-loops to version. This is necessary as the
-    // act of versioning a loop creates new loops and can invalidate iterators
-    // across the loops.
-    SmallVector<Loop *, 8> Worklist;
-
-    for (Loop *TopLevelLoop : *LI)
-      for (Loop *L : depth_first(TopLevelLoop))
-        // We only handle inner-most loops.
-        if (L->empty())
-          Worklist.push_back(L);
-
-    // Now walk the identified inner loops.
-    bool Changed = false;
-    for (Loop *L : Worklist) {
-      const LoopAccessInfo &LAI = LAA->getInfo(L);
-      if (L->isLoopSimplifyForm() && !LAI.hasConvergentOp() &&
-          (LAI.getNumRuntimePointerChecks() ||
-           !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
-        LoopVersioning LVer(LAI, L, LI, DT, SE);
-        LVer.versionLoop();
-        LVer.annotateLoopWithNoAlias();
-        Changed = true;
-      }
-    }
-
-    return Changed;
+    return runImpl(LI, GetLAA, DT, SE);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -312,18 +323,44 @@
 #define LVER_OPTION "loop-versioning"
 #define DEBUG_TYPE LVER_OPTION
 
-char LoopVersioningPass::ID;
+char LoopVersioningLegacyPass::ID;
 
 static const char LVer_name[] = "Loop Versioning";
 
-INITIALIZE_PASS_BEGIN(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+                      false)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningPass, LVER_OPTION, LVer_name, false, false)
+INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
+                    false)
 
 namespace llvm {
-FunctionPass *createLoopVersioningPass() {
-  return new LoopVersioningPass();
+FunctionPass *createLoopVersioningLegacyPass() {
+  return new LoopVersioningLegacyPass();
 }
+
+PreservedAnalyses LoopVersioningPass::run(Function &F,
+                                          FunctionAnalysisManager &AM) {
+  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &AA = AM.getResult<AAManager>(F);
+  auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  MemorySSA *MSSA = EnableMSSALoopDependency
+                        ?
&AM.getResult<MemorySSAAnalysis>(F).getMSSA()
+                        : nullptr;
+
+  auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+  auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
+    LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
+    return LAM.getResult<LoopAccessAnalysis>(L, AR);
+  };
+
+  if (runImpl(&LI, GetLAA, &DT, &SE))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+} // namespace llvm
diff --git a/llvm/test/Transforms/LoopVersioning/basic.ll b/llvm/test/Transforms/LoopVersioning/basic.ll
index bcd5e8c875c69..bd8ea4d28e162 100644
--- a/llvm/test/Transforms/LoopVersioning/basic.ll
+++ b/llvm/test/Transforms/LoopVersioning/basic.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basic-aa -loop-versioning -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes=loop-versioning -S < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 

From 219f32f4b68679563443cdaae7b8174c9976409a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 3 Aug 2020 17:53:32 +0100
Subject: [PATCH 210/600] [X86][SSE] Shuffle combine blends to OR(X,Y) if the
 relevant elements are known zero.

This allows us to remove the (depth violating) code in getFauxShuffleMask
where we were combining the OR(SHUFFLE,SHUFFLE) shuffle inputs as well, and
not just the OR().

This is a minor step toward being able to shuffle combine from/to
SELECT/BLENDV as a faux shuffle.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  65 ++++++++++++-----
 llvm/test/CodeGen/X86/insertelement-ones.ll   |  12 ++--
 llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll |   8 +--
 .../test/CodeGen/X86/vector-shuffle-128-v8.ll |   5 +-
 .../CodeGen/X86/vector-shuffle-256-v32.ll     |   6 +-
 5 files changed, 61 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e9bb50aacec0e..b2bfcc2698f4d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
 
     // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
     // is a valid shuffle index.
-    SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
-    SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
+    SDValue N0 = peekThroughBitcasts(N.getOperand(0));
+    SDValue N1 = peekThroughBitcasts(N.getOperand(1));
     if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
       return false;
     SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7413,34 +7413,24 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                                 true))
       return false;
 
-    // Shuffle inputs must be the same size as the result.
-    if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
-          return VT.getSizeInBits() != Op.getValueSizeInBits();
-        }))
-      return false;
-    if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
-          return VT.getSizeInBits() != Op.getValueSizeInBits();
-        }))
-      return false;
-
     size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
     SmallVector<int, 64> Mask0, Mask1;
     narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
     narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
-    for (size_t i = 0; i != MaskSize; ++i) {
+    for (int i = 0; i != (int)MaskSize; ++i) {
       if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
         Mask.push_back(SM_SentinelUndef);
       else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
         Mask.push_back(SM_SentinelZero);
       else if (Mask1[i] == SM_SentinelZero)
-        Mask.push_back(Mask0[i]);
+        Mask.push_back(i);
       else if (Mask0[i] == SM_SentinelZero)
-        Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
+        Mask.push_back(i + MaskSize);
       else
         return false;
     }
-    Ops.append(SrcInputs0.begin(), SrcInputs0.end());
-    Ops.append(SrcInputs1.begin(), SrcInputs1.end());
+    Ops.push_back(N0);
+    Ops.push_back(N1);
     return true;
   }
   case ISD::INSERT_SUBVECTOR: {
@@ -34219,6 +34209,7 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                SelectionDAG &DAG, const X86Subtarget &Subtarget,
                                unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
                                bool IsUnary) {
+  unsigned NumMaskElts = Mask.size();
   unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
 
   if (MaskVT.is128BitVector()) {
@@ -34276,6 +34267,46 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
+  // Attempt to match against an OR if we're performing a blend shuffle and the
+  // non-blended source element is zero in each case.
+  if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
+      (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
+    bool IsBlend = true;
+    unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
+    unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
+    unsigned Scale1 = NumV1Elts / NumMaskElts;
+    unsigned Scale2 = NumV2Elts / NumMaskElts;
+    APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
+    APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
+    for (unsigned i = 0; i != NumMaskElts; ++i) {
+      int M = Mask[i];
+      if (M == SM_SentinelUndef)
+        continue;
+      if (M == SM_SentinelZero) {
+        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
+        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
+        continue;
+      }
+      if (M == i) {
+        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
+        continue;
+      }
+      if (M == (i + NumMaskElts)) {
+        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
+        continue;
+      }
+      IsBlend = false;
+      break;
+    }
+    if (IsBlend &&
+        DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
+        DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
+      Shuffle = ISD::OR;
+      SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT();
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index 3d8e42b9c07d0..6a9a401264c56 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -389,11 +389,9 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
 ; SSE2-NEXT:    por %xmm4, %xmm0
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 =
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE2-NEXT: pandn %xmm3, %xmm5 -; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: por %xmm4, %xmm1 ; SSE2-NEXT: retq @@ -411,11 +409,9 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE3-NEXT: movdqa %xmm3, %xmm4 ; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE3-NEXT: por %xmm4, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE3-NEXT: pand %xmm5, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE3-NEXT: pandn %xmm3, %xmm5 -; SSE3-NEXT: por %xmm5, %xmm1 +; SSE3-NEXT: por %xmm3, %xmm1 ; SSE3-NEXT: pand %xmm2, %xmm1 ; SSE3-NEXT: por %xmm4, %xmm1 ; SSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll index 6b49f22f21f1f..9256a43f8e339 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll @@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind { define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind { ; AVX1-LABEL: negative: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14] -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index f448f41cf522e..86423ce76065b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1713,9 +1713,8 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { ; ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] -; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v8i16_XX4X8acX: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 82df05e5ae068..e5285aebda69e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; 
AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
 ; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
-; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0

From f208c659fb76b1ad8ae83dd10c4f0c30986d48ee Mon Sep 17 00:00:00 2001
From: Joao Moreira
Date: Mon, 3 Aug 2020 10:26:31 -0700
Subject: [PATCH 211/600] [X86] Make ENDBR instruction a scheduling boundary

Instructions should not be scheduled across ENDBR instructions, as this
would result in the ENDBR being displaced, breaking the parity needed
for the Indirect Branch Tracking feature of CET.

Currently, the X86IndirectBranchTracking pass runs later than instruction
scheduling in the pipeline, which makes the bug unnoticeable and very hard
(if not infeasible) to trigger while compiling C files with the standard
LLVM setup. Yet, for correctness and to prevent issues in future changes,
the compiler should prevent such scheduling.

Differential Revision: https://reviews.llvm.org/D84862
---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 12 ++++++++++++
 llvm/lib/Target/X86/X86InstrInfo.h   |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ae838274f2e68..c753880fc92cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -6675,6 +6675,18 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   return true;
 }
 
+bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+                                        const MachineBasicBlock *MBB,
+                                        const MachineFunction &MF) const {
+
+  // ENDBR instructions should not be scheduled around.
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32)
+    return true;
+
+  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
+}
+
 bool X86InstrInfo::
 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 1 && "Invalid X86 branch condition!");
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 89f2ff118c378..c345a8217168d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -409,6 +409,13 @@ class X86InstrInfo final : public X86GenInstrInfo {
   bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1,
                                int64_t &Offset2) const override;
 
+  /// isSchedulingBoundary - Overrides the isSchedulingBoundary from
+  /// Codegen/TargetInstrInfo.cpp to make it capable of identifying ENDBR
+  /// instructions and prevent them from being re-scheduled.
+ bool isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads /// should be scheduled togther. On some targets if two loads are loading from From 39494d9c21bab3281e4af30578af10f37ea09470 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Mon, 3 Aug 2020 19:56:05 +0200 Subject: [PATCH 212/600] [compiler-rt][profile] Fix various InstrProf tests on Solaris Currently, several InstrProf tests `FAIL` on Solaris (both sparc and x86): Profile-i386 :: Posix/instrprof-visibility.cpp Profile-i386 :: instrprof-merging.cpp Profile-i386 :: instrprof-set-file-object-merging.c Profile-i386 :: instrprof-set-file-object.c On sparc there's also Profile-sparc :: coverage_comments.cpp The failure mode is always the same: error: /var/llvm/local-amd64/projects/compiler-rt/test/profile/Profile-i386/Posix/Output/instrprof-visibility.cpp.tmp: Failed to load coverage: Malformed coverage data The error is from `llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp` (`loadBinaryFormat`), l.926: InstrProfSymtab ProfileNames; std::vector NamesSectionRefs = *NamesSection; if (NamesSectionRefs.size() != 1) return make_error(coveragemap_error::malformed); where .size() is 2 instead. Looking at the executable, I find (with `elfdump -c -N __llvm_prf_names`): Section Header[15]: sh_name: __llvm_prf_names sh_addr: 0x8053ca5 sh_flags: [ SHF_ALLOC ] sh_size: 0x86 sh_type: [ SHT_PROGBITS ] sh_offset: 0x3ca5 sh_entsize: 0 sh_link: 0 sh_info: 0 sh_addralign: 0x1 Section Header[31]: sh_name: __llvm_prf_names sh_addr: 0x8069998 sh_flags: [ SHF_WRITE SHF_ALLOC ] sh_size: 0 sh_type: [ SHT_PROGBITS ] sh_offset: 0x9998 sh_entsize: 0 sh_link: 0 sh_info: 0 sh_addralign: 0x1 Unlike GNU `ld` (which primarily operates on section names) the Solaris linker, following the ELF spirit, only merges input sections into an output section if both section name and section flags match, so two separate sections are maintained. The read-write one comes from `lib/clang/12.0.0/lib/sunos/libclang_rt.profile-i386.a(InstrProfilingPlatformLinux.c.o)` while the read-only one is generated by `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp` (`InstrProfiling::emitNameData`) at l.1004 where `isConstant = true`. The easiest way to avoid the mismatch is to change the definition in `compiler-rt/lib/profile/InstrProfilingPlatformLinux.c` to `const`. This fixes all failures observed. Tested on `amd64-pc-solaris2.11`, `sparcv9-sun-solaris2.11`, and `x86_64-pc-linux-gnu`. 
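As a minimal sketch of the mismatch (illustrative definitions only; these
names are not from the tree), placing both a writable and a const object into
the same named section produces the two flag sets seen above:

  /* read-write: SHF_WRITE | SHF_ALLOC, like the pre-patch runtime stub */
  char rw_names[0] __attribute__((section("__llvm_prf_names")));

  /* read-only: SHF_ALLOC, like the name data InstrProfiling emits */
  const char ro_names[1] __attribute__((section("__llvm_prf_names"))) = {0};

GNU ld merges the two into one __llvm_prf_names output section by name alone;
the Solaris linker keeps them apart because the flags disagree.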
Differential Revision: https://reviews.llvm.org/D85116 --- compiler-rt/lib/profile/InstrProfilingPlatformLinux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c index becfe1fd9f5ad..c9fb481f8e90c 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c @@ -43,7 +43,7 @@ uint64_t __prof_cnts_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_CNTS_SECT_NAME); uint32_t __prof_orderfile_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_ORDERFILE_SECT_NAME); -char __prof_nms_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_NAME_SECT_NAME); +const char __prof_nms_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_NAME_SECT_NAME); ValueProfNode __prof_vnodes_sect_data[0] COMPILER_RT_SECTION(INSTR_PROF_VNODES_SECT_NAME); COMPILER_RT_VISIBILITY const __llvm_profile_data * From f78f509c75861dc4e26f9a22ad12996bf8005a2e Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Mon, 3 Aug 2020 10:35:47 -0700 Subject: [PATCH 213/600] [PGO] Extend the value profile buckets for mem op sizes. Extend the memop value profile buckets to be more flexible (could accommodate a mix of individual values and ranges) and to cover more value ranges (from 11 to 22 buckets). Disabled behind a flag (to be enabled separately) and the existing code to be removed later. Differential Revision: https://reviews.llvm.org/D81682 --- compiler-rt/include/profile/InstrProfData.inc | 125 ++++++++++++++++++ compiler-rt/lib/profile/InstrProfilingValue.c | 17 ++- llvm/include/llvm/ProfileData/InstrProf.h | 8 ++ .../llvm/ProfileData/InstrProfData.inc | 125 ++++++++++++++++++ .../Instrumentation/InstrProfiling.h | 2 + llvm/lib/ProfileData/InstrProf.cpp | 2 + .../Instrumentation/InstrProfiling.cpp | 55 ++++++-- .../Instrumentation/PGOMemOPSizeOpt.cpp | 21 ++- llvm/test/Transforms/PGOProfile/memcpy.ll | 9 +- .../PGOProfile/memop_profile_funclet.ll | 9 +- llvm/unittests/ProfileData/CMakeLists.txt | 1 + .../ProfileData/InstrProfDataTest.cpp | 68 ++++++++++ 12 files changed, 423 insertions(+), 19 deletions(-) create mode 100644 llvm/unittests/ProfileData/InstrProfDataTest.cpp diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index a6913527e67f0..6d0ffb12294b0 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -157,6 +157,8 @@ VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA #ifndef VALUE_RANGE_PROF VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) #else /* VALUE_RANGE_PROF */ +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ INSTR_PROF_COMMA VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \ @@ -753,9 +755,14 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +/* FIXME: This is to be removed after switching to the new memop value + * profiling. 
*/ #define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range #define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC) /* InstrProfile per-function control data alignment. */ #define INSTR_PROF_DATA_ALIGNMENT 8 @@ -783,3 +790,121 @@ typedef struct InstrProfValueData { #endif #undef COVMAP_V2_OR_V3 + +#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API + +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#else +#define INSTR_PROF_INLINE +#endif + +/* The value range buckets (22 buckets) for the memop size value profiling looks + * like: + * + * [0, 0] + * [1, 1] + * [2, 2] + * [3, 3] + * [4, 4] + * [5, 5] + * [6, 6] + * [7, 7] + * [8, 8] + * [9, 15] + * [16, 16] + * [17, 31] + * [32, 32] + * [33, 63] + * [64, 64] + * [65, 127] + * [128, 128] + * [129, 255] + * [256, 256] + * [257, 511] + * [512, 512] + * [513, UINT64_MAX] + * + * Each range has a 'representative value' which is the lower end value of the + * range and used to store in the runtime profile data records and the VP + * metadata. For example, it's 2 for [2, 2] and 64 for [65, 127]. + */ + +/* + * Clz and Popcount. This code was copied from + * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and + * llvm/include/llvm/Support/MathExtras.h. + */ +#if defined(_MSC_VER) && !defined(__clang__) + +#include +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { + unsigned long LeadZeroIdx = 0; +#if !defined(_M_ARM64) && !defined(_M_X64) + // Scan the high 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32))) + return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset + // from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X))) + return (int)(63 - LeadZeroIdx); +#else + if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx; +#endif + return 64; +} +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { + // This code originates from https://reviews.llvm.org/rG30626254510f. + unsigned long long v = X; + v = v - ((v >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56); +} + +#else + +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); } +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); } + +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +/* Map an (observed) memop size value to the representative value of its range. + * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t +InstrProfGetRangeRepValue(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. Use the value as is. + return Value; + else if (Value >= 513) + // The last range is mapped to its lowest value. + return 513; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, use it as is. + return Value; + else + // Otherwise, take to the previous power of two + 1. 
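+ // (Worked example: Value = 100 has InstProfClzll(100) == 57, so the
+ // expression below evaluates to (1 << 6) + 1 == 65, the representative
+ // value of the [65, 127] bucket listed above.)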
+ return (1 << (64 - InstProfClzll(Value) - 1)) + 1; +} + +/* Return true if the range that an (observed) memop size value belongs to has + * only a single value in the range. For example, 0 -> true, 8 -> true, 10 -> + * false, 64 -> true, 100 -> false, 513 -> false. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned +InstrProfIsSingleValRange(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. + return 1; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, there's only one value. + return 1; + else + // Otherwise, there's more than one value in the range. + return 0; +} + +#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */ diff --git a/compiler-rt/lib/profile/InstrProfilingValue.c b/compiler-rt/lib/profile/InstrProfilingValue.c index 4b4081bd21b70..f6b28b48e8426 100644 --- a/compiler-rt/lib/profile/InstrProfilingValue.c +++ b/compiler-rt/lib/profile/InstrProfilingValue.c @@ -18,13 +18,14 @@ #define INSTR_PROF_VALUE_PROF_DATA #define INSTR_PROF_COMMON_API_IMPL +#define INSTR_PROF_VALUE_PROF_MEMOP_API #include "profile/InstrProfData.inc" static int hasStaticCounters = 1; static int OutOfNodesWarnings = 0; static int hasNonDefaultValsPerSite = 0; #define INSTR_PROF_MAX_VP_WARNS 10 -#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16 +#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 #define INSTR_PROF_VNODE_POOL_SIZE 1024 #ifndef _MSC_VER @@ -253,6 +254,8 @@ __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, * The range for large values is optional. The default value of INT64_MIN * indicates it is not specified. */ +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range( uint64_t TargetValue, void *Data, uint32_t CounterIndex, int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) { @@ -266,6 +269,18 @@ COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range( __llvm_profile_instrument_target(TargetValue, Data, CounterIndex); } +/* + * The target values are partitioned into multiple ranges. The range spec is + * defined in InstrProfData.inc. + */ +COMPILER_RT_VISIBILITY void +__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data, + uint32_t CounterIndex) { + // Map the target value to the representative value of its range. + uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue); + __llvm_profile_instrument_target(RepValue, Data, CounterIndex); +} + /* * A wrapper struct that represents value profile runtime data. * Like InstrProfRecord class which is used by profiling host tools, diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 50c4857537812..c767a362d6dcb 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -75,10 +75,18 @@ inline StringRef getInstrProfValueProfFuncName() { } /// Return the name profile runtime entry point to do value range profiling. +// FIXME: This is to be removed after switching to the new memop value +// profiling. inline StringRef getInstrProfValueRangeProfFuncName() { return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; } +/// Return the name profile runtime entry point to do memop size value +/// profiling. +inline StringRef getInstrProfValueProfMemOpFuncName() { + return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR; +} + /// Return the name prefix of variables containing instrumented function names. 
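/// For example, the name variable for a function "foo" is "__profn_foo".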
inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index e56623afad644..06927fb5652b9 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -157,6 +157,8 @@ VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA #ifndef VALUE_RANGE_PROF VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) #else /* VALUE_RANGE_PROF */ +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ INSTR_PROF_COMMA VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \ @@ -754,9 +756,14 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) +/* FIXME: This is to be removed after switching to the new memop value + * profiling. */ #define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range #define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC) /* InstrProfile per-function control data alignment. */ #define INSTR_PROF_DATA_ALIGNMENT 8 @@ -784,3 +791,121 @@ typedef struct InstrProfValueData { #endif #undef COVMAP_V2_OR_V3 + +#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API + +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#else +#define INSTR_PROF_INLINE +#endif + +/* The value range buckets (22 buckets) for the memop size value profiling looks + * like: + * + * [0, 0] + * [1, 1] + * [2, 2] + * [3, 3] + * [4, 4] + * [5, 5] + * [6, 6] + * [7, 7] + * [8, 8] + * [9, 15] + * [16, 16] + * [17, 31] + * [32, 32] + * [33, 63] + * [64, 64] + * [65, 127] + * [128, 128] + * [129, 255] + * [256, 256] + * [257, 511] + * [512, 512] + * [513, UINT64_MAX] + * + * Each range has a 'representative value' which is the lower end value of the + * range and used to store in the runtime profile data records and the VP + * metadata. For example, it's 2 for [2, 2] and 64 for [65, 127]. + */ + +/* + * Clz and Popcount. This code was copied from + * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and + * llvm/include/llvm/Support/MathExtras.h. + */ +#if defined(_MSC_VER) && !defined(__clang__) + +#include +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { + unsigned long LeadZeroIdx = 0; +#if !defined(_M_ARM64) && !defined(_M_X64) + // Scan the high 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32))) + return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset + // from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X))) + return (int)(63 - LeadZeroIdx); +#else + if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx; +#endif + return 64; +} +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { + // This code originates from https://reviews.llvm.org/rG30626254510f. 
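+ // (SWAR popcount: each step sums adjacent bit groups in parallel, first
+ // pairs, then nibbles, then bytes; the multiply by 0x0101010101010101
+ // accumulates the byte sums into the top byte, which the shift by 56
+ // extracts.)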
+ unsigned long long v = X; + v = v - ((v >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56); +} + +#else + +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); } +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); } + +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +/* Map an (observed) memop size value to the representative value of its range. + * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t +InstrProfGetRangeRepValue(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. Use the value as is. + return Value; + else if (Value >= 513) + // The last range is mapped to its lowest value. + return 513; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, use it as is. + return Value; + else + // Otherwise, take to the previous power of two + 1. + return (1 << (64 - InstProfClzll(Value) - 1)) + 1; +} + +/* Return true if the range that an (observed) memop size value belongs to has + * only a single value in the range. For example, 0 -> true, 8 -> true, 10 -> + * false, 64 -> true, 100 -> false, 513 -> false. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned +InstrProfIsSingleValRange(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. + return 1; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, there's only one value. + return 1; + else + // Otherwise, there's more than one value in the range. + return 0; +} + +#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */ diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 263d3b629589c..a7052f7b6a2b1 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -68,6 +68,8 @@ class InstrProfiling : public PassInfoMixin { // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; + // FIXME: These are to be removed after switching to the new memop value + // profiling. // The start value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeStart; // The end value of precise value profile range for memory intrinsic sizes. diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index fb788ef4c7655..a172f319b502c 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1112,6 +1112,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { return true; } +// FIXME: This is to be removed after switching to the new memop value +// profiling. // Parse the value profile options. 
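// The range is given as a "start:last" string, for example "0:8" profiles
// sizes 0 through 8 precisely.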
void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, int64_t &RangeLast) { diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 8279716002864..3ab697d6cc321 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -57,6 +57,8 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" +// FIXME: These are to be removed after switching to the new memop value +// profiling. // The start and end values of precise value profile range for memory // intrinsic sizes cl::opt MemOPSizeRange( @@ -72,6 +74,12 @@ cl::opt MemOPSizeLarge( "Value of 0 disables the large value profiling."), cl::init(8192)); +cl::opt UseOldMemOpValueProf( + "use-old-memop-value-prof", + cl::desc("Use the old memop value profiling buckets. This is " + "transitional and to be removed after switching. "), + cl::init(true)); + namespace { cl::opt DoHashBasedCounterSplit( @@ -411,6 +419,19 @@ class PGOCounterPromoter { BlockFrequencyInfo *BFI; }; +enum class ValueProfilingCallType { + // Individual values are tracked. Currently used for indiret call target + // profiling. + Default, + + // The old memop size value profiling. FIXME: To be removed after switching to + // the new one. + OldMemOp, + + // MemOp: the (new) memop size value profiling with extended buckets. + MemOp +}; + } // end anonymous namespace PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) { @@ -595,9 +616,9 @@ bool InstrProfiling::run( return true; } -static FunctionCallee -getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, - bool IsRange = false) { +static FunctionCallee getOrInsertValueProfilingCall( + Module &M, const TargetLibraryInfo &TLI, + ValueProfilingCallType CallType = ValueProfilingCallType::Default) { LLVMContext &Ctx = M.getContext(); auto *ReturnTy = Type::getVoidTy(M.getContext()); @@ -605,16 +626,22 @@ getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI, if (auto AK = TLI.getExtAttrForI32Param(false)) AL = AL.addParamAttribute(M.getContext(), 2, AK); - if (!IsRange) { + if (CallType == ValueProfilingCallType::Default || + CallType == ValueProfilingCallType::MemOp) { Type *ParamTypes[] = { #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType #include "llvm/ProfileData/InstrProfData.inc" }; auto *ValueProfilingCallTy = FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); - return M.getOrInsertFunction(getInstrProfValueProfFuncName(), - ValueProfilingCallTy, AL); + StringRef FuncName = CallType == ValueProfilingCallType::Default + ? getInstrProfValueProfFuncName() + : getInstrProfValueProfMemOpFuncName(); + return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL); } else { + // FIXME: This code is to be removed after switching to the new memop value + // profiling. 
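+ // (Unlike the three-argument target/memop entry points above, the old
+ // range entry point takes three extra operands: the precise range start,
+ // the precise range last, and the large-value threshold.)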
+ assert(CallType == ValueProfilingCallType::OldMemOp); Type *RangeParamTypes[] = { #define VALUE_RANGE_PROF 1 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType @@ -654,8 +681,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Index += It->second.NumValueSites[Kind]; IRBuilder<> Builder(Ind); - bool IsRange = (Ind->getValueKind()->getZExtValue() == - llvm::InstrProfValueKind::IPVK_MemOPSize); + bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() == + llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; auto *TLI = &GetTLI(*Ind->getFunction()); @@ -665,12 +692,19 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { // WinEHPrepare pass. SmallVector OpBundles; Ind->getOperandBundlesAsDefs(OpBundles); - if (!IsRange) { + if (!IsMemOpSize) { Value *Args[3] = {Ind->getTargetValue(), Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), Builder.getInt32(Index)}; Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args, OpBundles); + } else if (!UseOldMemOpValueProf) { + Value *Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Call = Builder.CreateCall( + getOrInsertValueProfilingCall(*M, *TLI, ValueProfilingCallType::MemOp), + Args, OpBundles); } else { Value *Args[6] = { Ind->getTargetValue(), @@ -679,7 +713,8 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.getInt64(MemOPSizeRangeStart), Builder.getInt64(MemOPSizeRangeLast), Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)}; - Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), + Call = Builder.CreateCall(getOrInsertValueProfilingCall( + *M, *TLI, ValueProfilingCallType::OldMemOp), Args, OpBundles); } if (auto AK = TLI->getExtAttrForI32Param(false)) diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 2b7b859891dcd..43a1434ae2d37 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -38,6 +38,8 @@ #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/ProfileData/InstrProf.h" +#define INSTR_PROF_VALUE_PROF_MEMOP_API +#include "llvm/ProfileData/InstrProfData.inc" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -89,17 +91,25 @@ static cl::opt cl::desc("Scale the memop size counts using the basic " " block count value")); +// FIXME: These are to be removed after switching to the new memop value +// profiling. // This option sets the rangge of precise profile memop sizes. extern cl::opt MemOPSizeRange; // This option sets the value that groups large memop sizes extern cl::opt MemOPSizeLarge; +extern cl::opt UseOldMemOpValueProf; + cl::opt MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls")); +static cl::opt + MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), + cl::desc("Optimize the memop size <= this value")); + namespace { class PGOMemOPSizeOptLegacyPass : public FunctionPass { public: @@ -269,6 +279,8 @@ class MemOPSizeOpt : public InstVisitor { TargetLibraryInfo &TLI; bool Changed; std::vector WorkList; + // FIXME: These are to be removed after switching to the new memop value + // profiling. // Start of the previse range. 
int64_t PreciseRangeStart; // Last value of the previse range. @@ -277,6 +289,8 @@ class MemOPSizeOpt : public InstVisitor { std::unique_ptr ValueDataArray; bool perform(MemOp MO); + // FIXME: This is to be removed after switching to the new memop value + // profiling. // This kind shows which group the value falls in. For PreciseValue, we have // the profile count for that value. LargeGroup groups the values that are in // range [LargeValue, +inf). NonLargeGroup groups the rest of values. @@ -365,8 +379,11 @@ bool MemOPSizeOpt::perform(MemOp MO) { if (MemOPScaleCount) C = getScaledCount(C, ActualCount, SavedTotalCount); - // Only care precise value here. - if (getMemOPSizeKind(V) != PreciseValue) + if (UseOldMemOpValueProf) { + // Only care precise value here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + } else if (!InstrProfIsSingleValRange(V) || V > MemOpMaxOptSize) continue; // ValueCounts are sorted on the count. Break at the first un-profitable diff --git a/llvm/test/Transforms/PGOProfile/memcpy.ll b/llvm/test/Transforms/PGOProfile/memcpy.ll index 6047c95e7c084..e00e1d350871d 100644 --- a/llvm/test/Transforms/PGOProfile/memcpy.ll +++ b/llvm/test/Transforms/PGOProfile/memcpy.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s -; RUN: opt <%s -passes=pgo-instr-gen,instrprof -S | FileCheck %s +; RUN: opt < %s -pgo-instr-gen -instrprof -use-old-memop-value-prof=true -S | FileCheck %s --check-prefix=OLDMEMOPVP +; RUN: opt < %s -pgo-instr-gen -instrprof -use-old-memop-value-prof=false -S | FileCheck %s --check-prefix=NEWMEMOPVP +; RUN: opt <%s -passes=pgo-instr-gen,instrprof -use-old-memop-value-prof=true -S | FileCheck %s --check-prefix=OLDMEMOPVP +; RUN: opt <%s -passes=pgo-instr-gen,instrprof -use-old-memop-value-prof=false -S | FileCheck %s --check-prefix=NEWMEMOPVP target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -23,7 +25,8 @@ for.cond1: for.body3: %conv = sext i32 %add to i64 -; CHECK: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0, i64 0, i64 8, i64 8192) +; OLDMEMOPVP: call void @__llvm_profile_instrument_range(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0, i64 0, i64 8, i64 8192) +; NEWMEMOPVP: call void @__llvm_profile_instrument_memop(i64 %conv, i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [2 x i16] }* @__profd_foo to i8*), i32 0) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false) %inc = add nsw i32 %j.0, 1 br label %for.cond1 diff --git a/llvm/test/Transforms/PGOProfile/memop_profile_funclet.ll b/llvm/test/Transforms/PGOProfile/memop_profile_funclet.ll index b79431e3128e8..43c85ed25baae 100644 --- a/llvm/test/Transforms/PGOProfile/memop_profile_funclet.ll +++ b/llvm/test/Transforms/PGOProfile/memop_profile_funclet.ll @@ -1,8 +1,10 @@ ; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN -; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s --check-prefix=LOWER +; RUN: opt < %s -pgo-instr-gen -instrprof -use-old-memop-value-prof=true -S | FileCheck %s --check-prefixes=LOWER,LOWEROLDMEMOPVP +; RUN: opt < %s -pgo-instr-gen -instrprof -use-old-memop-value-prof=false -S | FileCheck %s --check-prefixes=LOWER,LOWERNEWMEMOPVP ; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN -; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s 
--check-prefix=LOWER +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -use-old-memop-value-prof=true -S | FileCheck %s --check-prefixes=LOWER,LOWEROLDMEMOPVP +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -use-old-memop-value-prof=false -S | FileCheck %s --check-prefixes=LOWER,LOWERNEWMEMOPVP ; This test is to verify that PGO runtime library calls get created with the ; appropriate operand bundle funclet information when a memory intrinsic @@ -63,7 +65,8 @@ try.cont: ; preds = %entry ; GEN-SAME: [ "funclet"(token %tmp1) ] ; LOWER: catch: -; LOWER: call void @__llvm_profile_instrument_range( +; LOWEROLDMEMOPVP: call void @__llvm_profile_instrument_range( +; LOWERNEWMEMOPVP: call void @__llvm_profile_instrument_memop( ; LOWER-SAME: [ "funclet"(token %tmp1) ] declare dso_local void @"?may_throw@@YAXH@Z"(i32) diff --git a/llvm/unittests/ProfileData/CMakeLists.txt b/llvm/unittests/ProfileData/CMakeLists.txt index 366ed5482bf2c..00a0079e675a8 100644 --- a/llvm/unittests/ProfileData/CMakeLists.txt +++ b/llvm/unittests/ProfileData/CMakeLists.txt @@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_unittest(ProfileDataTests CoverageMappingTest.cpp + InstrProfDataTest.cpp InstrProfTest.cpp SampleProfTest.cpp ) diff --git a/llvm/unittests/ProfileData/InstrProfDataTest.cpp b/llvm/unittests/ProfileData/InstrProfDataTest.cpp new file mode 100644 index 0000000000000..af1a3de0657c2 --- /dev/null +++ b/llvm/unittests/ProfileData/InstrProfDataTest.cpp @@ -0,0 +1,68 @@ +//===- unittest/ProfileData/InstProfDataTest.cpp ----------------------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include + +#define INSTR_PROF_VALUE_PROF_MEMOP_API +#include "llvm/ProfileData/InstrProfData.inc" + +namespace { + +TEST(InstrProfDataTest, MapValueToRangeRepValue) { + EXPECT_EQ(0ULL, InstrProfGetRangeRepValue(0)); + EXPECT_EQ(1ULL, InstrProfGetRangeRepValue(1)); + EXPECT_EQ(2ULL, InstrProfGetRangeRepValue(2)); + EXPECT_EQ(3ULL, InstrProfGetRangeRepValue(3)); + EXPECT_EQ(4ULL, InstrProfGetRangeRepValue(4)); + EXPECT_EQ(5ULL, InstrProfGetRangeRepValue(5)); + EXPECT_EQ(6ULL, InstrProfGetRangeRepValue(6)); + EXPECT_EQ(7ULL, InstrProfGetRangeRepValue(7)); + EXPECT_EQ(8ULL, InstrProfGetRangeRepValue(8)); + EXPECT_EQ(9ULL, InstrProfGetRangeRepValue(9)); + EXPECT_EQ(16ULL, InstrProfGetRangeRepValue(16)); + EXPECT_EQ(17ULL, InstrProfGetRangeRepValue(30)); + EXPECT_EQ(32ULL, InstrProfGetRangeRepValue(32)); + EXPECT_EQ(33ULL, InstrProfGetRangeRepValue(54)); + EXPECT_EQ(64ULL, InstrProfGetRangeRepValue(64)); + EXPECT_EQ(65ULL, InstrProfGetRangeRepValue(127)); + EXPECT_EQ(128ULL, InstrProfGetRangeRepValue(128)); + EXPECT_EQ(129ULL, InstrProfGetRangeRepValue(200)); + EXPECT_EQ(256ULL, InstrProfGetRangeRepValue(256)); + EXPECT_EQ(257ULL, InstrProfGetRangeRepValue(397)); + EXPECT_EQ(512ULL, InstrProfGetRangeRepValue(512)); + EXPECT_EQ(513ULL, InstrProfGetRangeRepValue(2832048023)); +} + +TEST(InstrProfDataTest, IsInOneValueRange) { + EXPECT_EQ(true, InstrProfIsSingleValRange(0)); + EXPECT_EQ(true, InstrProfIsSingleValRange(1)); + EXPECT_EQ(true, InstrProfIsSingleValRange(2)); + EXPECT_EQ(true, InstrProfIsSingleValRange(3)); + EXPECT_EQ(true, InstrProfIsSingleValRange(4)); + EXPECT_EQ(true, InstrProfIsSingleValRange(5)); + EXPECT_EQ(true, 
InstrProfIsSingleValRange(6)); + EXPECT_EQ(true, InstrProfIsSingleValRange(7)); + EXPECT_EQ(true, InstrProfIsSingleValRange(8)); + EXPECT_EQ(false, InstrProfIsSingleValRange(9)); + EXPECT_EQ(true, InstrProfIsSingleValRange(16)); + EXPECT_EQ(false, InstrProfIsSingleValRange(30)); + EXPECT_EQ(true, InstrProfIsSingleValRange(32)); + EXPECT_EQ(false, InstrProfIsSingleValRange(54)); + EXPECT_EQ(true, InstrProfIsSingleValRange(64)); + EXPECT_EQ(false, InstrProfIsSingleValRange(127)); + EXPECT_EQ(true, InstrProfIsSingleValRange(128)); + EXPECT_EQ(false, InstrProfIsSingleValRange(200)); + EXPECT_EQ(true, InstrProfIsSingleValRange(256)); + EXPECT_EQ(false, InstrProfIsSingleValRange(397)); + EXPECT_EQ(true, InstrProfIsSingleValRange(512)); + EXPECT_EQ(false, InstrProfIsSingleValRange(2832048023344)); +} + +} // end anonymous namespace From c12bd8dac91adac81cd9721fe34daf473ebd5e10 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 3 Aug 2020 18:05:15 +0000 Subject: [PATCH 214/600] [gn build] Port f78f509c758 --- llvm/utils/gn/secondary/llvm/unittests/ProfileData/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/ProfileData/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ProfileData/BUILD.gn index e933b510003cd..3818b0caad04b 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ProfileData/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ProfileData/BUILD.gn @@ -9,6 +9,7 @@ unittest("ProfileDataTests") { ] sources = [ "CoverageMappingTest.cpp", + "InstrProfDataTest.cpp", "InstrProfTest.cpp", "SampleProfTest.cpp", ] From 1e392fc44584a4909b4dced02b8386b48963002b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 3 Aug 2020 19:18:13 +0100 Subject: [PATCH 215/600] [ArgPromotion] Replace all md uses of promoted values with undef. Currently, ArgPromotion may leave metadata uses of promoted values, which will end up in the wrong function, creating invalid IR. PR33641 fixed this for dead arguments, but it can also be triggered by arguments with users that are promoted (see the updated test case). We also have to drop uses of them after promoting them. We need to do this after dealing with the non-metadata uses, so I also moved the empty use case to the loop that deals with updating the arguments of the new function. Reviewed By: aprantl Differential Revision: https://reviews.llvm.org/D85127 --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 11 ++-- .../pr33641_remove_arg_dbgvalue.ll | 50 +++++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index ad0d7eb51507a..d511ad2729abc 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -153,10 +154,6 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; - - // There may be remaining metadata uses of the argument for things like - // llvm.dbg.value. Replace them with undef. - I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted.
This means that the only uses are loads // or GEPs which are only used by loads @@ -414,6 +411,11 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, continue; } + // There potentially are metadata uses for things like llvm.dbg.value. + // Replace them with undef, after handling the other regular uses. + auto RauwUndefMetadata = make_scope_exit( + [&]() { I->replaceAllUsesWith(UndefValue::get(I->getType())); }); + if (I->use_empty()) continue; @@ -465,7 +467,6 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, GEP->eraseFromParent(); } } - // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } diff --git a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll index 4c2503d8b0ccc..8dcc5b8c4f137 100644 --- a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -30,6 +30,52 @@ define internal void @bar(%p_t %p) { declare void @llvm.dbg.value(metadata, metadata, metadata) + +; Test case where the promoted argument has uses in @callee and we need to +; retain a reference to the original function, because it is stored in @storer. +define void @storer({i32, i32}* %ptr) { +; CHECK-LABEL: define {{[^@]+}}@storer +; CHECK-SAME: ({ i32, i32 }* [[PTR:%.*]]) +; CHECK-NEXT: ret void +; + %tmp = alloca i32 ({i32, i32}*)* + store i32 ({i32, i32}*)* @callee, i32 ({i32, i32}*)** %tmp + ret void +} + +define i32 @caller() { +; CHECK-LABEL: define {{[^@]+}}@caller() +; CHECK-NEXT: [[TMP:%.*]] = alloca { i32, i32 }, align 8 +; CHECK-NEXT: [[F_1:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[TMP]], i32 0, i32 1 +; CHECK-NEXT: store i32 10, i32* [[F_1]], align 4 +; CHECK-NEXT: [[TMP_IDX:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[TMP]], i64 0, i32 1 +; CHECK-NEXT: [[TMP_IDX_VAL:%.*]] = load i32, i32* [[TMP_IDX]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call i32 @callee(i32 [[TMP_IDX_VAL]]) +; CHECK-NEXT: ret i32 [[RES]] +; + %tmp = alloca {i32, i32} + %f.1 = getelementptr {i32, i32}, {i32, i32}* %tmp, i32 0, i32 1 + store i32 10, i32* %f.1 + %res = call i32 @callee({i32, i32}* %tmp) + ret i32 %res +} + +define internal i32 @callee({i32, i32}* %ptr) !dbg !7 { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i32 [[PTR_0_1_VAL:%.*]]) !dbg !6 +; CHECK-NEXT: call void @llvm.dbg.value(metadata { i32, i32 }* undef, metadata !7, metadata !DIExpression()), !dbg !8 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PTR_0_1_VAL]], metadata !7, metadata !DIExpression()), !dbg !8 +; CHECK-NEXT: ret i32 [[PTR_0_1_VAL]] +; + call void @llvm.dbg.value(metadata {i32, i32}* %ptr, metadata !8, metadata !9), !dbg !10 + %f.1 = getelementptr {i32, i32}, {i32, i32}* %ptr, i32 0, i32 1 + %l.1 = load i32, i32* %f.1 + call void @llvm.dbg.value(metadata i32 %l.1, metadata !8, metadata !9), !dbg !10 + ret i32 %l.1 +} + + + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} @@ -40,3 +86,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !4 = !DILocalVariable(name: "p", scope: !3) !5 = !DIExpression() !6 = !DILocation(line: 1, column: 1, scope: !3) +!7 = distinct !DISubprogram(name: "callee", unit: !0) +!8 = !DILocalVariable(name: "c", scope: !7) +!9 = !DIExpression() +!10 = !DILocation(line: 2, column: 2, scope: !7) From c6334db577e7049fe4868b1647c9f937f68ff1f5 Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Thu, 30 Jul 2020 18:33:33 -0700 Subject: [PATCH 
216/600] [X86] support .nops directive Add support of .nops on X86. This addresses llvm.org/PR45788. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D82826 --- llvm/include/llvm/MC/MCAsmBackend.h | 4 ++ llvm/include/llvm/MC/MCFragment.h | 26 +++++++++++ llvm/include/llvm/MC/MCObjectStreamer.h | 2 + llvm/include/llvm/MC/MCStreamer.h | 3 ++ llvm/lib/MC/MCAssembler.cpp | 46 ++++++++++++++++++- llvm/lib/MC/MCFragment.cpp | 12 +++++ llvm/lib/MC/MCObjectStreamer.cpp | 10 ++++ llvm/lib/MC/MCStreamer.cpp | 3 ++ .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 41 ++++++++++++++++- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 35 +++++++------- llvm/test/MC/X86/x86-directive-nops-errors.s | 12 +++++ llvm/test/MC/X86/x86-directive-nops.s | 12 +++++ llvm/test/MC/X86/x86_64-directive-nops.s | 19 ++++++++ 13 files changed, 205 insertions(+), 20 deletions(-) create mode 100644 llvm/test/MC/X86/x86-directive-nops-errors.s create mode 100644 llvm/test/MC/X86/x86-directive-nops.s create mode 100644 llvm/test/MC/X86/x86_64-directive-nops.s diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 8f95cfd55a3d7..94ed3d27e7859 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -177,6 +177,10 @@ class MCAsmBackend { /// virtual unsigned getMinimumNopSize() const { return 1; } + /// Returns the maximum size of a nop in bytes on this target. + /// + virtual unsigned getMaximumNopSize() const { return 0; } + /// Write an (optimal) nop sequence of Count bytes to the given output. If the /// target cannot generate such a sequence, it should return an error. /// diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index fb7166e82c098..87338ab46cc2a 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -37,6 +37,7 @@ class MCFragment : public ilist_node_with_parent { FT_Data, FT_CompactEncodedInst, FT_Fill, + FT_Nops, FT_Relaxable, FT_Org, FT_Dwarf, @@ -350,6 +351,31 @@ class MCFillFragment : public MCFragment { } }; +class MCNopsFragment : public MCFragment { + /// The number of bytes to insert. + int64_t Size; + /// Maximum number of bytes allowed in each NOP instruction. + int64_t ControlledNopLength; + + /// Source location of the directive that this fragment was created for. + SMLoc Loc; + +public: + MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L, + MCSection *Sec = nullptr) + : MCFragment(FT_Nops, false, Sec), Size(NumBytes), + ControlledNopLength(ControlledNopLength), Loc(L) {} + + int64_t getNumBytes() const { return Size; } + int64_t getControlledNopLength() const { return ControlledNopLength; } + + SMLoc getLoc() const { return Loc; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Nops; + } +}; + class MCOrgFragment : public MCFragment { /// Value to use for filling bytes. 
int8_t Value; diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index c3f3ae5de921e..a00000bc11b60 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -179,6 +179,8 @@ class MCObjectStreamer : public MCStreamer { SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()) override; + void emitNops(int64_t NumBytes, int64_t ControlledNopLength, + SMLoc Loc) override; void emitFileDirective(StringRef Filename) override; void emitAddrsig() override; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 484c62538366e..63a4c1d190aca 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -777,6 +777,9 @@ class MCStreamer { virtual void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()); + virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength, + SMLoc Loc); + /// Emit NumBytes worth of zeros. /// This function properly handles data in virtual sections. void emitZeros(uint64_t NumBytes); diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 3ca8714b7817c..9515b7e2642bc 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -62,8 +62,8 @@ STATISTIC(EmittedAlignFragments, "Number of emitted assembler fragments - align"); STATISTIC(EmittedFillFragments, "Number of emitted assembler fragments - fill"); -STATISTIC(EmittedOrgFragments, - "Number of emitted assembler fragments - org"); +STATISTIC(EmittedNopsFragments, "Number of emitted assembler fragments - nops"); +STATISTIC(EmittedOrgFragments, "Number of emitted assembler fragments - org"); STATISTIC(evaluateFixup, "Number of evaluated fixups"); STATISTIC(FragmentLayouts, "Number of fragment layouts"); STATISTIC(ObjectBytes, "Number of emitted object file bytes"); @@ -312,6 +312,9 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return Size; } + case MCFragment::FT_Nops: + return cast(F).getNumBytes(); + case MCFragment::FT_LEB: return cast(F).getContents().size(); @@ -613,6 +616,45 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm, break; } + case MCFragment::FT_Nops: { + ++stats::EmittedNopsFragments; + const MCNopsFragment &NF = cast(F); + int64_t NumBytes = NF.getNumBytes(); + int64_t ControlledNopLength = NF.getControlledNopLength(); + int64_t MaximumNopLength = Asm.getBackend().getMaximumNopSize(); + + assert(NumBytes > 0 && "Expected positive NOPs fragment size"); + assert(ControlledNopLength >= 0 && "Expected non-negative NOP size"); + + if (ControlledNopLength > MaximumNopLength) { + Asm.getContext().reportError(NF.getLoc(), + "illegal NOP size " + + std::to_string(ControlledNopLength) + + ". (expected within [0, " + + std::to_string(MaximumNopLength) + "])"); + // Clamp the NOP length as reportError does not stop the execution + // immediately. 
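+ // (The clamp keeps the emission loop below working with a legal
+ // per-instruction NOP size even though the directive has already been
+ // diagnosed.)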
+ ControlledNopLength = MaximumNopLength; + } + + // Use maximum value if the size of each NOP is not specified + if (!ControlledNopLength) + ControlledNopLength = MaximumNopLength; + + while (NumBytes) { + uint64_t NumBytesToEmit = + (uint64_t)std::min(NumBytes, ControlledNopLength); + assert(NumBytesToEmit && "try to emit empty NOP instruction"); + if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit)) { + report_fatal_error("unable to write nop sequence of the remaining " + + Twine(NumBytesToEmit) + " bytes"); + break; + } + NumBytes -= NumBytesToEmit; + } + break; + } + case MCFragment::FT_LEB: { const MCLEBFragment &LF = cast(F); OS << LF.getContents(); diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index 8e90e07a4dbfd..e9cea9d18f2e1 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -279,6 +279,9 @@ void MCFragment::destroy() { case FT_Fill: delete cast(this); return; + case FT_Nops: + delete cast(this); + return; case FT_Relaxable: delete cast(this); return; @@ -336,6 +339,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { case MCFragment::FT_CompactEncodedInst: OS << "MCCompactEncodedInstFragment"; break; case MCFragment::FT_Fill: OS << "MCFillFragment"; break; + case MCFragment::FT_Nops: + OS << "MCFNopsFragment"; + break; case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break; case MCFragment::FT_Org: OS << "MCOrgFragment"; break; case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; @@ -408,6 +414,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { << " NumValues:" << FF->getNumValues(); break; } + case MCFragment::FT_Nops: { + const auto *NF = cast(this); + OS << " NumBytes:" << NF->getNumBytes() + << " ControlledNopLength:" << NF->getControlledNopLength(); + break; + } case MCFragment::FT_Relaxable: { const auto *F = cast(this); OS << "\n "; diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 78ee215b59aae..f9e0d858cf701 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -819,6 +819,16 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, insert(new MCFillFragment(Expr, Size, NumValues, Loc)); } +void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength, + SMLoc Loc) { + // Emit an NOP fragment. + MCDataFragment *DF = getOrCreateDataFragment(); + flushPendingLabels(DF, DF->getContents().size()); + + assert(getCurrentSectionOnly() && "need a section"); + insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc)); +} + void MCObjectStreamer::emitFileDirective(StringRef Filename) { getAssembler().addFileName(Filename); } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 6d3a933c96a37..df08c343f69f7 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -202,6 +202,9 @@ void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue); } +void llvm::MCStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLen, + llvm::SMLoc) {} + /// The implementation in this class just redirects to emitFill. 
void MCStreamer::emitZeros(uint64_t NumBytes) { emitFill(NumBytes, 0); } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 49c01d7b9ef0b..ddb13e46e9305 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -934,6 +934,7 @@ class X86AsmParser : public MCTargetAsmParser { OperandVector &Operands); bool parseDirectiveArch(); + bool parseDirectiveNops(SMLoc L); bool parseDirectiveEven(SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -4037,7 +4038,9 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { "a '%' prefix in .intel_syntax"); } return false; - } else if (IDVal == ".even") + } else if (IDVal == ".nops") + return parseDirectiveNops(DirectiveID.getLoc()); + else if (IDVal == ".even") return parseDirectiveEven(DirectiveID.getLoc()); else if (IDVal == ".cv_fpo_proc") return parseDirectiveFPOProc(DirectiveID.getLoc()); @@ -4073,6 +4076,42 @@ bool X86AsmParser::parseDirectiveArch() { return false; } +/// parseDirectiveNops +/// ::= .nops size[, control] +bool X86AsmParser::parseDirectiveNops(SMLoc L) { + int64_t NumBytes = 0, Control = 0; + SMLoc NumBytesLoc, ControlLoc; + const MCSubtargetInfo STI = getSTI(); + NumBytesLoc = getTok().getLoc(); + if (getParser().checkForValidSection() || + getParser().parseAbsoluteExpression(NumBytes)) + return true; + + if (parseOptionalToken(AsmToken::Comma)) { + ControlLoc = getTok().getLoc(); + if (getParser().parseAbsoluteExpression(Control)) + return true; + } + if (getParser().parseToken(AsmToken::EndOfStatement, + "unexpected token in '.nops' directive")) + return true; + + if (NumBytes <= 0) { + Error(NumBytesLoc, "'.nops' directive with non-positive size"); + return false; + } + + if (Control < 0) { + Error(ControlLoc, "'.nops' directive with negative NOP size"); + return false; + } + + /// Emit nops + getParser().getStreamer().emitNops(NumBytes, Control, L); + + return false; +} + /// parseDirectiveEven /// ::= .even bool X86AsmParser::parseDirectiveEven(SMLoc L) { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index bf3b6bcb5463f..31bc54f53d027 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -207,6 +207,8 @@ class X86AsmBackend : public MCAsmBackend { void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; + unsigned getMaximumNopSize() const override; + bool writeNopData(raw_ostream &OS, uint64_t Count) const override; }; } // end anonymous namespace @@ -1067,6 +1069,21 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm, } } +unsigned X86AsmBackend::getMaximumNopSize() const { + if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) + return 1; + if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP]) + return 7; + if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) + return 15; + if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP]) + return 11; + // FIXME: handle 32-bit mode + // 15-bytes is the longest single NOP instruction, but 10-bytes is + // commonly the longest that can be efficiently decoded. + return 10; +} + /// Write a sequence of optimal nops to the output, covering \p Count /// bytes. 
/// \return - true on success, false on failure @@ -1094,23 +1111,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", }; - // This CPU doesn't support long nops. If needed add more. - // FIXME: We could generated something better than plain 0x90. - if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) { - for (uint64_t i = 0; i < Count; ++i) - OS << '\x90'; - return true; - } - - // 15-bytes is the longest single NOP instruction, but 10-bytes is - // commonly the longest that can be efficiently decoded. - uint64_t MaxNopLength = 10; - if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP]) - MaxNopLength = 7; - else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) - MaxNopLength = 15; - else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP]) - MaxNopLength = 11; + uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(); // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining // length. diff --git a/llvm/test/MC/X86/x86-directive-nops-errors.s b/llvm/test/MC/X86/x86-directive-nops-errors.s new file mode 100644 index 0000000000000..473cb509442cd --- /dev/null +++ b/llvm/test/MC/X86/x86-directive-nops-errors.s @@ -0,0 +1,12 @@ +# RUN: not llvm-mc -triple i386 %s -filetype=obj -o /dev/null 2>&1 | FileCheck --check-prefix=X86 %s +# RUN: not llvm-mc -triple=x86_64 %s -filetype=obj -o /dev/null 2>&1 | FileCheck --check-prefix=X64 %s + +.nops 4, 3 +# X86: :[[@LINE-1]]:1: error: illegal NOP size 3. +.nops 4, 4 +# X86: :[[@LINE-1]]:1: error: illegal NOP size 4. +.nops 4, 5 +# X86: :[[@LINE-1]]:1: error: illegal NOP size 5. +.nops 16, 15 +# X86: :[[@LINE-1]]:1: error: illegal NOP size 15. +# X64: :[[@LINE-2]]:1: error: illegal NOP size 15. diff --git a/llvm/test/MC/X86/x86-directive-nops.s b/llvm/test/MC/X86/x86-directive-nops.s new file mode 100644 index 0000000000000..786a029d503ae --- /dev/null +++ b/llvm/test/MC/X86/x86-directive-nops.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc -triple i386 %s -filetype=obj | llvm-objdump -d - | FileCheck %s + +.nops 4 +# CHECK: 0: 90 nop +# CHECK-NEXT: 1: 90 nop +# CHECK-NEXT: 2: 90 nop +# CHECK-NEXT: 3: 90 nop +.nops 4, 1 +# CHECK: 4: 90 nop +# CHECK-NEXT: 5: 90 nop +# CHECK-NEXT: 6: 90 nop +# CHECK-NEXT: 7: 90 nop diff --git a/llvm/test/MC/X86/x86_64-directive-nops.s b/llvm/test/MC/X86/x86_64-directive-nops.s new file mode 100644 index 0000000000000..2255cd3a2efdc --- /dev/null +++ b/llvm/test/MC/X86/x86_64-directive-nops.s @@ -0,0 +1,19 @@ +# RUN: llvm-mc -triple=x86_64 %s -filetype=obj | llvm-objdump -d - | FileCheck %s + +.nops 4, 1 +# CHECK: 0: 90 nop +# CHECK-NEXT: 1: 90 nop +# CHECK-NEXT: 2: 90 nop +# CHECK-NEXT: 3: 90 nop +.nops 4, 2 +# CHECK-NEXT: 4: 66 90 nop +# CHECK-NEXT: 6: 66 90 nop +.nops 4, 3 +# CHECK-NEXT: 8: 0f 1f 00 nopl (%rax) +# CHECK-NEXT: b: 90 nop +.nops 4, 4 +# CHECK-NEXT: c: 0f 1f 40 00 nopl (%rax) +.nops 4, 5 +# CHECK-NEXT: 10: 0f 1f 40 00 nopl (%rax) +.nops 4 +# CHECK-NEXT: 14: 0f 1f 40 00 nopl (%rax) From 456f38a97199770d4ea563dec8c50eaaf20f0309 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 3 Aug 2020 11:50:07 -0700 Subject: [PATCH 217/600] Fix layering violation Transforms/Utils -> Scalar Introduced in D85063. 
--- llvm/lib/Transforms/Utils/LoopVersioning.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index c8fa337ed04f7..b4925064bc6b9 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -14,15 +14,17 @@ #include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" From 7efd9ceb588b5e76e4ce9ae0b8ed45bfc90645cd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 13:50:54 -0400 Subject: [PATCH 218/600] [InstSimplify] add tests for min-of-max variants; NFC --- .../InstSimplify/maxmin_intrinsics.ll | 220 ++++++++++++++++++ 1 file changed, 220 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index c0064ab0a423a..7c79357a0bd04 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -427,3 +427,223 @@ define i8 @smin_smin_commute3(i8 %x, i8 %y) { %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) ret i8 %m2 } + +define i8 @umax_umin(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umax_umin_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umin_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umax_umin_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_umin_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define <2 x i8> @umax_umin_commute3(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @umax_umin_commute3( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x) + %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> %x) + ret <2 x i8> %m2 +} + +define i8 @umin_umax(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umax( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 
@llvm.umax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @umin_umax_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umax_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define <2 x i8> @umin_umax_commute2(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @umin_umax_commute2( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> %y) + %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> %x) + ret <2 x i8> %m2 +} + +define i8 @umin_umax_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_umax_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_smin(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define <2 x i8> @smax_smin_commute1(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @smax_smin_commute1( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %y, <2 x i8> %x) + %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %m) + ret <2 x i8> %m2 +} + +define i8 @smax_smin_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smin_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_smin_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_smin_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define <2 x i8> @smin_smax(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @smin_smax( +; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) +; CHECK-NEXT: ret <2 x i8> [[M2]] +; + %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %y) + %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> %m) + ret <2 x i8> %m2 +} + +define i8 @smin_smax_commute1(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smax_commute1( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 
[[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[M]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %m) + ret i8 %m2 +} + +define i8 @smin_smax_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smax_commute2( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smin_smax_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_smax_commute3( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_umin(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_umin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @smax_umax(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_umax( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.umax.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @umax_smin(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_smin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umax.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} + +define i8 @umin_smin(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_smin( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) +; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) +; CHECK-NEXT: ret i8 [[M2]] +; + %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) + %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) + ret i8 %m2 +} From 9e5cf6bde5963f14a38117061c7a4df064453088 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 14:02:09 -0400 Subject: [PATCH 219/600] [InstSimplify] fold variations of max-of-min with common operand https://alive2.llvm.org/ce/z/ZtxpZ3 --- llvm/lib/Analysis/InstructionSimplify.cpp | 33 ++++++++-- .../InstSimplify/maxmin_intrinsics.ll | 64 +++++-------------- 2 files changed, 43 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6e75478d52afd..f827f0230a3e4 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5198,6 +5198,16 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, return nullptr; } +static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::smax: return Intrinsic::smin; + case Intrinsic::smin: return Intrinsic::smax; + case Intrinsic::umax: return Intrinsic::umin; + case Intrinsic::umin: return Intrinsic::umax; + default: llvm_unreachable("Unexpected intrinsic"); + } +} + static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const 
SimplifyQuery &Q) {
   Intrinsic::ID IID = F->getIntrinsicID();
@@ -5239,16 +5249,27 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
       return ConstantInt::get(ReturnType, APInt::getMinValue(BitWidth));
   }
 
+  auto hasSpecificOperand = [](IntrinsicInst *II, Value *V) {
+    return II->getOperand(0) == V || II->getOperand(1) == V;
+  };
+
   // For 4 commuted variants of each intrinsic:
   // max (max X, Y), X --> max X, Y
-  if (auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0))
-    if (MinMax0->getIntrinsicID() == IID &&
-        (MinMax0->getOperand(0) == Op1 || MinMax0->getOperand(1) == Op1))
+  // max (min X, Y), X --> X
+  if (auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0)) {
+    Intrinsic::ID InnerID = MinMax0->getIntrinsicID();
+    if (InnerID == IID && hasSpecificOperand(MinMax0, Op1))
       return MinMax0;
-  if (auto *MinMax1 = dyn_cast<IntrinsicInst>(Op1))
-    if (MinMax1->getIntrinsicID() == IID &&
-        (MinMax1->getOperand(0) == Op0 || MinMax1->getOperand(1) == Op0))
+    if (InnerID == getMaxMinOpposite(IID) && hasSpecificOperand(MinMax0, Op1))
+      return Op1;
+  }
+  if (auto *MinMax1 = dyn_cast<IntrinsicInst>(Op1)) {
+    Intrinsic::ID InnerID = MinMax1->getIntrinsicID();
+    if (InnerID == IID && hasSpecificOperand(MinMax1, Op0))
       return MinMax1;
+    if (InnerID == getMaxMinOpposite(IID) && hasSpecificOperand(MinMax1, Op0))
+      return Op0;
+  }
 
   const APInt *C;
   if (!match(Op1, m_APIntAllowUndef(C)))
diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
index 7c79357a0bd04..7a31a4dcb9a3b 100644
--- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
+++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
@@ -430,9 +430,7 @@ define i8 @smin_smin_commute3(i8 %x, i8 %y) {
 
 define i8 @umax_umin(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_umin(
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]])
-; CHECK-NEXT:    ret i8 [[M2]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %m = call i8 @llvm.umax.i8(i8 %x, i8 %y)
   %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m)
@@ -441,9 +439,7 @@ define i8 @umax_umin(i8 %x, i8 %y) {
 
 define i8 @umax_umin_commute1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_umin_commute1(
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]])
-; CHECK-NEXT:    [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 [[M]])
-; CHECK-NEXT:    ret i8 [[M2]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %m = call i8 @llvm.umax.i8(i8 %y, i8 %x)
   %m2 = call i8 @llvm.umin.i8(i8 %x, i8 %m)
@@ -452,9 +448,7 @@ define i8 @umax_umin_commute1(i8 %x, i8 %y) {
 
 define i8 @umax_umin_commute2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_umin_commute2(
-; CHECK-NEXT:    [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 [[X]])
-; CHECK-NEXT:    ret i8 [[M2]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %m = call i8 @llvm.umax.i8(i8 %x, i8 %y)
   %m2 = call i8 @llvm.umin.i8(i8 %m, i8 %x)
@@ -463,9 +457,7 @@ define i8 @umax_umin_commute2(i8 %x, i8 %y) {
 
 define <2 x i8> @umax_umin_commute3(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @umax_umin_commute3(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> [[X]])
-; CHECK-NEXT:    ret <2 x i8> [[M2]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x)
   %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> %x)
@@ -474,9 +466,7 @@ define <2 x i8> @umax_umin_commute3(<2 x i8> %x, <2 x i8> %y)
{ define i8 @umin_umax(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umax( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) @@ -485,9 +475,7 @@ define i8 @umin_umax(i8 %x, i8 %y) { define i8 @umin_umax_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umax_commute1( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.umax.i8(i8 %x, i8 %m) @@ -496,9 +484,7 @@ define i8 @umin_umax_commute1(i8 %x, i8 %y) { define <2 x i8> @umin_umax_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @umin_umax_commute2( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> [[X]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[X:%.*]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> %y) %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> %x) @@ -507,9 +493,7 @@ define <2 x i8> @umin_umax_commute2(<2 x i8> %x, <2 x i8> %y) { define i8 @umin_umax_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_umax_commute3( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.umin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.umax.i8(i8 %m, i8 %x) @@ -518,9 +502,7 @@ define i8 @umin_umax_commute3(i8 %x, i8 %y) { define i8 @smax_smin(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smin( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smin.i8(i8 %x, i8 %m) @@ -529,9 +511,7 @@ define i8 @smax_smin(i8 %x, i8 %y) { define <2 x i8> @smax_smin_commute1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @smax_smin_commute1( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[X:%.*]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %y, <2 x i8> %x) %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %m) @@ -540,9 +520,7 @@ define <2 x i8> @smax_smin_commute1(<2 x i8> %x, <2 x i8> %y) { define i8 @smax_smin_commute2(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smin_commute2( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) @@ -551,9 +529,7 @@ define i8 @smax_smin_commute2(i8 %x, i8 %y) { define i8 @smax_smin_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_smin_commute3( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; 
CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smax.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x) @@ -562,9 +538,7 @@ define i8 @smax_smin_commute3(i8 %x, i8 %y) { define <2 x i8> @smin_smax(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @smin_smax( -; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X]], <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[X:%.*]] ; %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> %y) %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> %m) @@ -573,9 +547,7 @@ define <2 x i8> @smin_smax(<2 x i8> %x, <2 x i8> %y) { define i8 @smin_smax_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smax_commute1( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smax.i8(i8 %x, i8 %m) @@ -584,9 +556,7 @@ define i8 @smin_smax_commute1(i8 %x, i8 %y) { define i8 @smin_smax_commute2(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smax_commute2( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) @@ -595,9 +565,7 @@ define i8 @smin_smax_commute2(i8 %x, i8 %y) { define i8 @smin_smax_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_smax_commute3( -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 [[X]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[X:%.*]] ; %m = call i8 @llvm.smin.i8(i8 %y, i8 %x) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 %x) From 0d454e8e087049ae86283e73a25cf8eaad488651 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Mon, 3 Aug 2020 12:21:57 -0700 Subject: [PATCH 220/600] [flang] Fix bug detecting intrinsic function Don't set the INTRINSIC attribute on a dummy procedure. 
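The fix is easiest to see through the shadowing rule it enforces. A rough C++ analogue of the Fortran test added below (hypothetical code, not from the patch): a parameter named like a well-known function must shadow that function, so the call resolves to the caller-supplied procedure.

    // A dummy procedure named `cos` plays the role of this parameter; the
    // call must use it, never the intrinsic of the same name.
    double apply(double (*cos)(double), double x) {
      return cos(x); // calls the parameter, not ::cos from <cmath>
    }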
Differential Revision: https://reviews.llvm.org/D85136
---
 flang/lib/Semantics/resolve-names.cpp |  3 ++-
 flang/test/Semantics/symbol18.f90     | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index e85dfa9c91ef5..c5b42473d0113 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -5749,7 +5749,8 @@ void ResolveNamesVisitor::HandleProcedureName(
       // error was reported
     } else {
       symbol = &Resolve(name, symbol)->GetUltimate();
-      if (ConvertToProcEntity(*symbol) && IsIntrinsic(symbol->name())) {
+      if (ConvertToProcEntity(*symbol) && IsIntrinsic(symbol->name()) &&
+          !IsDummy(*symbol)) {
         symbol->attrs().set(Attr::INTRINSIC);
         // 8.2(3): ignore type from intrinsic in type-declaration-stmt
         symbol->get<ProcEntityDetails>().set_interface(ProcInterface{});
diff --git a/flang/test/Semantics/symbol18.f90 b/flang/test/Semantics/symbol18.f90
index b7269b70be0a9..93987f6741ed6 100644
--- a/flang/test/Semantics/symbol18.f90
+++ b/flang/test/Semantics/symbol18.f90
@@ -19,3 +19,13 @@ program p1
  !REF: /p1/x
  y = f(x)
 end program
+
+!DEF: /f2 (Function) Subprogram REAL(4)
+!DEF: /f2/cos EXTERNAL (Function, Implicit) ProcEntity REAL(4)
+!DEF: /f2/x (Implicit) ObjectEntity REAL(4)
+function f2(cos, x)
+  !DEF: /f2/f2 (Implicit) ObjectEntity REAL(4)
+  !REF: /f2/cos
+  !REF: /f2/x
+  f2 = cos(x)
+end function

From 3e89cbf38e76d0d0ac75fe77d318a5cfeac512f5 Mon Sep 17 00:00:00 2001
From: Hiroshi Yamauchi
Date: Mon, 3 Aug 2020 11:37:22 -0700
Subject: [PATCH 221/600] [PGO] Enable the extended value profile buckets for
 mem op sizes.

Following up on D81682, enable the new, extended value profile buckets for
mem op sizes.

Differential Revision: https://reviews.llvm.org/D83903
---
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 3ab697d6cc321..75988893fdb85 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -78,7 +78,7 @@ cl::opt<bool> UseOldMemOpValueProf(
     "use-old-memop-value-prof",
     cl::desc("Use the old memop value profiling buckets. This is "
              "transitional and to be removed after switching. "),
-    cl::init(true));
+    cl::init(false));
 
 namespace {

From 777824b49d5d9e1fbc93108107fa6d12a936a2e4 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Mon, 3 Aug 2020 11:55:57 -0700
Subject: [PATCH 222/600] [llvm-jitlink] Add support for static archives and
 MachO universal archives.

Archives can now be specified as input files the same way that object files
are. Archives will always be linked after all objects (regardless of the
relative order of the inputs) but before any dynamic libraries or process
symbols.

This patch also relaxes matching for slice triples in
StaticLibraryDefinitionGenerator in order to support this feature: Vendors
need not match if the source vendor is unknown.
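For intuition, a minimal sketch of the input-classification step this adds (assumed shape, mirroring the loadObjects() change below; identify_magic and file_magic come from llvm/BinaryFormat/Magic.h, which the patch starts including):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/BinaryFormat/Magic.h"

    // Returns true for inputs that should be loaded through a
    // StaticLibraryDefinitionGenerator rather than added as plain objects.
    static bool isArchiveLike(llvm::StringRef Buffer) {
      llvm::file_magic Magic = llvm::identify_magic(Buffer);
      return Magic == llvm::file_magic::archive ||
             Magic == llvm::file_magic::macho_universal_binary;
    }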
--- .../ExecutionEngine/Orc/ExecutionUtils.cpp | 3 ++- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index 4d255cd66c1be..278f492f0ebe7 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -322,7 +322,8 @@ StaticLibraryDefinitionGenerator::Load(ObjectLayer &L, const char *FileName, auto ObjTT = Obj.getTriple(); if (ObjTT.getArch() == TT.getArch() && ObjTT.getSubArch() == TT.getSubArch() && - ObjTT.getVendor() == TT.getVendor()) { + (TT.getVendor() == Triple::UnknownVendor || + ObjTT.getVendor() == TT.getVendor())) { // We found a match. Create an instance from a buffer covering this // slice. auto SliceBuffer = MemoryBuffer::getFileSlice(FileName, Obj.getSize(), diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 798087d8cae7b..d5dc661cc69f7 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -14,6 +14,7 @@ #include "llvm-jitlink.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" #include "llvm/MC/MCAsmInfo.h" @@ -887,13 +888,20 @@ Error loadObjects(Session &S) { InputFileItr != InputFileEnd; ++InputFileItr) { unsigned InputFileArgIdx = InputFiles.getPosition(InputFileItr - InputFiles.begin()); - StringRef InputFile = *InputFileItr; + const std::string &InputFile = *InputFileItr; auto &JD = *std::prev(IdxToJLD.lower_bound(InputFileArgIdx))->second; LLVM_DEBUG(dbgs() << " " << InputFileArgIdx << ": \"" << InputFile << "\" to " << JD.getName() << "\n";); auto ObjBuffer = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(InputFile))); - ExitOnErr(S.ObjLayer.add(JD, std::move(ObjBuffer))); + + auto Magic = identify_magic(ObjBuffer->getBuffer()); + if (Magic == file_magic::archive || + Magic == file_magic::macho_universal_binary) + JD.addGenerator(ExitOnErr(StaticLibraryDefinitionGenerator::Load( + S.ObjLayer, InputFile.c_str(), S.TT))); + else + ExitOnErr(S.ObjLayer.add(JD, std::move(ObjBuffer))); } // Define absolute symbols. @@ -1056,6 +1064,11 @@ int main(int argc, char *argv[]) { ExitOnErr(sanitizeArguments(*S)); + { + TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr); + ExitOnErr(loadObjects(*S)); + } + if (!NoProcessSymbols) ExitOnErr(loadProcessSymbols(*S)); ExitOnErr(loadDylibs()); @@ -1063,10 +1076,6 @@ int main(int argc, char *argv[]) { if (PhonyExternals) addPhonyExternalsGenerator(*S); - { - TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr); - ExitOnErr(loadObjects(*S)); - } if (ShowInitialExecutionSessionState) S->ES.dump(outs()); From dca23ed8952383701a62b778104f4db6f5d4b799 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 3 Aug 2020 12:29:40 -0700 Subject: [PATCH 223/600] [AArch64] Add missing isel patterns for fcvtzs/u intrinsic on v1f64. Fixes test-suite compile failure caused by 8dfb5d7. While I'm in the area, add some more test coverage to related operations, to make sure we aren't missing any other patterns. 
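A note on semantics, since the tests below lean on it: fcvtzs and fcvtzu convert floating point to integer rounding toward zero, the same rounding a C++ cast performs, except that the AArch64 instructions saturate out-of-range inputs (and map NaN to zero) instead of leaving the result undefined. A hedged sketch of the in-range behavior only:

    // In-range behavior only: the instructions additionally saturate values
    // outside the target range, where these casts would be undefined behavior.
    long long fcvtzs_like(double d) { return static_cast<long long>(d); }
    unsigned long long fcvtzu_like(double d) {
      return static_cast<unsigned long long>(d);
    }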
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 + llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 183 ++++++++++++++++++ .../AArch64/fp16_intrinsic_scalar_1op.ll | 40 ++++ 3 files changed, 227 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 61a43ed9df67f..39e1ee3ad8c18 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4483,6 +4483,10 @@ def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), (FCVTPSv1i64 FPR64:$Rn)>; def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), (FCVTPUv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), + (FCVTZSv1i64 FPR64:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), + (FCVTZUv1i64 FPR64:$Rn)>; def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), (FRECPEv1f16 FPR16:$Rn)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll index d236aeaf32a70..9ab7247677070 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -30,9 +30,19 @@ define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtas_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtas_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtas d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtau_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtau_2s: @@ -61,9 +71,19 @@ define <2 x i64> @fcvtau_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtau_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtau_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtau d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtms_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtms_2s: @@ -92,9 +112,19 @@ define <2 x i64> @fcvtms_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtms_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtms_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtms d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtmu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtmu_2s: @@ -123,9 +153,19 @@ define <2 x i64> @fcvtmu_2d(<2 x double> %A) nounwind { 
ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtmu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtmu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtmu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtps_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtps_2s: @@ -154,9 +194,19 @@ define <2 x i64> @fcvtps_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtps_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtps_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtps d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtpu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtpu_2s: @@ -185,9 +235,19 @@ define <2 x i64> @fcvtpu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtpu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtpu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtpu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtns_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtns_2s: @@ -216,9 +276,19 @@ define <2 x i64> @fcvtns_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtns_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtns_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtns d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) nounwind readnone declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtnu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtnu_2s: @@ -247,9 +317,19 @@ define <2 x i64> @fcvtnu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +define <1 x i64> @fcvtnu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtnu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtnu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) nounwind 
readnone declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtzs_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzs_2s: @@ -278,6 +358,57 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +; FIXME: Generate "fcvtzs d0, d0"? +define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtzs x8, d0 +;CHECK-NEXT: mov d0, x8 +;CHECK-NEXT: ret + %tmp3 = fptosi <1 x double> %A to <1 x i64> + ret <1 x i64> %tmp3 +} + +define <2 x i32> @fcvtzs_2s_intrinsic(<2 x float> %A) nounwind { +;CHECK-LABEL: fcvtzs_2s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.2s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %A) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @fcvtzs_4s_intrinsic(<4 x float> %A) nounwind { +;CHECK-LABEL: fcvtzs_4s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.4s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %A) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @fcvtzs_2d_intrinsic(<2 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_2d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs.2d v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %A) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @fcvtzs_1d_intrinsic(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzs_1d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzs d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + +declare <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) nounwind readnone define <2 x i32> @fcvtzu_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtzu_2s: @@ -306,6 +437,58 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ret <2 x i64> %tmp3 } +; FIXME: Generate "fcvtzu d0, d0"? 
+define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_1d: +;CHECK-NOT: ld1 +;CHECK: fcvtzu x8, d0 +;CHECK-NEXT: mov d0, x8 +;CHECK-NEXT: ret + %tmp3 = fptoui <1 x double> %A to <1 x i64> + ret <1 x i64> %tmp3 +} + +define <2 x i32> @fcvtzu_2s_intrinsic(<2 x float> %A) nounwind { +;CHECK-LABEL: fcvtzu_2s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.2s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %A) + ret <2 x i32> %tmp3 +} + +define <4 x i32> @fcvtzu_4s_intrinsic(<4 x float> %A) nounwind { +;CHECK-LABEL: fcvtzu_4s_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.4s v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %A) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @fcvtzu_2d_intrinsic(<2 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_2d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu.2d v0, v0 +;CHECK-NEXT: ret + %tmp3 = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %A) + ret <2 x i64> %tmp3 +} + +define <1 x i64> @fcvtzu_1d_intrinsic(<1 x double> %A) nounwind { +;CHECK-LABEL: fcvtzu_1d_intrinsic: +;CHECK-NOT: ld1 +;CHECK: fcvtzu d0, d0 +;CHECK-NEXT: ret + %tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %A) + ret <1 x i64> %tmp3 +} + +declare <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) nounwind readnone + define <2 x float> @frinta_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: frinta_2s: ;CHECK-NOT: ld1 diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll index c8333b253ec42..ff19e6ac91a73 100644 --- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll +++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll @@ -16,6 +16,10 @@ declare i64 @llvm.aarch64.neon.fcvtau.i64.f16(half) declare i32 @llvm.aarch64.neon.fcvtau.i32.f16(half) declare i64 @llvm.aarch64.neon.fcvtas.i64.f16(half) declare i32 @llvm.aarch64.neon.fcvtas.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half) +declare i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half) +declare i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half) declare half @llvm.aarch64.neon.frsqrte.f16(half) declare half @llvm.aarch64.neon.frecpx.f16(half) declare half @llvm.aarch64.neon.frecpe.f16(half) @@ -138,6 +142,42 @@ entry: ret i64 %0 } +define i32 @fcvtzu_intrinsic_i32(half %a) { +; CHECK-LABEL: fcvtzu_intrinsic_i32: +; CHECK: fcvtzu w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) + ret i32 %fcvt +} + +define i64 @fcvtzu_intrinsic_i64(half %a) { +; CHECK-LABEL: fcvtzu_intrinsic_i64: +; CHECK: fcvtzs x0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a) + ret i64 %fcvt +} + +define i32 @fcvtzs_intrinsic_i32(half %a) { +; CHECK-LABEL: fcvtzs_intrinsic_i32: +; CHECK: fcvtzs w0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) + ret i32 %fcvt +} + +define i64 @fcvtzs_intrinsic_i64(half %a) { +; CHECK-LABEL: fcvtzs_intrinsic_i64: +; CHECK: fcvtzs x0, h0 +; CHECK-NEXT: ret +entry: + %fcvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a) 
+  ret i64 %fcvt
+}
+
 define dso_local i16 @t19(half %a) {
 ; CHECK-LABEL: t19:
 ; CHECK:  fcvtas w0, h0

From 7f1556f292ccfd80c4ffa986d5b849f915e5cd82 Mon Sep 17 00:00:00 2001
From: Jon Roelofs
Date: Mon, 3 Aug 2020 14:09:46 -0600
Subject: [PATCH 224/600] Fix typo: s/epomymous/eponymous/ NFC

---
 llvm/lib/CodeGen/MachineScheduler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index cec7a0c031eb5..7daaa3526aa3a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1298,7 +1298,7 @@ void ScheduleDAGMILive::computeDFSResult() {
 /// The cyclic path estimation identifies a def-use pair that crosses the back
 /// edge and considers the depth and height of the nodes. For example, consider
 /// the following instruction sequence where each instruction has unit latency
-/// and defines an epomymous virtual register:
+/// and defines an eponymous virtual register:
 ///
 ///   a->b(a,c)->c(b)->d(c)->exit
 ///

From 7209f83112db4dbe15d8328705f9d2aff0624fbd Mon Sep 17 00:00:00 2001
From: Daniel Sanders
Date: Mon, 3 Aug 2020 12:46:49 -0700
Subject: [PATCH 225/600] Allow .dSYM's to be directly placed in an alternate
 directory

Once available in the relevant toolchains this will allow us to implement
LLVM_EXTERNALIZE_DEBUGINFO_OUTPUT_DIR after D84127 by directly placing the
dSYM in the desired location instead of emitting next to the output file
and moving it.

Reviewed By: JDevlieghere

Differential Revision: https://reviews.llvm.org/D84572
---
 clang/include/clang/Driver/Options.td |  3 +++
 clang/lib/Driver/Driver.cpp           | 12 +++++++++++-
 clang/test/Driver/darwin-dsymutil.c   | 24 ++++++++++++++----------
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 16051934c1e0b..fcb5c030755ed 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -677,6 +677,9 @@ def dependency_dot : Separate<["-"], "dependency-dot">, Flags<[CC1Option]>,
   HelpText<"Filename to write DOT-formatted header dependencies to">;
 def module_dependency_dir : Separate<["-"], "module-dependency-dir">,
   Flags<[CC1Option]>, HelpText<"Directory to dump module dependencies to">;
+def dsym_dir : JoinedOrSeparate<["-"], "dsym-dir">,
+  Flags<[DriverOption, RenderAsInput]>,
+  HelpText<"Directory to output dSYM's (if any) to">, MetaVarName<"<dir>">;
 def dumpmachine : Flag<["-"], "dumpmachine">;
 def dumpspecs : Flag<["-"], "dumpspecs">, Flags<[Unsupported]>;
 def dumpversion : Flag<["-"], "dumpversion">;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 317098e248233..35263fbe1b2d8 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4604,7 +4604,17 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
   StringRef BaseName;
 
   // Dsymutil actions should use the full path.
-  if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
+  if (isa<DsymutilJobAction>(JA) && C.getArgs().hasArg(options::OPT_dsym_dir)) {
+    SmallString<128> ExternalPath(
+        C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue());
+    // We use posix style here because the tests (specifically
+    // darwin-dsymutil.c) demonstrate that posix style paths are acceptable
+    // even on Windows and if we don't then the similar test covering this
+    // fails.
+    llvm::sys::path::append(ExternalPath, llvm::sys::path::Style::posix,
+                            llvm::sys::path::filename(BasePath));
+    BaseName = ExternalPath;
+  } else if (isa<DsymutilJobAction>(JA) || isa<VerifyJobAction>(JA))
     BaseName = BasePath;
   else
     BaseName = llvm::sys::path::filename(BasePath);
diff --git a/clang/test/Driver/darwin-dsymutil.c b/clang/test/Driver/darwin-dsymutil.c
index 09451a81b797d..8cdb2f3cbf644 100644
--- a/clang/test/Driver/darwin-dsymutil.c
+++ b/clang/test/Driver/darwin-dsymutil.c
@@ -26,10 +26,21 @@
 //
 // RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \
 // RUN:   -o foo %s -g 2> %t
-// RUN: FileCheck -check-prefix=CHECK-OUTPUT-NAME < %t %s
+// RUN: FileCheck -Doutfile=foo -Ddsymfile=foo.dSYM \
+// RUN:   -check-prefix=CHECK-OUTPUT-NAME < %t %s
 //
-// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "foo"
-// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["foo"], output: "foo.dSYM"
+// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \
+// RUN:   -o bar/foo %s -g 2> %t
+// RUN: FileCheck -Doutfile=bar/foo -Ddsymfile=bar/foo.dSYM \
+// RUN:   -check-prefix=CHECK-OUTPUT-NAME < %t %s
+//
+// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \
+// RUN:   -o bar/foo -dsym-dir external %s -g 2> %t
+// RUN: FileCheck -Doutfile=bar/foo -Ddsymfile=external/foo.dSYM \
+// RUN:   -check-prefix=CHECK-OUTPUT-NAME < %t %s
+//
+// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Linker", inputs: [{{.*}}], output: "[[outfile]]"
+// CHECK-OUTPUT-NAME: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["[[outfile]]"], output: "[[dsymfile]]"
 
 // Check that we only use dsymutil when needed.
 //
@@ -38,12 +49,5 @@
 // RUN:   -o foo %t.o -g 2> %t
 // RUN: not grep "Dsymutil" %t
 
-// Check that we put the .dSYM in the right place.
-// RUN: %clang -target x86_64-apple-darwin10 -ccc-print-bindings \
-// RUN:   -o bar/foo %s -g 2> %t
-// RUN: FileCheck -check-prefix=CHECK-LOCATION < %t %s
-
-// CHECK-LOCATION: "x86_64-apple-darwin10" - "darwin::Dsymutil", inputs: ["bar/foo"], output: "bar/foo.dSYM"
-
 // Check that we don't crash when translating arguments for dsymutil.
 // RUN: %clang -m32 -arch x86_64 -g %s -###

From 41b1e97b12c1407e40d8e5081bf1f9cf183934b0 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka
Date: Mon, 3 Aug 2020 13:25:25 -0700
Subject: [PATCH 226/600] [CodeGen][ObjC] Mark calls to
 objc_unsafeClaimAutoreleasedReturnValue as notail on x86-64

This is needed because the epilogue code inserted before tail calls on
x86-64 breaks the handshake between the caller and callee.

Calls to objc_retainAutoreleasedReturnValue used to have the same problem,
which was fixed in https://reviews.llvm.org/D59656.
rdar://problem/66029552 Differential Revision: https://reviews.llvm.org/D84540 --- clang/lib/CodeGen/CGObjC.cpp | 14 +++++++---- clang/lib/CodeGen/TargetInfo.cpp | 6 ++--- clang/lib/CodeGen/TargetInfo.h | 8 +++---- clang/test/CodeGenObjC/arc-unsafeclaim.m | 30 ++++++++++++++---------- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index cd2b84f5dd203..26dfb6259a290 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -2250,8 +2250,7 @@ llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); llvm::CallInst::TailCallKind tailKind = - CGM.getTargetCodeGenInfo() - .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() + CGM.getTargetCodeGenInfo().markARCOptimizedReturnCallsAsNoTail() ? llvm::CallInst::TCK_NoTail : llvm::CallInst::TCK_None; return emitARCValueOperation( @@ -2270,9 +2269,14 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { llvm::Value * CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, nullptr, - CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, - llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue); + llvm::CallInst::TailCallKind tailKind = + CGM.getTargetCodeGenInfo().markARCOptimizedReturnCallsAsNoTail() + ? llvm::CallInst::TCK_NoTail + : llvm::CallInst::TCK_None; + return emitARCValueOperation( + *this, value, nullptr, + CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, + llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue, tailKind); } /// Release the given object. diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 9cd63ebe29ee0..f31d432eb3171 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -2404,10 +2404,8 @@ class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { } /// Disable tail call on x86-64. The epilogue code before the tail jump blocks - /// the autoreleaseRV/retainRV optimization. - bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const override { - return true; - } + /// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations. + bool markARCOptimizedReturnCallsAsNoTail() const override { return true; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 1152cabce4a0d..0df9667e91e16 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -163,11 +163,9 @@ class TargetCodeGenInfo { return ""; } - /// Determine whether a call to objc_retainAutoreleasedReturnValue should be - /// marked as 'notail'. - virtual bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const { - return false; - } + /// Determine whether a call to objc_retainAutoreleasedReturnValue or + /// objc_unsafeClaimAutoreleasedReturnValue should be marked as 'notail'. + virtual bool markARCOptimizedReturnCallsAsNoTail() const { return false; } /// Return a constant used by UBSan as a signature to identify functions /// possessing type information, or 0 if the platform is unsupported. 
diff --git a/clang/test/CodeGenObjC/arc-unsafeclaim.m b/clang/test/CodeGenObjC/arc-unsafeclaim.m index a8011e024180d..40f1f164455a7 100644 --- a/clang/test/CodeGenObjC/arc-unsafeclaim.m +++ b/clang/test/CodeGenObjC/arc-unsafeclaim.m @@ -1,16 +1,16 @@ // Make sure it works on x86-64. -// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -fobjc-runtime=macosx-10.11 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED +// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -fobjc-runtime=macosx-10.11 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=NOTAIL-CALL // Make sure it works on x86-32. -// RUN: %clang_cc1 -triple i386-apple-darwin11 -fobjc-runtime=macosx-fragile-10.11 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED +// RUN: %clang_cc1 -triple i386-apple-darwin11 -fobjc-runtime=macosx-fragile-10.11 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED -check-prefix=CALL // Make sure it works on ARM. -// RUN: %clang_cc1 -triple arm64-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED -// RUN: %clang_cc1 -triple arm64-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPTIMIZED +// RUN: %clang_cc1 -triple arm64-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED -check-prefix=CALL +// RUN: %clang_cc1 -triple arm64-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPTIMIZED -check-prefix=CALL // Make sure it works on ARM64. -// RUN: %clang_cc1 -triple armv7-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED -// RUN: %clang_cc1 -triple armv7-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPTIMIZED +// RUN: %clang_cc1 -triple armv7-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-UNOPTIMIZED -check-prefix=CHECK-MARKED -check-prefix=CALL +// RUN: %clang_cc1 -triple armv7-apple-ios9 -fobjc-runtime=ios-9.0 -fobjc-arc -O -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPTIMIZED -check-prefix=CALL // Make sure that it's implicitly disabled if the runtime version isn't high enough. 
// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -fobjc-runtime=macosx-10.10 -fobjc-arc -emit-llvm -o - %s | FileCheck %s -check-prefix=DISABLED @@ -29,7 +29,8 @@ void test_assign() { // CHECK: [[T0:%.*]] = call [[A:.*]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[X]] @@ -53,7 +54,8 @@ void test_assign_assign() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[Y]] @@ -126,7 +128,8 @@ void test_init() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[X]] @@ -144,7 +147,8 @@ void test_init_assignment() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: [[T4:%.*]] = bitcast [[A]]* [[T3]] to i8* // CHECK-NEXT: store i8* [[T4]], i8** [[X]] @@ -212,7 +216,8 @@ void test_ignored() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: ret void @@ -223,7 +228,8 @@ void test_cast_to_void() { // CHECK: [[T0:%.*]] = call [[A]]* @makeA() // CHECK-MARKED-NEXT: call void asm sideeffect // CHECK-NEXT: [[T1:%.*]] = bitcast [[A]]* [[T0]] to i8* -// CHECK-NEXT: [[T2:%.*]] = call i8* 
@llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// NOTAIL-CALL-NEXT: [[T2:%.*]] = notail call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) +// CALL-NEXT: [[T2:%.*]] = call i8* @llvm.objc.unsafeClaimAutoreleasedReturnValue(i8* [[T1]]) // CHECK-NEXT: bitcast i8* [[T2]] to [[A]]* // CHECK-NEXT: ret void From 11bb7c220ccdff1ffec4780ff92fb5acec8f6f0b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 13:35:59 -0700 Subject: [PATCH 227/600] [MC] Set sh_link to 0 if the associated symbol is undefined Part of https://bugs.llvm.org/show_bug.cgi?id=41734 LTO can drop externally available definitions. Such AssociatedSymbol is not associated with a symbol. ELFWriter::writeSection() will assert. Allow a SHF_LINK_ORDER section to have sh_link=0. We need to give sh_link a syntax, a literal zero in the linked-to symbol position, e.g. `.section name,"ao",@progbits,0` Reviewed By: pcc Differential Revision: https://reviews.llvm.org/D72899 --- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 2 +- llvm/lib/MC/ELFObjectWriter.cpp | 8 +++++-- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 8 ++++++- llvm/lib/MC/MCSectionELF.cpp | 6 +++-- .../CodeGen/X86/elf-associated-discarded.ll | 23 +++++++++++++++++++ llvm/test/CodeGen/X86/elf-associated.ll | 6 ++--- llvm/test/MC/ELF/section-linkorder.s | 8 +++++++ 7 files changed, 52 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/X86/elf-associated-discarded.ll create mode 100644 llvm/test/MC/ELF/section-linkorder.s diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 48599199626ce..8ef91250423f3 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -680,7 +680,7 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( // MD_associated in a unique section. unsigned UniqueID = MCContext::GenericSectionID; const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM); - if (LinkedToSym) { + if (GO->getMetadata(LLVMContext::MD_associated)) { UniqueID = NextUniqueID++; Flags |= ELF::SHF_LINK_ORDER; } else { diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 5a5692c0cb636..b44a36b9713ef 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1024,9 +1024,13 @@ void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap, } if (Section.getFlags() & ELF::SHF_LINK_ORDER) { + // If the value in the associated metadata is not a definition, Sym will be + // undefined. Represent this with sh_link=0. 
const MCSymbol *Sym = Section.getLinkedToSymbol();
-    const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
-    sh_link = SectionIndexMap.lookup(Sec);
+    if (Sym && Sym->isInSection()) {
+      const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
+      sh_link = SectionIndexMap.lookup(Sec);
+    }
   }
 
   WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getName()),
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index e5ab13bc719d4..41779d023a5df 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -450,8 +450,14 @@ bool ELFAsmParser::parseLinkedToSym(MCSymbolELF *&LinkedToSym) {
   Lex();
   StringRef Name;
   SMLoc StartLoc = L.getLoc();
-  if (getParser().parseIdentifier(Name))
+  if (getParser().parseIdentifier(Name)) {
+    if (getParser().getTok().getString() == "0") {
+      getParser().Lex();
+      LinkedToSym = nullptr;
+      return false;
+    }
     return TokError("invalid linked-to symbol");
+  }
   LinkedToSym = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
   if (!LinkedToSym || !LinkedToSym->isInSection())
     return Error(StartLoc, "linked-to symbol is not in a section: " + Name);
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index 77c259c27a04e..7a15556182658 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -172,9 +172,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
   }
 
   if (Flags & ELF::SHF_LINK_ORDER) {
-    assert(LinkedToSym);
     OS << ",";
-    printName(OS, LinkedToSym->getName());
+    if (LinkedToSym)
+      printName(OS, LinkedToSym->getName());
+    else
+      OS << '0';
   }
 
   if (isUnique())
diff --git a/llvm/test/CodeGen/X86/elf-associated-discarded.ll b/llvm/test/CodeGen/X86/elf-associated-discarded.ll
new file mode 100644
index 0000000000000..5a4fad4ebb7d9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/elf-associated-discarded.ll
@@ -0,0 +1,23 @@
+;; Test that we keep SHF_LINK_ORDER but reset sh_link to 0 if the associated
+;; symbol is not defined.
+; RUN: llc -mtriple=x86_64 -data-sections=1 < %s | FileCheck %s
+; RUN: llc -filetype=obj -mtriple=x86_64 -data-sections=1 < %s | llvm-readelf -S - | FileCheck --check-prefix=SEC %s
+
+;; FIXME The assembly output cannot be assembled because foo is not defined.
+;; This is difficult to fix because we allow loops (see elf-associated.ll
+;; .data.c and .data.d).
+; CHECK: .section .data.a,"awo",@progbits,foo
+; CHECK: .section .data.b,"awo",@progbits,foo
+
+;; No 'L' (SHF_LINK_ORDER). sh_link=0.
+; SEC; Name {{.*}} Flg Lk Inf
+; SEC: .data.a {{.*}} WAL 0 0
+; SEC: .data.b {{.*}} WAL 0 0
+
+;; The definition may be discarded by LTO.
+declare void @foo()
+
+@a = global i32 1, !associated !0
+@b = global i32 1, !associated !0
+
+!0 = !{void ()* @foo}
diff --git a/llvm/test/CodeGen/X86/elf-associated.ll b/llvm/test/CodeGen/X86/elf-associated.ll
index e0e9e00582892..14a4b5b85b0cf 100644
--- a/llvm/test/CodeGen/X86/elf-associated.ll
+++ b/llvm/test/CodeGen/X86/elf-associated.ll
@@ -36,15 +36,15 @@
 ; Non-GlobalValue metadata.
 @l = global i32 1, section "ccc", !associated !5
 !5 = !{i32* null}
-; CHECK-DAG: .section ccc,"aw",@progbits
+; CHECK-DAG: .section ccc,"awo",@progbits,0,unique,3
 
 ; Null metadata.
 @m = global i32 1, section "ddd", !associated !6
 !6 = distinct !{null}
-; CHECK-DAG: .section ddd,"aw",@progbits
+; CHECK-DAG: .section ddd,"awo",@progbits,0,unique,4
 
 ; Aliases are OK.
@n = alias i32, i32* inttoptr (i64 add (i64 ptrtoint (i32* @a to i64), i64 1297036692682702848) to i32*) @o = global i32 1, section "eee", !associated !7 !7 = !{i32* @n} -; CHECK-DAG: .section eee,"awo",@progbits,n,unique,3 +; CHECK-DAG: .section eee,"awo",@progbits,n,unique,5 diff --git a/llvm/test/MC/ELF/section-linkorder.s b/llvm/test/MC/ELF/section-linkorder.s new file mode 100644 index 0000000000000..a0f6357e52cb1 --- /dev/null +++ b/llvm/test/MC/ELF/section-linkorder.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple x86_64 %s | FileCheck %s --check-prefix=ASM +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-readelf -S %t | FileCheck %s + +# ASM: .section .linkorder,"ao",@progbits,0 +# CHECK: Name Type {{.*}} Flg Lk +# CHECK: .linkorder PROGBITS {{.*}} AL 0 +.section .linkorder,"ao",@progbits,0 From 21de4e74acf603f02f886a9e6030945f077bca3f Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 3 Aug 2020 21:46:07 +0100 Subject: [PATCH 228/600] [ARM] Test for converting VPSEL to VMOVT. NFC --- .../test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll new file mode 100644 index 0000000000000..054499795ff7a --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -verify-machineinstrs -o - | FileCheck %s + +define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture %pResult, i32* nocapture %pIndex) { +; CHECK-LABEL: arm_min_helium_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: vidup.u32 q2, r6, #1 +; CHECK-NEXT: cmp r1, #4 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge.w r12, #4 +; CHECK-NEXT: sub.w r6, r1, r12 +; CHECK-NEXT: adds r6, #3 +; CHECK-NEXT: mov.w lr, #1 +; CHECK-NEXT: adr r4, .LCPI0_0 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: add.w lr, lr, r6, lsr #2 +; CHECK-NEXT: vldrw.u32 q1, [r4] +; CHECK-NEXT: vmov.i32 q3, #0x4 +; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_1: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vcmpt.f32 ge, q1, q4 +; CHECK-NEXT: vpsel q0, q2, q0 +; CHECK-NEXT: vpsel q1, q4, q1 +; CHECK-NEXT: vadd.i32 q2, q2, q3 +; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %do.end +; CHECK-NEXT: vldr s8, .LCPI0_1 +; CHECK-NEXT: vdup.32 q3, r1 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vminnmv.f32 r0, q1 +; CHECK-NEXT: vcmp.f32 le, q1, r0 +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vpsel q0, q0, q3 +; CHECK-NEXT: vminv.u32 r1, q0 +; CHECK-NEXT: str r1, [r3] +; CHECK-NEXT: vstr s8, [r2] +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .long 0x5368d4a5 @ float 9.99999995E+11 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 
0x5368d4a5 @ float 9.99999995E+11
+entry:
+  %0 = tail call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 0, i32 1)
+  %1 = extractvalue { <4 x i32>, i32 } %0, 0
+  br label %do.body
+
+do.body: ; preds = %do.body, %entry
+  %curExtremValVec.0 = phi <4 x float> [ <float 0x426D1A94A0000000, float 0x426D1A94A0000000, float 0x426D1A94A0000000, float 0x426D1A94A0000000>, %entry ], [ %8, %do.body ]
+  %indexVec.0 = phi <4 x i32> [ %1, %entry ], [ %11, %do.body ]
+  %2 = phi <4 x float> [ zeroinitializer, %entry ], [ %10, %do.body ]
+  %blkCnt.0 = phi i32 [ %blockSize, %entry ], [ %sub, %do.body ]
+  %pSrc.addr.0 = phi float* [ %pSrc, %entry ], [ %add.ptr, %do.body ]
+  %3 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
+  %4 = bitcast float* %pSrc.addr.0 to <4 x float>*
+  %5 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %4, i32 4, <4 x i1> %3, <4 x float> zeroinitializer)
+  %6 = fcmp fast ole <4 x float> %5, %curExtremValVec.0
+  %7 = and <4 x i1> %6, %3
+  %8 = select fast <4 x i1> %7, <4 x float> %5, <4 x float> %curExtremValVec.0
+  %9 = bitcast <4 x i32> %indexVec.0 to <4 x float>
+  %10 = select fast <4 x i1> %7, <4 x float> %9, <4 x float> %2
+  %11 = add <4 x i32> %indexVec.0, <i32 4, i32 4, i32 4, i32 4>
+  %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4
+  %sub = add nsw i32 %blkCnt.0, -4
+  %cmp = icmp sgt i32 %blkCnt.0, 4
+  br i1 %cmp, label %do.body, label %do.end
+
+do.end: ; preds = %do.body
+  %12 = bitcast <4 x float> %10 to <4 x i32>
+  %13 = tail call fast float @llvm.arm.mve.minnmv.f32.v4f32(float 0x426D1A94A0000000, <4 x float> %8)
+  %.splatinsert = insertelement <4 x float> undef, float %13, i32 0
+  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  %14 = fcmp fast ole <4 x float> %8, %.splat
+  %.splatinsert1 = insertelement <4 x i32> undef, i32 %blockSize, i32 0
+  %.splat2 = shufflevector <4 x i32> %.splatinsert1, <4 x i32> undef, <4 x i32> zeroinitializer
+  %15 = select <4 x i1> %14, <4 x i32> %12, <4 x i32> %.splat2
+  %16 = tail call i32 @llvm.arm.mve.minv.v4i32(i32 %blockSize, <4 x i32> %15, i32 1)
+  store i32 %16, i32* %pIndex, align 4
+  store float %13, float* %pResult, align 4
+  ret void
+}
+
+declare { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32, i32) #1
+declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
+declare float @llvm.arm.mve.minnmv.f32.v4f32(float, <4 x float>) #1
+declare i32 @llvm.arm.mve.minv.v4i32(i32, <4 x i32>, i32) #1

From 66e7dce714fabd3ddb1aed635e4b826476d4f1a2 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <31459023+hctim@users.noreply.github.com>
Date: Mon, 3 Aug 2020 13:48:30 -0700
Subject: [PATCH 229/600] Revert "[X86][SSE] Shuffle combine blends to OR(X,Y)
 if the relevant elements are known zero."

This reverts commit 219f32f4b68679563443cdaae7b8174c9976409a.

Commit contains unsigned comparisons that break bots that build with
-Wsign-compare.
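For reference, -Wsign-compare fires on mixed-signedness comparisons of the
following shape (a hypothetical reduction, not the code from the reverted
commit):

    #include <cstddef>
    // warning: comparison of integers of different signs: 'int' and 'size_t'
    bool done(int Idx, size_t Size) { return Idx == Size; }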
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 65 +++++--------------
 llvm/test/CodeGen/X86/insertelement-ones.ll   | 12 ++--
 llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll |  8 +--
 .../test/CodeGen/X86/vector-shuffle-128-v8.ll |  5 +-
 .../CodeGen/X86/vector-shuffle-256-v32.ll     |  6 +-
 5 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b2bfcc2698f4d..e9bb50aacec0e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
     // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
     // is a valid shuffle index.
-    SDValue N0 = peekThroughBitcasts(N.getOperand(0));
-    SDValue N1 = peekThroughBitcasts(N.getOperand(1));
+    SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+    SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
     if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
       return false;
     SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7413,24 +7413,34 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                                 true))
       return false;
 
+    // Shuffle inputs must be the same size as the result.
+    if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
+          return VT.getSizeInBits() != Op.getValueSizeInBits();
+        }))
+      return false;
+    if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
+          return VT.getSizeInBits() != Op.getValueSizeInBits();
+        }))
+      return false;
+
     size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
     SmallVector<int, 64> Mask0, Mask1;
     narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
     narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
-    for (int i = 0; i != (int)MaskSize; ++i) {
+    for (size_t i = 0; i != MaskSize; ++i) {
       if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
         Mask.push_back(SM_SentinelUndef);
       else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
         Mask.push_back(SM_SentinelZero);
       else if (Mask1[i] == SM_SentinelZero)
-        Mask.push_back(i);
+        Mask.push_back(Mask0[i]);
       else if (Mask0[i] == SM_SentinelZero)
-        Mask.push_back(i + MaskSize);
+        Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
       else
         return false;
     }
-    Ops.push_back(N0);
-    Ops.push_back(N1);
+    Ops.append(SrcInputs0.begin(), SrcInputs0.end());
+    Ops.append(SrcInputs1.begin(), SrcInputs1.end());
     return true;
   }
   case ISD::INSERT_SUBVECTOR: {
@@ -34209,7 +34219,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                SelectionDAG &DAG, const X86Subtarget &Subtarget,
                                unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
                                bool IsUnary) {
-  unsigned NumMaskElts = Mask.size();
   unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
 
   if (MaskVT.is128BitVector()) {
@@ -34267,46 +34276,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
-  // Attempt to match against a OR if we're performing a blend shuffle and the
-  // non-blended source element is zero in each case.
- if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && - (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { - bool IsBlend = true; - unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); - unsigned NumV2Elts = V2.getValueType().getVectorNumElements(); - unsigned Scale1 = NumV1Elts / NumMaskElts; - unsigned Scale2 = NumV2Elts / NumMaskElts; - APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts); - APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts); - for (unsigned i = 0; i != NumMaskElts; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - if (M == SM_SentinelZero) { - DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); - DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); - continue; - } - if (M == i) { - DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); - continue; - } - if (M == (i + NumMaskElts)) { - DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); - continue; - } - IsBlend = false; - break; - } - if (IsBlend && - DAG.computeKnownBits(V1, DemandedZeroV1).isZero() && - DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) { - Shuffle = ISD::OR; - SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT(); - return true; - } - } - return false; } diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 6a9a401264c56..3d8e42b9c07d0 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -389,9 +389,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; SSE2-NEXT: pand %xmm5, %xmm1 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: pandn %xmm3, %xmm5 +; SSE2-NEXT: por %xmm5, %xmm1 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: por %xmm4, %xmm1 ; SSE2-NEXT: retq @@ -409,9 +411,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE3-NEXT: movdqa %xmm3, %xmm4 ; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE3-NEXT: por %xmm4, %xmm0 -; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 +; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; SSE3-NEXT: pand %xmm5, %xmm1 ; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE3-NEXT: por %xmm3, %xmm1 +; SSE3-NEXT: pandn %xmm3, %xmm5 +; SSE3-NEXT: por %xmm5, %xmm1 ; SSE3-NEXT: pand %xmm2, %xmm1 ; SSE3-NEXT: por %xmm4, %xmm1 ; SSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll index 9256a43f8e339..6b49f22f21f1f 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll @@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind { define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind { ; AVX1-LABEL: negative: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14] -; AVX1-NEXT: 
vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14] +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 86423ce76065b..f448f41cf522e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1713,8 +1713,9 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { ; ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u] -; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] +; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v8i16_XX4X8acX: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index e5285aebda69e..82df05e5ae068 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3] ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u] -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u] +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255] ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 From cb327922101b28ea70ec68d7f026da0e5e388eed Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 3 Aug 2020 13:54:00 -0700 Subject: [PATCH 230/600] [WebAssembly] Implement prototype v128.load{32,64}_zero instructions Specified in https://github.com/WebAssembly/simd/pull/237, these instructions load the first vector lane from memory and zero the other lanes. Since these instructions are not officially part of the SIMD proposal, they are only available on an opt-in basis via LLVM intrinsics and clang builtin functions. If these instructions are merged to the proposal, this implementation will change so that the instructions will be generated from normal IR. At that point the intrinsics and builtin functions would be removed. 
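As a rough sketch of the opt-in usage (modelled on the clang tests below;
the vector typedefs are illustrative and not part of this patch):

    typedef int i32x4 __attribute__((vector_size(16)));
    typedef long long i64x2 __attribute__((vector_size(16)));

    // v128.load32_zero: loads *p into lane 0 and zeroes lanes 1-3.
    i32x4 head32(int *p) { return __builtin_wasm_load32_zero(p); }

    // v128.load64_zero: loads *p into lane 0 and zeroes lane 1.
    i64x2 head64(long long *p) { return __builtin_wasm_load64_zero(p); }

Both builtins are gated on the simd128 target feature, so code like this
would be built with e.g. `clang --target=wasm32 -msimd128`.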
This PR also changes the opcodes for the experimental f32x4.qfm{a,s} instructions because their opcodes conflicted with those of the v128.load{32,64}_zero instructions. The new opcodes were chosen to match those used in V8. Differential Revision: https://reviews.llvm.org/D84820 --- .../clang/Basic/BuiltinsWebAssembly.def | 3 + clang/lib/CodeGen/CGBuiltin.cpp | 10 + clang/test/CodeGen/builtins-wasm.c | 12 + llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 14 ++ .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 2 + .../WebAssembly/WebAssemblyISelLowering.cpp | 9 + .../WebAssembly/WebAssemblyInstrMemory.td | 2 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 50 +++- .../WebAssembly/simd-load-zero-offset.ll | 228 ++++++++++++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 16 +- 10 files changed, 334 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index d0f40f991a4c7..39f29740cf56d 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -169,5 +169,8 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128 TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4ii*", "nU", "simd128") +TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLi*", "nU", "simd128") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 042b41a09f198..2ef164b8b65ab 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16497,6 +16497,16 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); return Builder.CreateCall(Callee, {Low, High}); } + case WebAssembly::BI__builtin_wasm_load32_zero: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero); + return Builder.CreateCall(Callee, {Ptr}); + } + case WebAssembly::BI__builtin_wasm_load64_zero: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero); + return Builder.CreateCall(Callee, {Ptr}); + } case WebAssembly::BI__builtin_wasm_shuffle_v8x16: { Value *Ops[18]; size_t OpIdx = 0; diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 0f66fceef4cca..14e0d0ac65ed2 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -737,6 +737,18 @@ i16x8 narrow_u_i16x8_i32x4(i32x4 low, i32x4 high) { // WEBASSEMBLY: ret } +i32x4 load32_zero(int *p) { + return __builtin_wasm_load32_zero(p); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + // WEBASSEMBLY: ret +} + +i64x2 load64_zero(long long *p) { + return __builtin_wasm_load64_zero(p); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.load64.zero(i64* %p) + // WEBASSEMBLY: ret +} + i8x16 swizzle_v8x16(i8x16 x, i8x16 y) { return __builtin_wasm_swizzle_v8x16(x, y); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 9cc9f9eb6f187..627a579ae5065 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ 
b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -190,6 +190,20 @@ def int_wasm_nearest :
             [LLVMMatchType<0>],
             [IntrNoMem, IntrSpeculatable]>;
 
+// TODO: Replace these intrinsics with normal ISel patterns once the
+// load_zero instructions are merged to the proposal.
+def int_wasm_load32_zero :
+  Intrinsic<[llvm_v4i32_ty],
+            [LLVMPointerType<llvm_i32_ty>],
+            [IntrReadMem, IntrArgMemOnly],
+            "", [SDNPMemOperand]>;
+
+def int_wasm_load64_zero :
+  Intrinsic<[llvm_v2i64_ty],
+            [LLVMPointerType<llvm_i64_ty>],
+            [IntrReadMem, IntrArgMemOnly],
+            "", [SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 // Thread-local storage intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 02b310628ee17..631e96dd9246b 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -232,6 +232,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(ATOMIC_NOTIFY)
   WASM_LOAD_STORE(ATOMIC_WAIT_I32)
   WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
+  WASM_LOAD_STORE(LOAD_ZERO_v4i32)
   return 2;
   WASM_LOAD_STORE(LOAD_I64)
   WASM_LOAD_STORE(LOAD_F64)
@@ -254,6 +255,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
   WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)
   WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
   WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
+  WASM_LOAD_STORE(LOAD_ZERO_v2i64)
   return 3;
   WASM_LOAD_STORE(LOAD_V128)
   WASM_LOAD_STORE(STORE_V128)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index cdfbfe388abbf..5d377e8aa9bd1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -675,6 +675,15 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.align = Align(8);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
+  case Intrinsic::wasm_load32_zero:
+  case Intrinsic::wasm_load64_zero:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = Info.memVT == MVT::i32 ?
Align(4) : Align(8); + Info.flags = MachineMemOperand::MOLoad; + return true; default: return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index b3c63cc1f884b..48b934457267e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -70,7 +70,7 @@ defm LOAD_F64 : WebAssemblyLoad; multiclass LoadPatNoOffset { def : Pat<(ty (kind I32:$addr)), (!cast(inst # "_A32") 0, 0, I32:$addr)>, Requires<[HasAddr32]>; - def : Pat<(ty (kind I64:$addr)), (!cast(inst # "_A64") 0, 0, I64:$addr)>, + def : Pat<(ty (kind (i64 I64:$addr))), (!cast(inst # "_A64") 0, 0, I64:$addr)>, Requires<[HasAddr64]>; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 9bbccecffaa09..cd088751bc8a9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -163,6 +163,43 @@ defm : LoadPatGlobalAddrOffOnly(exts[0]#types[1]), "LOAD_EXTEND"#exts[1]#"_"#types[0]>; } +// Load lane into zero vector +multiclass SIMDLoadZero simdop> { + let mayLoad = 1, UseNamedOperandTable = 1 in { + defm LOAD_ZERO_#vec_t#_A32 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + name#"\t$dst, ${off}(${addr})$p2align", + name#"\t$off$p2align", simdop>; + defm LOAD_ZERO_#vec_t#_A64 : + SIMD_I<(outs V128:$dst), + (ins P2Align:$p2align, offset64_op:$off, I64:$addr), + (outs), (ins P2Align:$p2align, offset64_op:$off), [], + name#"\t$dst, ${off}(${addr})$p2align", + name#"\t$off$p2align", simdop>; + } // mayLoad = 1, UseNamedOperandTable = 1 +} + +// TODO: Also support v4f32 and v2f64 once the instructions are merged +// to the proposal +defm "" : SIMDLoadZero; +defm "" : SIMDLoadZero; + +defm : LoadPatNoOffset; +defm : LoadPatNoOffset; + +defm : LoadPatImmOff; +defm : LoadPatImmOff; + +defm : LoadPatImmOff; +defm : LoadPatImmOff; + +defm : LoadPatOffsetOnly; +defm : LoadPatOffsetOnly; + +defm : LoadPatGlobalAddrOffOnly; +defm : LoadPatGlobalAddrOffOnly; // Store: v128.store let mayStore = 1, UseNamedOperandTable = 1 in { @@ -800,7 +837,7 @@ let isCommutable = 1 in defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))], "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s", - 180>; + 186>; //===----------------------------------------------------------------------===// // Floating-point unary arithmetic @@ -1038,20 +1075,21 @@ def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; // Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) //===----------------------------------------------------------------------===// -multiclass SIMDQFM baseInst> { +multiclass SIMDQFM simdopA, + bits<32> simdopS> { defm QFMA_#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec_t V128:$dst), (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], - vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; + vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", simdopA>; defm QFMS_#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), [(set (vec_t V128:$dst), (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], - vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; + vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", simdopS>; } 
-defm "" : SIMDQFM; -defm "" : SIMDQFM; +defm "" : SIMDQFM; +defm "" : SIMDQFM; diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll new file mode 100644 index 0000000000000..ab3643653deb4 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +; Test SIMD v128.load{32,64}_zero instructions + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare <4 x i32> @llvm.wasm.load32.zero(i32*) +declare <2 x i64> @llvm.wasm.load64.zero(i64*) + +;===---------------------------------------------------------------------------- +; v128.load32_zero +;===---------------------------------------------------------------------------- + +define <4 x i32> @load_zero_i32_no_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_no_offset: +; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 0 +; CHECK-NEXT: # fallthrough-return + %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p) + ret <4 x i32> %v +} + +define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_offset: +; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 24 +; CHECK-NEXT: # fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_folded_gep_offset: +; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 24 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load32_zero 0 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_offset: +; CHECK: .functype load_zero_i32_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load32_zero 0 +; CHECK-NEXT: # fallthrough-return + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) { +; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset: +; CHECK: .functype load_zero_i32_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: 
i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load32_zero 0 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr i32, i32* %p, i32 6 + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +define <4 x i32> @load_zero_i32_from_numeric_address() { +; CHECK-LABEL: load_zero_i32_from_numeric_address: +; CHECK: .functype load_zero_i32_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load32_zero 42 +; CHECK-NEXT: # fallthrough-return + %s = inttoptr i32 42 to i32* + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s) + ret <4 x i32> %t +} + +@gv_i32 = global i32 0 +define <4 x i32> @load_zero_i32_from_global_address() { +; CHECK-LABEL: load_zero_i32_from_global_address: +; CHECK: .functype load_zero_i32_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load32_zero gv_i32 +; CHECK-NEXT: # fallthrough-return + %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32) + ret <4 x i32> %t +} + +;===---------------------------------------------------------------------------- +; v128.load64_zero +;===---------------------------------------------------------------------------- + +define <2 x i64> @load_zero_i64_no_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_no_offset: +; CHECK: .functype load_zero_i64_no_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 0 +; CHECK-NEXT: # fallthrough-return + %v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p) + ret <2 x i64> %v +} + +define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_with_folded_offset: +; CHECK: .functype load_zero_i64_with_folded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 24 +; CHECK-NEXT: # fallthrough-return + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_with_folded_gep_offset: +; CHECK: .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 48 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr inbounds i64, i64* %p, i64 6 + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_with_unfolded_gep_negative_offset: +; CHECK: .functype load_zero_i64_with_unfolded_gep_negative_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -48 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load64_zero 0 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr inbounds i64, i64* %p, i64 -6 + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_with_unfolded_offset: +; CHECK: .functype load_zero_i64_with_unfolded_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 24 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load64_zero 0 +; CHECK-NEXT: # fallthrough-return + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = tail call <2 x i64> 
@llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) { +; CHECK-LABEL: load_zero_i64_with_unfolded_gep_offset: +; CHECK: .functype load_zero_i64_with_unfolded_gep_offset (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 48 +; CHECK-NEXT: i32.add +; CHECK-NEXT: v128.load64_zero 0 +; CHECK-NEXT: # fallthrough-return + %s = getelementptr i64, i64* %p, i64 6 + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +define <2 x i64> @load_zero_i64_from_numeric_address() { +; CHECK-LABEL: load_zero_i64_from_numeric_address: +; CHECK: .functype load_zero_i64_from_numeric_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load64_zero 42 +; CHECK-NEXT: # fallthrough-return + %s = inttoptr i32 42 to i64* + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s) + ret <2 x i64> %t +} + +@gv_i64 = global i64 0 +define <2 x i64> @load_zero_i64_from_global_address() { +; CHECK-LABEL: load_zero_i64_from_global_address: +; CHECK: .functype load_zero_i64_from_global_address () -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: v128.load64_zero gv_i64 +; CHECK-NEXT: # fallthrough-return + %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64) + ret <2 x i64> %t +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 5818588380a1f..bb101caefeb6f 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -463,9 +463,6 @@ main: # CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01] i32x4.sub - # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xb4,0x01] - i32x4.dot_i16x8_s - # CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01] i32x4.mul @@ -481,6 +478,9 @@ main: # CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01] i32x4.max_u + # CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xba,0x01] + i32x4.dot_i16x8_s + # CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01] i64x2.neg @@ -610,10 +610,16 @@ main: # CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01] f32x4.convert_i32x4_u - # CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01] + # CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20] + v128.load32_zero 32 + + # CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20] + v128.load64_zero 32 + + # CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01] f32x4.qfma - # CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01] + # CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01] f32x4.qfms # CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01] From 22916481c11e1d46132752086290a668e62fc9ce Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 3 Aug 2020 22:03:14 +0100 Subject: [PATCH 231/600] [ARM] Convert VPSEL to VMOV in tail predicated loops VPSEL has slightly different semantics under tail predication (it can end up selecting from Qn, Qm and Qd). We do not model that at the moment so they block tail predicated loops from being formed. This just converts them into a predicated VMOV instead (via a VORR), allowing tail predication to happen whilst still modelling the original behaviour of the input. 
Differential Revision: https://reviews.llvm.org/D85110 --- .../Target/ARM/MVEVPTOptimisationsPass.cpp | 43 +++++++++- .../cond-vector-reduce-mve-codegen.ll | 6 +- .../test/CodeGen/Thumb2/mve-pred-selectop3.ll | 80 +++++++++++-------- .../test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll | 22 ++--- llvm/test/CodeGen/Thumb2/mve-vctp.ll | 21 ++--- 5 files changed, 110 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp index 382ddd4572c74..8dbb8b53c8905 100644 --- a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp +++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp @@ -57,6 +57,7 @@ class MVEVPTOptimisations : public MachineFunctionPass { Register Target); bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); + bool ConvertVPSEL(MachineBasicBlock &MBB); }; char MVEVPTOptimisations::ID = 0; @@ -356,7 +357,7 @@ bool MVEVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { } for (MachineInstr *DeadInstruction : DeadInstructions) - DeadInstruction->removeFromParent(); + DeadInstruction->eraseFromParent(); return Modified; } @@ -430,7 +431,44 @@ bool MVEVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { } for (MachineInstr *DeadInstruction : DeadInstructions) - DeadInstruction->removeFromParent(); + DeadInstruction->eraseFromParent(); + + return !DeadInstructions.empty(); +} + +// Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a +// somewhat blunt approximation to allow tail predicated with vpsel +// instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly +// different semantics under tail predication. Until that is modelled we just +// convert to a VMOVT (via a predicated VORR) instead. 
+bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { + bool HasVCTP = false; + SmallVector DeadInstructions; + + for (MachineInstr &MI : MBB.instrs()) { + if (isVCTP(&MI)) { + HasVCTP = true; + continue; + } + + if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) + continue; + + MachineInstrBuilder MIBuilder = + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(1)) + .addImm(ARMVCC::Then) + .add(MI.getOperand(4)) + .add(MI.getOperand(2)); + LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); + dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); + DeadInstructions.push_back(&MI); + } + + for (MachineInstr *DeadInstruction : DeadInstructions) + DeadInstruction->eraseFromParent(); return !DeadInstructions.empty(); } @@ -452,6 +490,7 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { for (MachineBasicBlock &MBB : Fn) { Modified |= ReplaceVCMPsByVPNOTs(MBB); Modified |= ReduceOldVCCRValueUses(MBB); + Modified |= ConvertVPSEL(MBB); } LLVM_DEBUG(dbgs() << "**************************************\n"); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index eff56a041ee27..d1151f29a9b20 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -23,14 +23,14 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: and r4, r12, #15 ; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill -; CHECK-NEXT: vdup.32 q3, r4 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r2], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 -; CHECK-NEXT: vcmp.i32 eq, q3, zr +; CHECK-NEXT: vdup.32 q3, r4 +; CHECK-NEXT: vpt.i32 eq, q3, zr +; CHECK-NEXT: vmovt q1, q2 ; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vpsel q1, q2, q1 ; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll index df92d30da6af4..bf43a4956a539 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-selectop3.ll @@ -1739,9 +1739,10 @@ define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i ; CHECK-LABEL: icmp_slt_v4i32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s32 gt, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1755,9 +1756,10 @@ define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i ; CHECK-LABEL: icmp_slt_v8i16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s16 gt, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) @@ -1771,9 +1773,10 @@ define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i ; CHECK-LABEL: icmp_slt_v16i8_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s8 gt, q1, q0 -; CHECK-NEXT: vpsel q0, q0, 
q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) @@ -1787,9 +1790,10 @@ define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i ; CHECK-LABEL: icmp_sgt_v4i32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s32 gt, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1803,9 +1807,10 @@ define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i ; CHECK-LABEL: icmp_sgt_v8i16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s16 gt, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) @@ -1819,9 +1824,10 @@ define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i ; CHECK-LABEL: icmp_sgt_v16i8_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.s8 gt, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) @@ -1835,9 +1841,10 @@ define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i ; CHECK-LABEL: icmp_ult_v4i32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u32 hi, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1851,9 +1858,10 @@ define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i ; CHECK-LABEL: icmp_ult_v8i16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u16 hi, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) @@ -1867,9 +1875,10 @@ define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i ; CHECK-LABEL: icmp_ult_v16i8_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u8 hi, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) @@ -1883,9 +1892,10 @@ define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i ; CHECK-LABEL: icmp_ugt_v4i32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u32 hi, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1899,9 +1909,10 @@ define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i ; CHECK-LABEL: icmp_ugt_v8i16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u16 hi, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) @@ 
-1915,9 +1926,10 @@ define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i ; CHECK-LABEL: icmp_ugt_v16i8_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.8 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.u8 hi, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) @@ -1931,9 +1943,10 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x f ; CHECK-LABEL: fcmp_fast_olt_v4f32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.f32 gt, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1947,9 +1960,10 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x hal ; CHECK-LABEL: fcmp_fast_olt_v8f16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.f16 gt, q1, q0 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) @@ -1963,9 +1977,10 @@ define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x f ; CHECK-LABEL: fcmp_fast_ogt_v4f32_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.32 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.f32 gt, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) @@ -1979,9 +1994,10 @@ define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x hal ; CHECK-LABEL: fcmp_fast_ogt_v8f16_y: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vctp.16 r0 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vcmpt.f16 gt, q0, q1 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vmovt q1, q0 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 054499795ff7a..311a06a675771 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -9,32 +9,22 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: movs r6, #0 -; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: vidup.u32 q2, r6, #1 -; CHECK-NEXT: cmp r1, #4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge.w r12, #4 -; CHECK-NEXT: sub.w r6, r1, r12 -; CHECK-NEXT: adds r6, #3 -; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: add.w lr, lr, r6, lsr #2 ; CHECK-NEXT: vldrw.u32 q1, [r4] ; CHECK-NEXT: vmov.i32 q3, #0x4 ; CHECK-NEXT: mov r12, r1 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vldrw.u32 q4, [r0], #16 +; CHECK-NEXT: vcmp.f32 ge, q1, q4 ; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 -; CHECK-NEXT: vcmpt.f32 ge, q1, q4 -; CHECK-NEXT: vpsel q0, q2, q0 -; CHECK-NEXT: vpsel q1, q4, q1 +; CHECK-NEXT: vmovt q1, q4 +; CHECK-NEXT: vmovt q0, q2 ; CHECK-NEXT: vadd.i32 q2, q2, q3 -; CHECK-NEXT: 
le lr, .LBB0_1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: vldr s8, .LCPI0_1 ; CHECK-NEXT: vdup.32 q3, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vctp.ll b/llvm/test/CodeGen/Thumb2/mve-vctp.ll index 67bc161e02c64..8cddbc79e2e13 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vctp.ll @@ -4,10 +4,11 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { ; CHECK-LABEL: vctp8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %arg) @@ -20,10 +21,11 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { ; CHECK-LABEL: vctp16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vctp.16 r0 +; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %arg) @@ -36,10 +38,11 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* %out) { ; CHECK-LABEL: vctp32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: vctp.32 r0 +; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vpst +; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] ; CHECK-NEXT: bx lr %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %arg) From 9a05fa10bd05525adedb6117351333699a3d4ae2 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Mon, 3 Aug 2020 13:55:27 -0700 Subject: [PATCH 232/600] [HWASan] [GlobalISel] Add +tagged-globals backend feature for GlobalISel GlobalISel is the default ISel for aarch64 at -O0. Prior to D78465, GlobalISel didn't have support for dealing with address-of-global lowerings, so it fell back to SelectionDAGISel. HWASan Globals require special handling, as they contain the pointer tag in the top 16-bits, and are thus outside the code model. We need to generate a `movk` in the instruction sequence with a G3 relocation to ensure the bits are relocated properly. This is implemented in SelectionDAGISel, this patch does the same for GlobalISel. GlobalISel and SelectionDAGISel differ in their lowering sequence, so there are differences in the final instruction sequence, explained in `tagged-globals.ll`. Both of these implementations are correct, but GlobalISel is slightly larger code size / slightly slower (by a couple of arithmetic instructions). I don't see this as a problem for now as GlobalISel is only on by default at `-O0`. 
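The bias arithmetic can be sanity-checked with plain integer math. The
addresses below are the ones from the worked example in the comment added
to AArch64LegalizerInfo.cpp; the snippet itself is only an illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Global = 0x0f00000000001000ULL; // 0x1000, tag 0xf
      const uint64_t PC = 0x2000; // code placed after the global
      // Unbiased PC-relative value: the borrow corrupts the tag (0xe).
      assert(((Global - PC) >> 56) == 0x0e);
      // With the 0x100000000 bias (binary <= 4GB) the tag survives (0xf).
      assert(((Global + 0x100000000ULL - PC) >> 56) == 0x0f);
      return 0;
    }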
Reviewed By: aemerson, arsenm

Differential Revision: https://reviews.llvm.org/D82615
---
 .../hwasan/TestCases/exported-tagged-global.c | 16 +++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 23 ++++-
 llvm/test/CodeGen/AArch64/tagged-globals.ll   | 98 ++++++++++++++++---
 3 files changed, 124 insertions(+), 13 deletions(-)
 create mode 100644 compiler-rt/test/hwasan/TestCases/exported-tagged-global.c

diff --git a/compiler-rt/test/hwasan/TestCases/exported-tagged-global.c b/compiler-rt/test/hwasan/TestCases/exported-tagged-global.c
new file mode 100644
index 0000000000000..198d8781f68af
--- /dev/null
+++ b/compiler-rt/test/hwasan/TestCases/exported-tagged-global.c
@@ -0,0 +1,16 @@
+// RUN: %clang_hwasan %s -o %t
+// RUN: %run %t
+// RUN: %clang_hwasan -O1 %s -o %t
+// RUN: %run %t
+// RUN: %clang_hwasan -O1 -mllvm --aarch64-enable-global-isel-at-O=1 %s -o %t
+// RUN: %run %t
+
+static int global;
+
+__attribute__((optnone)) int *address_of_global() { return &global; }
+
+int main(int argc, char **argv) {
+  int *global_address = address_of_global();
+  *global_address = 13;
+  return 0;
+}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 6e5563af43638..063c451440dc1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -351,7 +351,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         if (DstSize == 128 && !Query.Types[0].isVector())
           return false; // Extending to a scalar s128 needs narrowing.
-        
+
         // Make sure that we have something that will fit in a register, and
         // make sure it's a power of 2.
         if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
@@ -676,6 +676,27 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI,
   // Set the regclass on the dest reg too.
   MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass);
 
+  // MO_TAGGED on the page indicates a tagged address. Set the tag now. We do so
+  // by creating a MOVK that sets bits 48-63 of the register to (global address
+  // + 0x100000000 - PC) >> 48. The additional 0x100000000 offset here is to
+  // prevent an incorrect tag being generated during relocation when the
+  // global appears before the code section. Without the offset, a global at
+  // `0x0f00'0000'0000'1000` (i.e. at `0x1000` with tag `0xf`) that's referenced
+  // by code at `0x2000` would result in `0x0f00'0000'0000'1000 - 0x2000 =
+  // 0x0eff'ffff'ffff'f000`, meaning the tag would be incorrectly set to `0xe`
+  // instead of `0xf`.
+  // This assumes that we're in the small code model so we can assume a binary
+  // size of <= 4GB, which makes the untagged PC relative offset positive. The
+  // binary must also be loaded into address range [0, 2^48). Both of these
+  // properties need to be ensured at runtime when using tagged addresses.
+ if (OpFlags & AArch64II::MO_TAGGED) { + ADRP = MIRBuilder.buildInstr(AArch64::MOVKXi, {LLT::pointer(0, 64)}, {ADRP}) + .addGlobalAddress(GV, 0x100000000, + AArch64II::MO_PREL | AArch64II::MO_G3) + .addImm(48); + MRI.setRegClass(ADRP.getReg(0), &AArch64::GPR64RegClass); + } + MIRBuilder.buildInstr(AArch64::G_ADD_LOW, {DstReg}, {ADRP}) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); diff --git a/llvm/test/CodeGen/AArch64/tagged-globals.ll b/llvm/test/CodeGen/AArch64/tagged-globals.ll index b0cf882584c6f..cdca6c7a1fba1 100644 --- a/llvm/test/CodeGen/AArch64/tagged-globals.ll +++ b/llvm/test/CodeGen/AArch64/tagged-globals.ll @@ -1,31 +1,105 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc --relocation-model=static < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-STATIC,CHECK-SELECTIONDAGISEL +; RUN: llc --relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-PIC + +; Ensure that GlobalISel lowers correctly. GlobalISel is the default ISel for +; -O0 on aarch64. GlobalISel lowers the instruction sequence in the static +; relocation model different to SelectionDAGISel. GlobalISel does the lowering +; of AddLow *after* legalization, and thus doesn't differentiate between +; address-taken-only vs. address-taken-for-loadstore. Hence, we generate a movk +; instruction for load/store instructions as well with GlobalISel. GlobalISel +; also doesn't have the scaffolding to correctly check the bounds of the global +; offset, and cannot fold the lo12 bits into the load/store. Neither of these +; things are a problem as GlobalISel is only used by default at -O0, so we don't +; mind the code size and performance increase. + +; RUN: llc --aarch64-enable-global-isel-at-O=0 -O0 < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-STATIC,CHECK-GLOBALISEL +; RUN: llc --aarch64-enable-global-isel-at-O=0 -O0 --relocation-model=pic < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-PIC target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-android" -@global = external hidden global i32 +@global = external global i32 declare void @func() define i32* @global_addr() #0 { - ; CHECK: global_addr: - ; CHECK: adrp x0, :pg_hi21_nc:global - ; CHECK: movk x0, #:prel_g3:global+4294967296 - ; CHECK: add x0, x0, :lo12:global + ; Static relocation model has common codegen between SelectionDAGISel and + ; GlobalISel when the address-taken of a global isn't folded into a load or + ; store instruction. 
+  ; CHECK-STATIC: global_addr:
+  ; CHECK-STATIC: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
+  ; CHECK-STATIC: movk [[REG]], #:prel_g3:global+4294967296
+  ; CHECK-STATIC: add x0, [[REG]], :lo12:global
+  ; CHECK-STATIC: ret
+
+  ; CHECK-PIC: global_addr:
+  ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
+  ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:global]
+  ; CHECK-PIC: ret
+
   ret i32* @global
 }
 
 define i32 @global_load() #0 {
-  ; CHECK: global_load:
-  ; CHECK: adrp x8, :pg_hi21_nc:global
-  ; CHECK: ldr w0, [x8, :lo12:global]
+  ; CHECK-SELECTIONDAGISEL: global_load:
+  ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
+  ; CHECK-SELECTIONDAGISEL: ldr w0, {{\[}}[[REG]], :lo12:global{{\]}}
+  ; CHECK-SELECTIONDAGISEL: ret
+
+  ; CHECK-GLOBALISEL: global_load:
+  ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
+  ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
+  ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
+  ; CHECK-GLOBALISEL: ldr w0, {{\[}}[[REG]]{{\]}}
+  ; CHECK-GLOBALISEL: ret
+
+  ; CHECK-PIC: global_load:
+  ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
+  ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global]
+  ; CHECK-PIC: ldr w0, {{\[}}[[REG]]{{\]}}
+  ; CHECK-PIC: ret
+
   %load = load i32, i32* @global
   ret i32 %load
 }
 
+define void @global_store() #0 {
+  ; CHECK-SELECTIONDAGISEL: global_store:
+  ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
+  ; CHECK-SELECTIONDAGISEL: str wzr, {{\[}}[[REG]], :lo12:global{{\]}}
+  ; CHECK-SELECTIONDAGISEL: ret
+
+  ; CHECK-GLOBALISEL: global_store:
+  ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global
+  ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296
+  ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global
+  ; CHECK-GLOBALISEL: str wzr, {{\[}}[[REG]]{{\]}}
+  ; CHECK-GLOBALISEL: ret
+
+  ; CHECK-PIC: global_store:
+  ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global
+  ; CHECK-PIC: ldr [[REG]], {{\[}}[[REG]], :got_lo12:global]
+  ; CHECK-PIC: str wzr, {{\[}}[[REG]]{{\]}}
+  ; CHECK-PIC: ret
+
+  store i32 0, i32* @global
+  ret void
+}
+
 define void ()* @func_addr() #0 {
-  ; CHECK: func_addr:
-  ; CHECK: adrp x0, func
-  ; CHECK: add x0, x0, :lo12:func
+  ; CHECK-STATIC: func_addr:
+  ; CHECK-STATIC: adrp [[REG:x[0-9]+]], func
+  ; CHECK-STATIC: add x0, [[REG]], :lo12:func
+  ; CHECK-STATIC: ret
+
+  ; CHECK-PIC: func_addr:
+  ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:func
+  ; CHECK-PIC: ldr x0, {{\[}}[[REG]], :got_lo12:func]
+  ; CHECK-PIC: ret
+
   ret void ()* @func
 }

From 8c39e70679e93da3af9f881d314940c570d5d822 Mon Sep 17 00:00:00 2001
From: River Riddle
Date: Mon, 3 Aug 2020 14:20:50 -0700
Subject: [PATCH 233/600] [mlir][OpFormatGen] Add support for eliding UnitAttr
 when used to anchor an optional group

Unit attributes are given meaning by their existence, and thus have no
meaningful value beyond "is it present". As such, in the format of an
operation, unit attributes are generally used to guard the printing of other
elements and aren't generally printed themselves, as the presence of the
group when parsing means that the unit attribute should be added.

This revision adds support to the declarative format for eliding unit
attributes in situations where they anchor an optional group, but aren't the
first element. For example,

```
let assemblyFormat = "(`is_optional` $unit_attr^)? attr-dict";
```

would print `foo.op is_optional` when $unit_attr is present, instead of the
current `foo.op is_optional unit`.
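For contrast, a sketch inferred from the `no_elide` test added below: when
the unit attribute is itself the first element of the optional group, nothing
else in the group implies its presence, so it cannot be elided:

```
let assemblyFormat = "($unit_attr^)? attr-dict";
```

This form still prints `foo.op unit` when the attribute is present.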
Differential Revision: https://reviews.llvm.org/D84577
---
 mlir/docs/OpDefinitions.md             | 32 +++++++++++++++++++-
 mlir/test/lib/Dialect/Test/TestOps.td  | 11 +++++++
 mlir/test/mlir-tblgen/op-format.mlir   | 14 +++++++
 mlir/tools/mlir-tblgen/OpFormatGen.cpp | 42 +++++++++++++++++++++++---
 4 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md
index c068aac09babe..0997f29727433 100644
--- a/mlir/docs/OpDefinitions.md
+++ b/mlir/docs/OpDefinitions.md
@@ -733,7 +733,7 @@ information. An optional group is defined by wrapping a set of elements within
 An example of an operation with an optional group is `std.return`, which has a
 variadic number of operands.
 
-```
+```tablegen
 def ReturnOp : ... {
   let arguments = (ins Variadic<AnyType>:$operands);
 
@@ -743,6 +743,36 @@ def ReturnOp : ... {
 }
 ```
 
+##### Unit Attributes
+
+In MLIR, the [`unit` Attribute](LangRef.md#unit-attribute) is special in that it
+only has one possible value, i.e. it derives meaning from its existence. When a
+unit attribute is used to anchor an optional group and is not the first element
+of the group, the presence of the unit attribute can be directly correlated with
+the presence of the optional group itself. As such, in these situations the unit
+attribute will not be printed or present in the output and will be automatically
+inferred when parsing by the presence of the optional group itself.
+
+For example, the following operation:
+
+```tablegen
+def FooOp : ... {
+  let arguments = (ins UnitAttr:$is_read_only);
+
+  let assemblyFormat = "attr-dict (`is_read_only` $is_read_only^)?";
+}
+```
+
+would be formatted as such:
+
+```mlir
+// When the unit attribute is present:
+foo.op is_read_only
+
+// When the unit attribute is not present:
+foo.op
+```
+
 #### Requirements
 
 The format specification has a certain set of requirements that must be adhered
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 0c26f8a719c09..742033b130bc3 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -1391,6 +1391,17 @@ def FormatInferVariadicTypeFromNonVariadic
   let assemblyFormat = "$operands attr-dict `:` type($result)";
 }
 
+def FormatOptionalUnitAttr : TEST_Op<"format_optional_unit_attribute"> {
+  let arguments = (ins UnitAttr:$is_optional);
+  let assemblyFormat = "(`is_optional` $is_optional^)? attr-dict";
+}
+
+def FormatOptionalUnitAttrNoElide
+    : TEST_Op<"format_optional_unit_attribute_no_elide"> {
+  let arguments = (ins UnitAttr:$is_optional);
+  let assemblyFormat = "($is_optional^)? attr-dict";
+}
+
 //===----------------------------------------------------------------------===//
 // AllTypesMatch type inference
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir
index af5976b227068..959bbdc5c6bb5 100644
--- a/mlir/test/mlir-tblgen/op-format.mlir
+++ b/mlir/test/mlir-tblgen/op-format.mlir
@@ -82,6 +82,20 @@ test.format_operand_e_op %i64, %memref : i64, memref<1xf64>
 }) { arg_names = ["i", "j", "k"] } : () -> ()
 
+//===----------------------------------------------------------------------===//
+// Format optional attributes
+//===----------------------------------------------------------------------===//
+
+// CHECK: test.format_optional_unit_attribute is_optional
+test.format_optional_unit_attribute is_optional
+
+// CHECK: test.format_optional_unit_attribute
+// CHECK-NOT: is_optional
+test.format_optional_unit_attribute
+
+// CHECK: test.format_optional_unit_attribute_no_elide unit
+test.format_optional_unit_attribute_no_elide unit
+
 //===----------------------------------------------------------------------===//
 // Format optional operands and results
 //===----------------------------------------------------------------------===//
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 13f2a2fd96dc9..edf804578aa02 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -107,6 +107,11 @@ struct AttributeVariable
     Optional<Type> attrType = var->attr.getValueType();
     return attrType ? attrType->getBuilderCall() : llvm::None;
   }
+
+  /// Return if this attribute refers to a UnitAttr.
+  bool isUnitAttr() const {
+    return var->attr.getBaseAttr().getAttrDefName() == "UnitAttr";
+  }
 };
 
 /// This class represents a variable that refers to an operand argument.
@@ -645,9 +650,23 @@ static void genElementParser(Element *element, OpMethodBody &body,
       body << "  if (!" << opVar->getVar()->name << "Operands.empty()) {\n";
     }
 
+    // If the anchor is a unit attribute, we don't need to print it. When
+    // parsing, we will add this attribute if this group is present.
+    Element *elidedAnchorElement = nullptr;
+    auto *anchorAttr = dyn_cast<AttributeVariable>(optional->getAnchor());
+    if (anchorAttr && anchorAttr != firstElement && anchorAttr->isUnitAttr()) {
+      elidedAnchorElement = anchorAttr;
+
+      // Add the anchor unit attribute to the operation state.
+      body << "    result.addAttribute(\"" << anchorAttr->getVar()->name
+           << "\", parser.getBuilder().getUnitAttr());\n";
+    }
+
     // Generate the rest of the elements normally.
-    for (auto &childElement : llvm::drop_begin(elements, 1))
-      genElementParser(&childElement, body, attrTypeCtx);
+    for (Element &childElement : llvm::drop_begin(elements, 1)) {
+      if (&childElement != elidedAnchorElement)
+        genElementParser(&childElement, body, attrTypeCtx);
+    }
     body << "  }\n";
 
   /// Literals.
@@ -1058,10 +1077,23 @@ static void genElementPrinter(Element *element, OpMethodBody &body,
          << cast<AttributeVariable>(anchor)->getVar()->name << "\")) {\n";
    }
 
+    // If the anchor is a unit attribute, we don't need to print it. When
+    // parsing, we will add this attribute if this group is present.
+    auto elements = optional->getElements();
+    Element *elidedAnchorElement = nullptr;
+    auto *anchorAttr = dyn_cast<AttributeVariable>(anchor);
+    if (anchorAttr && anchorAttr != &*elements.begin() &&
+        anchorAttr->isUnitAttr()) {
+      elidedAnchorElement = anchorAttr;
+    }
+
     // Emit each of the elements.
-    for (Element &childElement : optional->getElements())
-      genElementPrinter(&childElement, body, fmt, op, shouldEmitSpace,
-                        lastWasPunctuation);
+    for (Element &childElement : elements) {
+      if (&childElement != elidedAnchorElement) {
+        genElementPrinter(&childElement, body, fmt, op, shouldEmitSpace,
+                          lastWasPunctuation);
+      }
+    }
     body << "  }\n";
     return;
   }

From 1ce82015f6d06f8026357e4faa925f900136b575 Mon Sep 17 00:00:00 2001
From: Alina Sbirlea
Date: Thu, 25 Jun 2020 16:50:15 -0700
Subject: [PATCH 234/600] [MemorySSA] Restrict optimizations after a
 PhiTranslation.

Merging alias results from different paths, when a path did phi
translation, is not necessarily correct. Conservatively terminate such
paths. Aimed to fix PR46156.

Differential Revision: https://reviews.llvm.org/D84905
---
 llvm/include/llvm/Analysis/MemorySSA.h        |  6 +-
 llvm/lib/Analysis/MemorySSA.cpp               | 27 ++++--
 .../Analysis/MemorySSA/phi-translation.ll     | 82 +++++++++++++++++++
 3 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index 5ce2b3fd047f7..3ec09e8c0a45e 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -1214,6 +1214,8 @@ class upward_defs_iterator
 
   BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
 
+  bool performedPhiTranslation() const { return PerformedPhiTranslation; }
+
 private:
   void fillInCurrentPair() {
     CurrentPair.first = *DefIterator;
@@ -1226,6 +1228,7 @@ class upward_defs_iterator
                                           false)) {
       if (Translator.getAddr() != Location.Ptr) {
         CurrentPair.second = Location.getWithNewPtr(Translator.getAddr());
+        PerformedPhiTranslation = true;
         return;
       }
     } else {
@@ -1240,8 +1243,9 @@ class upward_defs_iterator
   memoryaccess_def_iterator DefIterator;
   MemoryLocation Location;
   MemoryAccess *OriginalAccess = nullptr;
-  bool WalkingPhi = false;
   DominatorTree *DT = nullptr;
+  bool WalkingPhi = false;
+  bool PerformedPhiTranslation = false;
 };
 
 inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair,
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index f2f5fd70f4718..6fc827ae2b17c 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -519,9 +519,16 @@ template <class AliasAnalysisType> class ClobberWalker {
   UpwardsMemoryQuery *Query;
   unsigned *UpwardWalkLimit;
 
-  // Phi optimization bookkeeping
+  // Phi optimization bookkeeping:
+  // List of DefPath to process during the current phi optimization walk.
   SmallVector<DefPath, 32> Paths;
+  // List of visited <MemoryAccess, MemoryLocation> pairs; we can skip paths
+  // already visited with the same memory location.
   DenseSet<ConstMemoryAccessPair> VisitedPhis;
+  // Record if phi translation has been performed during the current phi
+  // optimization walk, as merging alias results after phi translation can
+  // yield incorrect results. Context in PR46156.
+  bool PerformedPhiTranslation = false;
 
   /// Find the nearest def or phi that `From` can legally be optimized to.
  const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
@@ -596,12 +603,13 @@ template <class AliasAnalysisType> class ClobberWalker {
   void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
                    ListIndex PriorNode) {
-    auto UpwardDefs = make_range(
-        upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT), upward_defs_end());
+    auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT);
+    auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end());
     for (const MemoryAccessPair &P : UpwardDefs) {
       PausedSearches.push_back(Paths.size());
       Paths.emplace_back(P.second, P.first, PriorNode);
     }
+    PerformedPhiTranslation |= UpwardDefsBegin.performedPhiTranslation();
   }
 
   /// Represents a search that terminated after finding a clobber. This clobber
@@ -651,8 +659,16 @@ template <class AliasAnalysisType> class ClobberWalker {
       //   - We still cache things for A, so C only needs to walk up a bit.
       // If this behavior becomes problematic, we can fix without a ton of extra
       // work.
-      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
+      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) {
+        if (PerformedPhiTranslation) {
+          // If visiting this path performed Phi translation, don't continue,
+          // since it may not be correct to merge results from two paths if one
+          // relies on the phi translation.
+          TerminatedPath Term{Node.Last, PathIndex};
+          return Term;
+        }
         continue;
+      }
 
       const MemoryAccess *SkipStopWhere = nullptr;
       if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
@@ -765,7 +781,7 @@ template <class AliasAnalysisType> class ClobberWalker {
  /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
   OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
                              const MemoryLocation &Loc) {
-    assert(Paths.empty() && VisitedPhis.empty() &&
+    assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation &&
           "Reset the optimization state.");
 
     Paths.emplace_back(Loc, Start, Phi, None);
@@ -921,6 +937,7 @@ template <class AliasAnalysisType> class ClobberWalker {
   void resetPhiOptznState() {
     Paths.clear();
     VisitedPhis.clear();
+    PerformedPhiTranslation = false;
   }
 
 public:
diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll
index c39ccd31d24b3..93ebc86210d1f 100644
--- a/llvm/test/Analysis/MemorySSA/phi-translation.ll
+++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll
@@ -287,3 +287,85 @@ the.end:                 ; preds = %for.main.body
   ret void
 }
+
+
+@c = local_unnamed_addr global [2 x i16] zeroinitializer, align 2
+
+define i32 @dont_merge_noalias_simple(i32* noalias %ptr) {
+; CHECK-LABEL: define i32 @dont_merge_noalias_simple
+; CHECK-LABEL: entry:
+; CHECK:       ; 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT:    store i16 1, i16* %s1.ptr, align 2
+
+; CHECK-LABEL: %for.body
+; CHECK:       ; MemoryUse(4) MayAlias
+; CHECK-NEXT:    %lv = load i16, i16* %arrayidx, align 2
+
+entry:
+  %s1.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 1, i16* %s1.ptr, align 2
+  br label %for.body
+
+for.body:                                 ; preds = %for.body, %entry
+  %storemerge2 = phi i32 [ 1, %entry ], [ %dec, %for.body ]
+  %idxprom1 = zext i32 %storemerge2 to i64
+  %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 %idxprom1
+  %lv = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %lv to i32
+  store i32 %conv, i32* %ptr, align 4
+  %dec = add nsw i32 %storemerge2, -1
+  %cmp = icmp sgt i32 %storemerge2, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                  ; preds = %for.body
+  %s2.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 0, i16* %s2.ptr, align 2
+  ret i32 0
+}
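+
+; Note (paraphrasing the commit message): the upward walk from %lv performs
+; phi translation of %arrayidx across the loop back-edge, and merging alias
+; results from a translated path with an untranslated one is not necessarily
+; correct, so the walker now conservatively terminates such paths, leaving
+; the MayAlias MemoryUse checked above in place.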
+ + +define i32 @dont_merge_noalias_complex(i32* noalias %ptr, i32* noalias %another) { +; CHECK-LABEL: define i32 @dont_merge_noalias_complex +; CHECK-LABEL: entry: +; CHECK: ; 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i16 1, i16* %s1.ptr, align 2 + +; CHECK-LABEL: %for.body +; CHECK: ; MemoryUse(7) MayAlias +; CHECK-NEXT: %lv = load i16, i16* %arrayidx, align 2 + +entry: + %s1.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0 + store i16 1, i16* %s1.ptr, align 2 + br label %for.body + +for.body: ; preds = %for.body, %entry + %storemerge2 = phi i32 [ 1, %entry ], [ %dec, %merge.body ] + %idxprom1 = zext i32 %storemerge2 to i64 + %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 %idxprom1 + %lv = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %lv to i32 + store i32 %conv, i32* %ptr, align 4 + %dec = add nsw i32 %storemerge2, -1 + + %cmpif = icmp sgt i32 %storemerge2, 1 + br i1 %cmpif, label %if.body, label %else.body + +if.body: + store i32 %conv, i32* %another, align 4 + br label %merge.body + +else.body: + store i32 %conv, i32* %another, align 4 + br label %merge.body + +merge.body: + %cmp = icmp sgt i32 %storemerge2, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %s2.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0 + store i16 0, i16* %s2.ptr, align 2 + ret i32 0 +} + From 089adc339e7dd6924d7e107fa362d9d12642f6e0 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 3 Aug 2020 10:50:42 -0700 Subject: [PATCH 235/600] [flang] Make preprocessing behavior tests runnable as regression tests And fix a minor bug exposed by doing so. Differential Revision: https://reviews.llvm.org/D85164 --- flang/lib/Parser/preprocessor.cpp | 2 +- flang/lib/Parser/preprocessor.h | 2 +- flang/lib/Parser/prescan.cpp | 30 +++++++++++++++-------- flang/test/Preprocessing/lit.local.cfg.py | 7 ------ flang/test/Preprocessing/pp001.F | 6 +++-- flang/test/Preprocessing/pp002.F | 6 +++-- flang/test/Preprocessing/pp003.F | 6 +++-- flang/test/Preprocessing/pp004.F | 6 +++-- flang/test/Preprocessing/pp005.F | 6 +++-- flang/test/Preprocessing/pp006.F | 6 +++-- flang/test/Preprocessing/pp007.F | 6 +++-- flang/test/Preprocessing/pp008.F | 6 +++-- flang/test/Preprocessing/pp009.F | 6 +++-- flang/test/Preprocessing/pp010.F | 6 +++-- flang/test/Preprocessing/pp011.F | 6 +++-- flang/test/Preprocessing/pp012.F | 6 +++-- flang/test/Preprocessing/pp013.F | 6 +++-- flang/test/Preprocessing/pp014.F | 6 +++-- flang/test/Preprocessing/pp015.F | 6 +++-- flang/test/Preprocessing/pp016.F | 6 +++-- flang/test/Preprocessing/pp017.F | 6 +++-- flang/test/Preprocessing/pp018.F | 6 +++-- flang/test/Preprocessing/pp019.F | 6 +++-- flang/test/Preprocessing/pp020.F | 6 +++-- flang/test/Preprocessing/pp021.F | 7 ++++-- flang/test/Preprocessing/pp022.F | 7 ++++-- flang/test/Preprocessing/pp023.F | 7 ++++-- flang/test/Preprocessing/pp024.F | 7 ++++-- flang/test/Preprocessing/pp025.F | 6 +++-- flang/test/Preprocessing/pp026.F | 6 +++-- flang/test/Preprocessing/pp027.F | 7 ++++-- flang/test/Preprocessing/pp028.F | 6 +++-- flang/test/Preprocessing/pp029.F | 6 +++-- flang/test/Preprocessing/pp030.F | 6 +++-- flang/test/Preprocessing/pp031.F | 5 +++- flang/test/Preprocessing/pp032.F | 7 ++++-- flang/test/Preprocessing/pp033.F | 7 ++++-- flang/test/Preprocessing/pp034.F | 7 ++++-- flang/test/Preprocessing/pp035.F | 7 ++++-- flang/test/Preprocessing/pp036.F | 7 ++++-- flang/test/Preprocessing/pp037.F | 7 ++++-- flang/test/Preprocessing/pp038.F | 6 
+++--
 flang/test/Preprocessing/pp039.F          |  8 ++++--
 flang/test/Preprocessing/pp040.F          |  4 ++-
 flang/test/Preprocessing/pp041.F          |  6 +++--
 flang/test/Preprocessing/pp042.F          |  6 +++--
 flang/test/Preprocessing/pp043.F          |  6 +++--
 flang/test/Preprocessing/pp044.F          |  6 +++--
 flang/test/Preprocessing/pp101.F90        |  6 +++--
 flang/test/Preprocessing/pp102.F90        |  6 +++--
 flang/test/Preprocessing/pp103.F90        |  6 +++--
 flang/test/Preprocessing/pp104.F90        |  6 +++--
 flang/test/Preprocessing/pp105.F90        |  6 +++--
 flang/test/Preprocessing/pp106.F90        |  6 +++--
 flang/test/Preprocessing/pp107.F90        |  6 +++--
 flang/test/Preprocessing/pp108.F90        |  6 +++--
 flang/test/Preprocessing/pp109.F90        |  6 +++--
 flang/test/Preprocessing/pp110.F90        |  6 +++--
 flang/test/Preprocessing/pp111.F90        |  6 +++--
 flang/test/Preprocessing/pp112.F90        |  6 +++--
 flang/test/Preprocessing/pp113.F90        |  6 +++--
 flang/test/Preprocessing/pp114.F90        |  6 +++--
 flang/test/Preprocessing/pp115.F90        |  6 +++--
 flang/test/Preprocessing/pp116.F90        |  6 +++--
 flang/test/Preprocessing/pp117.F90        |  6 +++--
 flang/test/Preprocessing/pp118.F90        |  6 +++--
 flang/test/Preprocessing/pp119.F90        |  6 +++--
 flang/test/Preprocessing/pp120.F90        |  6 +++--
 flang/test/Preprocessing/pp121.F90        |  7 ++++--
 flang/test/Preprocessing/pp122.F90        |  6 +++--
 flang/test/Preprocessing/pp123.F90        |  6 +++--
 flang/test/Preprocessing/pp124.F90        |  6 +++--
 flang/test/Preprocessing/pp125.F90        |  6 +++--
 flang/test/Preprocessing/pp126.F90        |  6 +++--
 flang/test/Preprocessing/pp127.F90        |  6 +++--
 flang/test/Preprocessing/pp128.F90        |  6 +++--
 flang/test/Preprocessing/pp129.F90        |  4 ++-
 flang/test/Preprocessing/pp130.F90        |  6 +++--
 78 files changed, 330 insertions(+), 164 deletions(-)
 delete mode 100644 flang/test/Preprocessing/lit.local.cfg.py

diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp
index a1f07967d9b08..9c10cedfd53bb 100644
--- a/flang/lib/Parser/preprocessor.cpp
+++ b/flang/lib/Parser/preprocessor.cpp
@@ -212,7 +212,7 @@ Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
 }
 
 void Preprocessor::Define(std::string macro, std::string value) {
-  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
+  definitions_.emplace(macro, Definition{value, allSources_});
 }
 
 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
diff --git a/flang/lib/Parser/preprocessor.h b/flang/lib/Parser/preprocessor.h
index 977d480c2482e..b4374a9acf5c1 100644
--- a/flang/lib/Parser/preprocessor.h
+++ b/flang/lib/Parser/preprocessor.h
@@ -67,6 +67,7 @@ class Preprocessor {
 
   void Define(std::string macro, std::string value);
   void Undefine(std::string macro);
+  bool IsNameDefined(const CharBlock &);
   std::optional<TokenSequence> MacroReplacement(
       const TokenSequence &, const Prescanner &);
@@ -79,7 +80,6 @@ class Preprocessor {
   enum class CanDeadElseAppear { No, Yes };
   CharBlock SaveTokenAsName(const CharBlock &);
-  bool IsNameDefined(const CharBlock &);
   TokenSequence ReplaceMacros(const TokenSequence &, const Prescanner &);
   void SkipDisabledConditionalCode(
       const std::string &, IsElseActive, Prescanner *, ProvenanceRange);
diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp
index c81d6cb508464..face0d20e4fc0 100644
--- a/flang/lib/Parser/prescan.cpp
+++ b/flang/lib/Parser/prescan.cpp
@@ -246,7 +246,7 @@ void Prescanner::NextLine() {
 }
 
 void Prescanner::LabelField(TokenSequence &token, int outCol) {
-  bool badLabel{false};
+  const char *bad{nullptr};
   for (; *at_ != '\n' && column_ <= 6; ++at_) {
     if (*at_ == '\t') {
       ++at_;
@@ -256,16 +256,18 @@ void
Prescanner::LabelField(TokenSequence &token, int outCol) { if (*at_ != ' ' && !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space EmitChar(token, *at_); - if (!IsDecimalDigit(*at_) && !badLabel) { - Say(GetProvenance(at_), - "Character in fixed-form label field must be a digit"_en_US); - badLabel = true; + if (!bad && !IsDecimalDigit(*at_)) { + bad = at_; } ++outCol; } ++column_; } if (outCol > 1) { + if (bad && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { + Say(GetProvenance(bad), + "Character in fixed-form label field must be a digit"_en_US); + } token.CloseToken(); } SkipToNextSignificantCharacter(); @@ -1098,6 +1100,15 @@ const char *Prescanner::IsCompilerDirectiveSentinel( return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); } +constexpr bool IsDirective(const char *match, const char *dir) { + for (; *match; ++match) { + if (*match != ToLowerCaseLetter(*dir++)) { + return false; + } + } + return true; +} + Prescanner::LineClassification Prescanner::ClassifyLine( const char *start) const { if (inFixedForm_) { @@ -1122,13 +1133,12 @@ Prescanner::LineClassification Prescanner::ClassifyLine( return {LineClassification::Kind::IncludeLine, *quoteOffset}; } if (const char *dir{IsPreprocessorDirectiveLine(start)}) { - if (std::memcmp(dir, "if", 2) == 0 || std::memcmp(dir, "elif", 4) == 0 || - std::memcmp(dir, "else", 4) == 0 || std::memcmp(dir, "endif", 5) == 0) { + if (IsDirective("if", dir) || IsDirective("elif", dir) || + IsDirective("else", dir) || IsDirective("endif", dir)) { return {LineClassification::Kind::ConditionalCompilationDirective}; - } else if (std::memcmp(dir, "include", 7) == 0) { + } else if (IsDirective("include", dir)) { return {LineClassification::Kind::IncludeDirective}; - } else if (std::memcmp(dir, "define", 6) == 0 || - std::memcmp(dir, "undef", 5) == 0) { + } else if (IsDirective("define", dir) || IsDirective("undef", dir)) { return {LineClassification::Kind::DefinitionDirective}; } else { return {LineClassification::Kind::PreprocessorDirective}; diff --git a/flang/test/Preprocessing/lit.local.cfg.py b/flang/test/Preprocessing/lit.local.cfg.py deleted file mode 100644 index a7cf401d8c66c..0000000000000 --- a/flang/test/Preprocessing/lit.local.cfg.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- Python -*- - -from lit.llvm import llvm_config - -# Added this line file to prevent lit from discovering these tests -# See Issue #1052 -config.suffixes = [] diff --git a/flang/test/Preprocessing/pp001.F b/flang/test/Preprocessing/pp001.F index ba131b4a24c6f..95198641655a5 100644 --- a/flang/test/Preprocessing/pp001.F +++ b/flang/test/Preprocessing/pp001.F @@ -1,9 +1,11 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then * keyword macros integer, parameter :: KWM = 666 #define KWM 777 if (KWM .eq. 777) then - print *, 'pp001.F pass' + print *, 'pp001.F yes' else - print *, 'pp001.F FAIL: ', KWM + print *, 'pp001.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp002.F b/flang/test/Preprocessing/pp002.F index f46baf73e2894..b4cbd7545adf8 100644 --- a/flang/test/Preprocessing/pp002.F +++ b/flang/test/Preprocessing/pp002.F @@ -1,10 +1,12 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(kwm.eq.777)then * #undef integer, parameter :: KWM = 777 #define KWM 666 #undef KWM if (KWM .eq. 
777) then - print *, 'pp002.F pass' + print *, 'pp002.F yes' else - print *, 'pp002.F FAIL: ', KWM + print *, 'pp002.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp003.F b/flang/test/Preprocessing/pp003.F index 0470f1909a696..4df10c47d2359 100644 --- a/flang/test/Preprocessing/pp003.F +++ b/flang/test/Preprocessing/pp003.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * function-like macros integer function IFLM(x) integer :: x @@ -8,8 +10,8 @@ program main integer :: res res = IFLM(666) if (res .eq. 777) then - print *, 'pp003.F pass' + print *, 'pp003.F yes' else - print *, 'pp003.F FAIL: ', res + print *, 'pp003.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp004.F b/flang/test/Preprocessing/pp004.F index 800a96fbedbaf..788d3682ab98a 100644 --- a/flang/test/Preprocessing/pp004.F +++ b/flang/test/Preprocessing/pp004.F @@ -1,9 +1,11 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(kwm.eq.777)then * KWMs case-sensitive integer, parameter :: KWM = 777 #define KWM 666 if (kwm .eq. 777) then - print *, 'pp004.F pass' + print *, 'pp004.F yes' else - print *, 'pp004.F FAIL: ', kwm + print *, 'pp004.F no: ', kwm end if end diff --git a/flang/test/Preprocessing/pp005.F b/flang/test/Preprocessing/pp005.F index 05fab7a92f1b6..390e662e35574 100644 --- a/flang/test/Preprocessing/pp005.F +++ b/flang/test/Preprocessing/pp005.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=777 * KWM split across continuation, implicit padding integer, parameter :: KWM = 666 #define KWM 777 @@ -5,8 +7,8 @@ res = KW +M if (res .eq. 777) then - print *, 'pp005.F pass' + print *, 'pp005.F yes' else - print *, 'pp005.F FAIL: ', res + print *, 'pp005.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp006.F b/flang/test/Preprocessing/pp006.F index 55b87df8d4304..3c44728f56b50 100644 --- a/flang/test/Preprocessing/pp006.F +++ b/flang/test/Preprocessing/pp006.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=777 * ditto, but with intervening *comment line integer, parameter :: KWM = 666 #define KWM 777 @@ -6,8 +8,8 @@ *comment +M if (res .eq. 777) then - print *, 'pp006.F pass' + print *, 'pp006.F yes' else - print *, 'pp006.F FAIL: ', res + print *, 'pp006.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp007.F b/flang/test/Preprocessing/pp007.F index 8be4396a24928..ca78f4f4c6c5c 100644 --- a/flang/test/Preprocessing/pp007.F +++ b/flang/test/Preprocessing/pp007.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=kwm * KWM split across continuation, clipped after column 72 integer, parameter :: KWM = 666 #define KWM 777 @@ -8,8 +10,8 @@ res = KW comment +M if (res .eq. 777) then - print *, 'pp007.F pass' + print *, 'pp007.F yes' else - print *, 'pp007.F FAIL: ', res + print *, 'pp007.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp008.F b/flang/test/Preprocessing/pp008.F index 38c5b6657a871..9dd7bd92ea50f 100644 --- a/flang/test/Preprocessing/pp008.F +++ b/flang/test/Preprocessing/pp008.F @@ -1,11 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=kwm * KWM with spaces in name at invocation NOT replaced integer, parameter :: KWM = 777 #define KWM 666 integer :: res res = K W M if (res .eq. 
777) then - print *, 'pp008.F pass' + print *, 'pp008.F yes' else - print *, 'pp008.F FAIL: ', res + print *, 'pp008.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp009.F b/flang/test/Preprocessing/pp009.F index a53623ff03692..c29021da484aa 100644 --- a/flang/test/Preprocessing/pp009.F +++ b/flang/test/Preprocessing/pp009.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call split across continuation, implicit padding integer function IFLM(x) integer :: x @@ -9,8 +11,8 @@ program main res = IFL +M(666) if (res .eq. 777) then - print *, 'pp009.F pass' + print *, 'pp009.F yes' else - print *, 'pp009.F FAIL: ', res + print *, 'pp009.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp010.F b/flang/test/Preprocessing/pp010.F index 0769c98274dd3..4a812cde86af0 100644 --- a/flang/test/Preprocessing/pp010.F +++ b/flang/test/Preprocessing/pp010.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * ditto, but with intervening *comment line integer function IFLM(x) integer :: x @@ -10,8 +12,8 @@ program main *comment +M(666) if (res .eq. 777) then - print *, 'pp010.F pass' + print *, 'pp010.F yes' else - print *, 'pp010.F FAIL: ', res + print *, 'pp010.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp011.F b/flang/test/Preprocessing/pp011.F index 4ec376649422b..c106c8bc3f5c5 100644 --- a/flang/test/Preprocessing/pp011.F +++ b/flang/test/Preprocessing/pp011.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=iflm(666) * FLM call name split across continuation, clipped integer function IFLM(x) integer :: x @@ -12,8 +14,8 @@ program main res = IFL comment +M(666) if (res .eq. 777) then - print *, 'pp011.F pass' + print *, 'pp011.F yes' else - print *, 'pp011.F FAIL: ', res + print *, 'pp011.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp012.F b/flang/test/Preprocessing/pp012.F index 703fabf7d8db6..411cfb887bb1f 100644 --- a/flang/test/Preprocessing/pp012.F +++ b/flang/test/Preprocessing/pp012.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call name split across continuation integer function IFLM(x) integer :: x @@ -9,8 +11,8 @@ program main res = IFL +M(666) if (res .eq. 777) then - print *, 'pp012.F pass' + print *, 'pp012.F yes' else - print *, 'pp012.F FAIL: ', res + print *, 'pp012.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp013.F b/flang/test/Preprocessing/pp013.F index 6fb8ca75b5c83..f05e2e30fee69 100644 --- a/flang/test/Preprocessing/pp013.F +++ b/flang/test/Preprocessing/pp013.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call split between name and ( integer function IFLM(x) integer :: x @@ -9,8 +11,8 @@ program main res = IFLM +(666) if (res .eq. 777) then - print *, 'pp013.F pass' + print *, 'pp013.F yes' else - print *, 'pp013.F FAIL: ', res + print *, 'pp013.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp014.F b/flang/test/Preprocessing/pp014.F index 397a31d1a8aaa..470966096e4ee 100644 --- a/flang/test/Preprocessing/pp014.F +++ b/flang/test/Preprocessing/pp014.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call split between name and (, with intervening *comment integer function IFLM(x) integer :: x @@ -10,8 +12,8 @@ program main *comment +(666) if (res .eq. 
777) then - print *, 'pp014.F pass' + print *, 'pp014.F yes' else - print *, 'pp014.F FAIL: ', res + print *, 'pp014.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp015.F b/flang/test/Preprocessing/pp015.F index 4c399a835567e..8320bd704fed9 100644 --- a/flang/test/Preprocessing/pp015.F +++ b/flang/test/Preprocessing/pp015.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call split between name and (, clipped integer function IFLM(x) integer :: x @@ -12,8 +14,8 @@ program main res = IFLM comment +(666) if (res .eq. 777) then - print *, 'pp015.F pass' + print *, 'pp015.F yes' else - print *, 'pp015.F FAIL: ', res + print *, 'pp015.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp016.F b/flang/test/Preprocessing/pp016.F index 210ad0b3fddbf..95c89f8e1a03e 100644 --- a/flang/test/Preprocessing/pp016.F +++ b/flang/test/Preprocessing/pp016.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call split between name and ( and in argument integer function IFLM(x) integer :: x @@ -10,8 +12,8 @@ program main +(66 +6) if (res .eq. 777) then - print *, 'pp016.F pass' + print *, 'pp016.F yes' else - print *, 'pp016.F FAIL: ', res + print *, 'pp016.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp017.F b/flang/test/Preprocessing/pp017.F index e658fbd922cf4..a11f185147831 100644 --- a/flang/test/Preprocessing/pp017.F +++ b/flang/test/Preprocessing/pp017.F @@ -1,10 +1,12 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then * KLM rescan integer, parameter :: KWM = 666, KWM2 = 667 #define KWM2 777 #define KWM KWM2 if (KWM .eq. 777) then - print *, 'pp017.F pass' + print *, 'pp017.F yes' else - print *, 'pp017.F FAIL: ', KWM + print *, 'pp017.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp018.F b/flang/test/Preprocessing/pp018.F index 877c6545e1c82..69c24e59f53d6 100644 --- a/flang/test/Preprocessing/pp018.F +++ b/flang/test/Preprocessing/pp018.F @@ -1,11 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(kwm2.eq.777)then * KLM rescan with #undef (so rescan is after expansion) integer, parameter :: KWM2 = 777, KWM = 667 #define KWM2 666 #define KWM KWM2 #undef KWM2 if (KWM .eq. 777) then - print *, 'pp018.F pass' + print *, 'pp018.F yes' else - print *, 'pp018.F FAIL: ', KWM + print *, 'pp018.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp019.F b/flang/test/Preprocessing/pp019.F index a2c9a02848575..d607ad3484079 100644 --- a/flang/test/Preprocessing/pp019.F +++ b/flang/test/Preprocessing/pp019.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM rescan integer function IFLM(x) integer :: x @@ -10,8 +12,8 @@ program main integer :: res res = IFLM(666) if (res .eq. 777) then - print *, 'pp019.F pass' + print *, 'pp019.F yes' else - print *, 'pp019.F FAIL: ', res + print *, 'pp019.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp020.F b/flang/test/Preprocessing/pp020.F index f0d26357c5d2f..88525003ba893 100644 --- a/flang/test/Preprocessing/pp020.F +++ b/flang/test/Preprocessing/pp020.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((111)+666) * FLM expansion of argument integer function IFLM(x) integer :: x @@ -10,8 +12,8 @@ program main integer :: res res = IFLM(KWM) if (res .eq. 
777) then - print *, 'pp020.F pass' + print *, 'pp020.F yes' else - print *, 'pp020.F FAIL: ', res + print *, 'pp020.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp021.F b/flang/test/Preprocessing/pp021.F index 45073ab6f1e6c..1662a680f5aa9 100644 --- a/flang/test/Preprocessing/pp021.F +++ b/flang/test/Preprocessing/pp021.F @@ -1,10 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: ch='KWM' +! CHECK: if(ch.eq.'KWM')then * KWM NOT expanded in 'literal' #define KWM 666 character(len=3) :: ch ch = 'KWM' if (ch .eq. 'KWM') then - print *, 'pp021.F pass' + print *, 'pp021.F yes' else - print *, 'pp021.F FAIL: ', ch + print *, 'pp021.F no: ', ch end if end diff --git a/flang/test/Preprocessing/pp022.F b/flang/test/Preprocessing/pp022.F index e9a1e8ba4b915..026c02e9d4f01 100644 --- a/flang/test/Preprocessing/pp022.F +++ b/flang/test/Preprocessing/pp022.F @@ -1,10 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: ch="KWM" +! CHECK: if(ch.eq.'KWM')then * KWM NOT expanded in "literal" #define KWM 666 character(len=3) :: ch ch = "KWM" if (ch .eq. 'KWM') then - print *, 'pp022.F pass' + print *, 'pp022.F yes' else - print *, 'pp022.F FAIL: ', ch + print *, 'pp022.F no: ', ch end if end diff --git a/flang/test/Preprocessing/pp023.F b/flang/test/Preprocessing/pp023.F index fb63d63f4fc1e..a0d053f6addc2 100644 --- a/flang/test/Preprocessing/pp023.F +++ b/flang/test/Preprocessing/pp023.F @@ -1,11 +1,14 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: ch=3hKWM +! CHECK: if(ch.eq.'KWM')then * KWM NOT expanded in 9HHOLLERITH literal #define KWM 666 #define HKWM 667 character(len=3) :: ch ch = 3HKWM if (ch .eq. 'KWM') then - print *, 'pp023.F pass' + print *, 'pp023.F yes' else - print *, 'pp023.F FAIL: ', ch + print *, 'pp023.F no: ', ch end if end diff --git a/flang/test/Preprocessing/pp024.F b/flang/test/Preprocessing/pp024.F index 9072f6e50cc89..6ea76cc9c24de 100644 --- a/flang/test/Preprocessing/pp024.F +++ b/flang/test/Preprocessing/pp024.F @@ -1,3 +1,6 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: 100format(3hKWM) +! CHECK: if(ch.eq.'KWM')then * KWM NOT expanded in Hollerith in FORMAT #define KWM 666 #define HKWM 667 @@ -5,8 +8,8 @@ 100 format(3HKWM) write(ch, 100) if (ch .eq. 'KWM') then - print *, 'pp024.F pass' + print *, 'pp024.F yes' else - print *, 'pp024.F FAIL: ', ch + print *, 'pp024.F no: ', ch end if end diff --git a/flang/test/Preprocessing/pp025.F b/flang/test/Preprocessing/pp025.F index 42ad011842ff7..49521d443bd3b 100644 --- a/flang/test/Preprocessing/pp025.F +++ b/flang/test/Preprocessing/pp025.F @@ -1,11 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=ikwm2z * KWM expansion is before token pasting due to fixed-form space removal integer, parameter :: IKWM2Z = 777 #define KWM KWM2 integer :: res res = I KWM Z if (res .eq. 777) then - print *, 'pp025.F pass' + print *, 'pp025.F yes' else - print *, 'pp025.F FAIL: ', res + print *, 'pp025.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp026.F b/flang/test/Preprocessing/pp026.F index e0ea032c383fc..b551f3b173ed3 100644 --- a/flang/test/Preprocessing/pp026.F +++ b/flang/test/Preprocessing/pp026.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((111)+666) * ## token pasting works in FLM integer function IFLM(x) integer :: x @@ -12,8 +14,8 @@ program main integer :: res res = IFLM(KWM) if (res .eq. 
777) then - print *, 'pp026.F pass' + print *, 'pp026.F yes' else - print *, 'pp026.F FAIL: ', res + print *, 'pp026.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp027.F b/flang/test/Preprocessing/pp027.F index e2663800c1ce3..c1b787f9366d3 100644 --- a/flang/test/Preprocessing/pp027.F +++ b/flang/test/Preprocessing/pp027.F @@ -1,9 +1,12 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: kwm=666 +! CHECK: if(777.eq.777)then * #DEFINE works in fixed form integer, parameter :: KWM = 666 #DEFINE KWM 777 if (KWM .eq. 777) then - print *, 'pp027.F pass' + print *, 'pp027.F yes' else - print *, 'pp027.F FAIL: ', KWM + print *, 'pp027.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp028.F b/flang/test/Preprocessing/pp028.F index 2906d389b57d4..51eab84f36877 100644 --- a/flang/test/Preprocessing/pp028.F +++ b/flang/test/Preprocessing/pp028.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=kw * fixed-form clipping done before KWM expansion on source line integer, parameter :: KW = 777 #define KWM 666 @@ -7,8 +9,8 @@ *234567890123456789012345678901234567890123456789012345678901234567890123 res = KWM if (res .eq. 777) then - print *, 'pp028.F pass' + print *, 'pp028.F yes' else - print *, 'pp028.F FAIL: ', res + print *, 'pp028.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp029.F b/flang/test/Preprocessing/pp029.F index 4374ef84489b5..bb8efe6c1a2e0 100644 --- a/flang/test/Preprocessing/pp029.F +++ b/flang/test/Preprocessing/pp029.F @@ -1,10 +1,12 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(77 7.eq.777)then * \ newline allowed in #define integer, parameter :: KWM = 666 #define KWM 77\ 7 if (KWM .eq. 777) then - print *, 'pp029.F pass' + print *, 'pp029.F yes' else - print *, 'pp029.F FAIL: ', KWM + print *, 'pp029.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp030.F b/flang/test/Preprocessing/pp030.F index 3022e0ddf3dff..c04cf949f4142 100644 --- a/flang/test/Preprocessing/pp030.F +++ b/flang/test/Preprocessing/pp030.F @@ -1,9 +1,11 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then * /* C comment */ erased from #define integer, parameter :: KWM = 666 #define KWM 777 /* C comment */ if (KWM .eq. 777) then - print *, 'pp030.F pass' + print *, 'pp030.F yes' else - print *, 'pp030.F FAIL: ', KWM + print *, 'pp030.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp031.F b/flang/test/Preprocessing/pp031.F index 0f59921bcb820..90b14647c4c93 100644 --- a/flang/test/Preprocessing/pp031.F +++ b/flang/test/Preprocessing/pp031.F @@ -1,9 +1,12 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777//ccomment.eq.777)then +! CHECK: print*,'pp031.F no: ',777//ccomment * // C++ comment NOT erased from #define integer, parameter :: KWM = 666 #define KWM 777 // C comment if (KWM .eq. 777) then print *, 'pp031.F FAIL (should not have compiled)' else - print *, 'pp031.F FAIL: ', KWM + print *, 'pp031.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp032.F b/flang/test/Preprocessing/pp032.F index 9d9f14238d86d..6b779141ecb56 100644 --- a/flang/test/Preprocessing/pp032.F +++ b/flang/test/Preprocessing/pp032.F @@ -1,10 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then +! CHECK: print*,'pp032.F no: ',777 * /* C comment */ \ newline erased from #define integer, parameter :: KWM = 666 #define KWM 77/* C comment */\ 7 if (KWM .eq. 
777) then - print *, 'pp032.F pass' + print *, 'pp032.F yes' else - print *, 'pp032.F FAIL: ', KWM + print *, 'pp032.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp033.F b/flang/test/Preprocessing/pp033.F index 34cf1996cc74d..3364527cba6a3 100644 --- a/flang/test/Preprocessing/pp033.F +++ b/flang/test/Preprocessing/pp033.F @@ -1,10 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then +! CHECK: print*,'pp033.F no: ',777 * /* C comment \ newline */ erased from #define integer, parameter :: KWM = 666 #define KWM 77/* C comment \ */7 if (KWM .eq. 777) then - print *, 'pp033.F pass' + print *, 'pp033.F yes' else - print *, 'pp033.F FAIL: ', KWM + print *, 'pp033.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp034.F b/flang/test/Preprocessing/pp034.F index a9ed984b3b6ec..0c64aca62b1db 100644 --- a/flang/test/Preprocessing/pp034.F +++ b/flang/test/Preprocessing/pp034.F @@ -1,10 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then +! CHECK: print*,'pp034.F no: ',777 * \ newline allowed in name on KWM definition integer, parameter :: KWMC = 666 #define KWM\ C 777 if (KWMC .eq. 777) then - print *, 'pp034.F pass' + print *, 'pp034.F yes' else - print *, 'pp034.F FAIL: ', KWMC + print *, 'pp034.F no: ', KWMC end if end diff --git a/flang/test/Preprocessing/pp035.F b/flang/test/Preprocessing/pp035.F index 0135c9c4551a7..808174129b897 100644 --- a/flang/test/Preprocessing/pp035.F +++ b/flang/test/Preprocessing/pp035.F @@ -1,3 +1,6 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(777.eq.777)then +! CHECK: print*,'pp035.F no: ',777 * #if 2 .LT. 3 works integer, parameter :: KWM = 666 #if 2 .LT. 3 @@ -6,8 +9,8 @@ #define KWM 667 #endif if (KWM .eq. 777) then - print *, 'pp035.F pass' + print *, 'pp035.F yes' else - print *, 'pp035.F FAIL: ', KWM + print *, 'pp035.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp036.F b/flang/test/Preprocessing/pp036.F index ac922ae42ceb5..b7024c41fa60f 100644 --- a/flang/test/Preprocessing/pp036.F +++ b/flang/test/Preprocessing/pp036.F @@ -1,8 +1,11 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(.true.)then +! CHECK: print*,'pp036.F no: ',.true. * #define FALSE TRUE ... .FALSE. -> .TRUE. #define FALSE TRUE if (.FALSE.) then - print *, 'pp036.F pass' + print *, 'pp036.F yes' else - print *, 'pp036.F FAIL: ', .FALSE. + print *, 'pp036.F no: ', .FALSE. end if end diff --git a/flang/test/Preprocessing/pp037.F b/flang/test/Preprocessing/pp037.F index 6c3edb09eb6f4..52bfa8ffdab3d 100644 --- a/flang/test/Preprocessing/pp037.F +++ b/flang/test/Preprocessing/pp037.F @@ -1,11 +1,14 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(7777.eq.777)then +! CHECK: print*,'pp037.F no: ',7777 * fixed-form clipping NOT applied to #define integer, parameter :: KWM = 666 * 1 2 3 4 5 6 7 *234567890123456789012345678901234567890123456789012345678901234567890123 #define KWM 7777 if (KWM .eq. 777) then - print *, 'pp037.F pass' + print *, 'pp037.F yes' else - print *, 'pp037.F FAIL: ', KWM + print *, 'pp037.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp038.F b/flang/test/Preprocessing/pp038.F index 3c83dda7d03b7..7386aeebddeef 100644 --- a/flang/test/Preprocessing/pp038.F +++ b/flang/test/Preprocessing/pp038.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=((666)+111) * FLM call with closing ')' on next line (not a continuation) integer function IFLM(x) integer :: x @@ -9,8 +11,8 @@ program main res = IFLM(666 ) if (res .eq. 
777) then - print *, 'pp038.F pass' + print *, 'pp038.F yes' else - print *, 'pp038.F FAIL: ', res + print *, 'pp038.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp039.F b/flang/test/Preprocessing/pp039.F index 52e6dd78603a0..9124474b86ba3 100644 --- a/flang/test/Preprocessing/pp039.F +++ b/flang/test/Preprocessing/pp039.F @@ -1,3 +1,7 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: res=iflm +! CHECK: (666) +! CHECK-NOT: res=((666)+111) * FLM call with '(' on next line (not a continuation) integer function IFLM(x) integer :: x @@ -9,8 +13,8 @@ program main res = IFLM (666) if (res .eq. 777) then - print *, 'pp039.F pass' + print *, 'pp039.F yes' else - print *, 'pp039.F FAIL: ', res + print *, 'pp039.F no: ', res end if end diff --git a/flang/test/Preprocessing/pp040.F b/flang/test/Preprocessing/pp040.F index 59e901ac3c6b6..d589c38489cfb 100644 --- a/flang/test/Preprocessing/pp040.F +++ b/flang/test/Preprocessing/pp040.F @@ -1,5 +1,7 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK-NOT: FAIL HARD! * #define KWM c, then KWM works as comment line initiator #define KWM c KWM print *, 'pp040.F FAIL HARD!'; stop - print *, 'pp040.F pass' + print *, 'pp040.F yes' end diff --git a/flang/test/Preprocessing/pp041.F b/flang/test/Preprocessing/pp041.F index 33c5ced3924dc..dcf61536040ba 100644 --- a/flang/test/Preprocessing/pp041.F +++ b/flang/test/Preprocessing/pp041.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: j=666wmj=j+1wm211 * use KWM expansion as continuation indicators #define KWM 0 #define KWM2 1 @@ -6,8 +8,8 @@ KWM j = j + 1 KWM2 11 if (j .eq. 777) then - print *, 'pp041.F pass' + print *, 'pp041.F yes' else - print *, 'pp041.F FAIL', j + print *, 'pp041.F no', j end if end diff --git a/flang/test/Preprocessing/pp042.F b/flang/test/Preprocessing/pp042.F index 439e1affbca22..d5cc46a7a4712 100644 --- a/flang/test/Preprocessing/pp042.F +++ b/flang/test/Preprocessing/pp042.F @@ -1,6 +1,8 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK-NOT: goto 2 * #define c 1, then use c as label in fixed-form #define c 1 -c print *, 'pp042.F pass'; goto 2 - print *, 'pp042.F FAIL' +c print *, 'pp042.F yes'; goto 2 + print *, 'pp042.F no' 2 continue end diff --git a/flang/test/Preprocessing/pp043.F b/flang/test/Preprocessing/pp043.F index be0069cf85579..a079466e64f92 100644 --- a/flang/test/Preprocessing/pp043.F +++ b/flang/test/Preprocessing/pp043.F @@ -1,11 +1,13 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK: if(kwm.eq.777)then * #define with # in column 6 is a continuation line in fixed-form integer, parameter :: defineKWM666 = 555 integer, parameter :: KWM = #define KWM 666 ++222 if (KWM .eq. 777) then - print *, 'pp043.F pass' + print *, 'pp043.F yes' else - print *, 'pp043.F FAIL: ', KWM + print *, 'pp043.F no: ', KWM end if end diff --git a/flang/test/Preprocessing/pp044.F b/flang/test/Preprocessing/pp044.F index 72ce6cc411598..c14b29c3050cc 100644 --- a/flang/test/Preprocessing/pp044.F +++ b/flang/test/Preprocessing/pp044.F @@ -1,3 +1,5 @@ +! RUN: %f18 -E %s 2>&1 | FileCheck %s +! CHECK-NOT:z=111 * #define directive amid continuations integer, parameter :: KWM = 222, KWM111 = 333, KWM222 = 555 integer, parameter :: KWMKWM = 333 @@ -5,8 +7,8 @@ #define KWM 111 +KWM+444 if (z .EQ. 
777) then
-  print *, 'pass'
+  print *, 'yes'
 else
-  print *, 'FAIL', z
+  print *, 'no', z
 end if
 end
diff --git a/flang/test/Preprocessing/pp101.F90 b/flang/test/Preprocessing/pp101.F90
index 694201a8f33e0..b0f860157c82e 100644
--- a/flang/test/Preprocessing/pp101.F90
+++ b/flang/test/Preprocessing/pp101.F90
@@ -1,9 +1,11 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(777 .eq. 777) then
 ! keyword macros
 integer, parameter :: KWM = 666
 #define KWM 777
 if (KWM .eq. 777) then
-  print *, 'pp101.F90 pass'
+  print *, 'pp101.F90 yes'
 else
-  print *, 'pp101.F90 FAIL: ', KWM
+  print *, 'pp101.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp102.F90 b/flang/test/Preprocessing/pp102.F90
index 22e4613b3b18a..e2c3207a4c248 100644
--- a/flang/test/Preprocessing/pp102.F90
+++ b/flang/test/Preprocessing/pp102.F90
@@ -1,10 +1,12 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(kwm .eq. 777) then
 ! #undef
 integer, parameter :: KWM = 777
 #define KWM 666
 #undef KWM
 if (KWM .eq. 777) then
-  print *, 'pp102.F90 pass'
+  print *, 'pp102.F90 yes'
 else
-  print *, 'pp102.F90 FAIL: ', KWM
+  print *, 'pp102.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp103.F90 b/flang/test/Preprocessing/pp103.F90
index 9df4c9dbdf7b0..3309d7f8205f0 100644
--- a/flang/test/Preprocessing/pp103.F90
+++ b/flang/test/Preprocessing/pp103.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! function-like macros
 integer function IFLM(x)
   integer :: x
@@ -8,8 +10,8 @@ program main
   integer :: res
   res = IFLM(666)
   if (res .eq. 777) then
-    print *, 'pp103.F90 pass'
+    print *, 'pp103.F90 yes'
   else
-    print *, 'pp103.F90 FAIL: ', res
+    print *, 'pp103.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp104.F90 b/flang/test/Preprocessing/pp104.F90
index b15f0db7c5b97..51248c22154b9 100644
--- a/flang/test/Preprocessing/pp104.F90
+++ b/flang/test/Preprocessing/pp104.F90
@@ -1,9 +1,11 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(kwm .eq. 777) then
 ! KWMs case-sensitive
 integer, parameter :: KWM = 777
 #define KWM 666
 if (kwm .eq. 777) then
-  print *, 'pp104.F90 pass'
+  print *, 'pp104.F90 yes'
 else
-  print *, 'pp104.F90 FAIL: ', kwm
+  print *, 'pp104.F90 no: ', kwm
 end if
 end
diff --git a/flang/test/Preprocessing/pp105.F90 b/flang/test/Preprocessing/pp105.F90
index cd475db01c39e..898a4a1681377 100644
--- a/flang/test/Preprocessing/pp105.F90
+++ b/flang/test/Preprocessing/pp105.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = 777
 ! KWM call name split across continuation, with leading &
 integer, parameter :: KWM = 666
 #define KWM 777
@@ -5,8 +7,8 @@
 res = KW&
 &M
 if (res .eq. 777) then
-  print *, 'pp105.F90 pass'
+  print *, 'pp105.F90 yes'
 else
-  print *, 'pp105.F90 FAIL: ', res
+  print *, 'pp105.F90 no: ', res
 end if
 end
diff --git a/flang/test/Preprocessing/pp106.F90 b/flang/test/Preprocessing/pp106.F90
index e169ff70b2cec..d83085fc1d718 100644
--- a/flang/test/Preprocessing/pp106.F90
+++ b/flang/test/Preprocessing/pp106.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = 777
 ! ditto, with & ! comment
 integer, parameter :: KWM = 666
 #define KWM 777
@@ -5,8 +7,8 @@
 res = KW& ! comment
 &M
 if (res .eq. 777) then
-  print *, 'pp106.F90 pass'
+  print *, 'pp106.F90 yes'
 else
-  print *, 'pp106.F90 FAIL: ', res
+  print *, 'pp106.F90 no: ', res
 end if
 end
diff --git a/flang/test/Preprocessing/pp107.F90 b/flang/test/Preprocessing/pp107.F90
index bf6d427c04007..6973127a4cfab 100644
--- a/flang/test/Preprocessing/pp107.F90
+++ b/flang/test/Preprocessing/pp107.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = kwm
 ! KWM call name split across continuation, no leading &, with & ! comment
 integer, parameter :: KWM = 666
 #define KWM 777
@@ -5,8 +7,8 @@
 res = KW& ! comment
 M
 if (res .eq. 777) then
-  print *, 'pp107.F90 pass'
+  print *, 'pp107.F90 yes'
 else
-  print *, 'pp107.F90 FAIL: ', res
+  print *, 'pp107.F90 no: ', res
 end if
 end
diff --git a/flang/test/Preprocessing/pp108.F90 b/flang/test/Preprocessing/pp108.F90
index 7ce6ccbdedc17..b07ec984fd811 100644
--- a/flang/test/Preprocessing/pp108.F90
+++ b/flang/test/Preprocessing/pp108.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = kwm
 ! ditto, but without & ! comment
 integer, parameter :: KWM = 666
 #define KWM 777
@@ -5,8 +7,8 @@
 res = KW&
 M
 if (res .eq. 777) then
-  print *, 'pp108.F90 pass'
+  print *, 'pp108.F90 yes'
 else
-  print *, 'pp108.F90 FAIL: ', res
+  print *, 'pp108.F90 no: ', res
 end if
 end
diff --git a/flang/test/Preprocessing/pp109.F90 b/flang/test/Preprocessing/pp109.F90
index a80579d18b3ee..e75fd10c1cb88 100644
--- a/flang/test/Preprocessing/pp109.F90
+++ b/flang/test/Preprocessing/pp109.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! FLM call name split with leading &
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFL&
     &M(666)
   if (res .eq. 777) then
-    print *, 'pp109.F90 pass'
+    print *, 'pp109.F90 yes'
   else
-    print *, 'pp109.F90 FAIL: ', res
+    print *, 'pp109.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp110.F90 b/flang/test/Preprocessing/pp110.F90
index f5bf3b1867f48..681408ebfddf8 100644
--- a/flang/test/Preprocessing/pp110.F90
+++ b/flang/test/Preprocessing/pp110.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! ditto, with & ! comment
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFL& ! comment
     &M(666)
   if (res .eq. 777) then
-    print *, 'pp110.F90 pass'
+    print *, 'pp110.F90 yes'
   else
-    print *, 'pp110.F90 FAIL: ', res
+    print *, 'pp110.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp111.F90 b/flang/test/Preprocessing/pp111.F90
index 668fcdc5f8d39..4b49bf6ad6c92 100644
--- a/flang/test/Preprocessing/pp111.F90
+++ b/flang/test/Preprocessing/pp111.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm (666)
 ! FLM call name split across continuation, no leading &, with & ! comment
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFL& ! comment
     M(666)
   if (res .eq. 777) then
-    print *, 'pp111.F90 pass'
+    print *, 'pp111.F90 yes'
   else
-    print *, 'pp111.F90 FAIL: ', res
+    print *, 'pp111.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp112.F90 b/flang/test/Preprocessing/pp112.F90
index 0a3c7f8906dc7..9828366bef73a 100644
--- a/flang/test/Preprocessing/pp112.F90
+++ b/flang/test/Preprocessing/pp112.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm (666)
 ! ditto, but without & ! comment
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFL&
     M(666)
   if (res .eq. 777) then
-    print *, 'pp112.F90 pass'
+    print *, 'pp112.F90 yes'
   else
-    print *, 'pp112.F90 FAIL: ', res
+    print *, 'pp112.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp113.F90 b/flang/test/Preprocessing/pp113.F90
index 4c928033638f9..56e34389068f3 100644
--- a/flang/test/Preprocessing/pp113.F90
+++ b/flang/test/Preprocessing/pp113.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! FLM call split across continuation between name and (, leading &
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM&
     &(666)
   if (res .eq. 777) then
-    print *, 'pp113.F90 pass'
+    print *, 'pp113.F90 yes'
   else
-    print *, 'pp113.F90 FAIL: ', res
+    print *, 'pp113.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp114.F90 b/flang/test/Preprocessing/pp114.F90
index f6c0e0263a2f3..4c44759ba96eb 100644
--- a/flang/test/Preprocessing/pp114.F90
+++ b/flang/test/Preprocessing/pp114.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! ditto, with & ! comment, leading &
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM& ! comment
     &(666)
   if (res .eq. 777) then
-    print *, 'pp114.F90 pass'
+    print *, 'pp114.F90 yes'
   else
-    print *, 'pp114.F90 FAIL: ', res
+    print *, 'pp114.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp115.F90 b/flang/test/Preprocessing/pp115.F90
index 4a38aca533118..290f62e68948b 100644
--- a/flang/test/Preprocessing/pp115.F90
+++ b/flang/test/Preprocessing/pp115.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm (666)
 ! ditto, with & ! comment, no leading &
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM& ! comment
     (666)
   if (res .eq. 777) then
-    print *, 'pp115.F90 pass'
+    print *, 'pp115.F90 yes'
   else
-    print *, 'pp115.F90 FAIL: ', res
+    print *, 'pp115.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp116.F90 b/flang/test/Preprocessing/pp116.F90
index 8708f79347cc6..d41fab771f842 100644
--- a/flang/test/Preprocessing/pp116.F90
+++ b/flang/test/Preprocessing/pp116.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm (666)
 ! FLM call split between name and (, no leading &
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM&
     (666)
   if (res .eq. 777) then
-    print *, 'pp116.F90 pass'
+    print *, 'pp116.F90 yes'
   else
-    print *, 'pp116.F90 FAIL: ', res
+    print *, 'pp116.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp117.F90 b/flang/test/Preprocessing/pp117.F90
index 8b8687f03743d..c8d1c2ff31304 100644
--- a/flang/test/Preprocessing/pp117.F90
+++ b/flang/test/Preprocessing/pp117.F90
@@ -1,10 +1,12 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(777 .eq. 777) then
 ! KWM rescan
 integer, parameter :: KWM = 666, KWM2 = 667
 #define KWM2 777
 #define KWM KWM2
 if (KWM .eq. 777) then
-  print *, 'pp117.F90 pass'
+  print *, 'pp117.F90 yes'
 else
-  print *, 'pp117.F90 FAIL: ', KWM
+  print *, 'pp117.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp118.F90 b/flang/test/Preprocessing/pp118.F90
index 014d99791f1aa..e71cc410823ba 100644
--- a/flang/test/Preprocessing/pp118.F90
+++ b/flang/test/Preprocessing/pp118.F90
@@ -1,11 +1,13 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(kwm2 .eq. 777) then
 ! KWM rescan with #undef, proving rescan after expansion
 integer, parameter :: KWM2 = 777, KWM = 667
 #define KWM2 666
 #define KWM KWM2
 #undef KWM2
 if (KWM .eq. 777) then
-  print *, 'pp118.F90 pass'
+  print *, 'pp118.F90 yes'
 else
-  print *, 'pp118.F90 FAIL: ', KWM
+  print *, 'pp118.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp119.F90 b/flang/test/Preprocessing/pp119.F90
index 37470de411a4e..ea9a15cb02de4 100644
--- a/flang/test/Preprocessing/pp119.F90
+++ b/flang/test/Preprocessing/pp119.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((666)+111)
 ! FLM rescan
 integer function IFLM(x)
   integer :: x
@@ -10,8 +12,8 @@ program main
   integer :: res
   res = IFLM(666)
   if (res .eq. 777) then
-    print *, 'pp119.F90 pass'
+    print *, 'pp119.F90 yes'
   else
-    print *, 'pp119.F90 FAIL: ', res
+    print *, 'pp119.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp120.F90 b/flang/test/Preprocessing/pp120.F90
index f7e0ae1034908..9c4b88c8d15c9 100644
--- a/flang/test/Preprocessing/pp120.F90
+++ b/flang/test/Preprocessing/pp120.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = ((111)+666)
 ! FLM expansion of argument
 integer function IFLM(x)
   integer :: x
@@ -10,8 +12,8 @@ program main
   integer :: res
   res = IFLM(KWM)
   if (res .eq. 777) then
-    print *, 'pp120.F90 pass'
+    print *, 'pp120.F90 yes'
   else
-    print *, 'pp120.F90 FAIL: ', res
+    print *, 'pp120.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp121.F90 b/flang/test/Preprocessing/pp121.F90
index bd855fe2f6ab4..cdf25e15ee44e 100644
--- a/flang/test/Preprocessing/pp121.F90
+++ b/flang/test/Preprocessing/pp121.F90
@@ -1,10 +1,13 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: ch = 'KWM'
+! CHECK: if(ch .eq. 'KWM') then
 ! KWM NOT expanded in 'literal'
 #define KWM 666
 character(len=3) :: ch
 ch = 'KWM'
 if (ch .eq. 'KWM') then
-  print *, 'pp121.F90 pass'
+  print *, 'pp121.F90 yes'
 else
-  print *, 'pp121.F90 FAIL: ', ch
+  print *, 'pp121.F90 no: ', ch
 end if
 end
diff --git a/flang/test/Preprocessing/pp122.F90 b/flang/test/Preprocessing/pp122.F90
index dbad83a61c6ac..14c8762af1c7e 100644
--- a/flang/test/Preprocessing/pp122.F90
+++ b/flang/test/Preprocessing/pp122.F90
@@ -1,10 +1,12 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: ch = "KWM"
 ! KWM NOT expanded in "literal"
 #define KWM 666
 character(len=3) :: ch
 ch = "KWM"
 if (ch .eq. 'KWM') then
-  print *, 'pp122.F90 pass'
+  print *, 'pp122.F90 yes'
 else
-  print *, 'pp122.F90 FAIL: ', ch
+  print *, 'pp122.F90 no: ', ch
 end if
 end
diff --git a/flang/test/Preprocessing/pp123.F90 b/flang/test/Preprocessing/pp123.F90
index 6e6c45244b8a9..1768cec1c23ba 100644
--- a/flang/test/Preprocessing/pp123.F90
+++ b/flang/test/Preprocessing/pp123.F90
@@ -1,11 +1,13 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: ch = 3hKWM
 ! KWM NOT expanded in Hollerith literal
 #define KWM 666
 #define HKWM 667
 character(len=3) :: ch
 ch = 3HKWM
 if (ch .eq. 'KWM') then
-  print *, 'pp123.F90 pass'
+  print *, 'pp123.F90 yes'
 else
-  print *, 'pp123.F90 FAIL: ', ch
+  print *, 'pp123.F90 no: ', ch
 end if
 end
diff --git a/flang/test/Preprocessing/pp124.F90 b/flang/test/Preprocessing/pp124.F90
index 2cf4d56dba238..bb011515ce1c2 100644
--- a/flang/test/Preprocessing/pp124.F90
+++ b/flang/test/Preprocessing/pp124.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: 100 format(3hKWM)
 ! KWM NOT expanded in Hollerith in FORMAT
 #define KWM 666
 #define HKWM 667
@@ -5,8 +7,8 @@
 100 format(3HKWM)
 write(ch, 100)
 if (ch .eq. 'KWM') then
-  print *, 'pp124.F90 pass'
+  print *, 'pp124.F90 yes'
 else
-  print *, 'pp124.F90 FAIL: ', ch
+  print *, 'pp124.F90 no: ', ch
 end if
 end
diff --git a/flang/test/Preprocessing/pp125.F90 b/flang/test/Preprocessing/pp125.F90
index 5f3875d8e88ef..cf3909b888240 100644
--- a/flang/test/Preprocessing/pp125.F90
+++ b/flang/test/Preprocessing/pp125.F90
@@ -1,9 +1,11 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(777 .eq. 777) then
 ! #DEFINE works in free form
 integer, parameter :: KWM = 666
 #DEFINE KWM 777
 if (KWM .eq. 777) then
-  print *, 'pp125.F90 pass'
+  print *, 'pp125.F90 yes'
 else
-  print *, 'pp125.F90 FAIL: ', KWM
+  print *, 'pp125.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp126.F90 b/flang/test/Preprocessing/pp126.F90
index c2684c51413b7..fefb7fcf118f4 100644
--- a/flang/test/Preprocessing/pp126.F90
+++ b/flang/test/Preprocessing/pp126.F90
@@ -1,10 +1,12 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: if(777 .eq. 777) then
 ! \ newline works in #define
 integer, parameter :: KWM = 666
 #define KWM 77\
 7
 if (KWM .eq. 777) then
-  print *, 'pp126.F90 pass'
+  print *, 'pp126.F90 yes'
 else
-  print *, 'pp126.F90 FAIL: ', KWM
+  print *, 'pp126.F90 no: ', KWM
 end if
 end
diff --git a/flang/test/Preprocessing/pp127.F90 b/flang/test/Preprocessing/pp127.F90
index 19f83b6afbc9a..08feedf95305d 100644
--- a/flang/test/Preprocessing/pp127.F90
+++ b/flang/test/Preprocessing/pp127.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm(666 )
 ! FLM call with closing ')' on next line (not a continuation)
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM(666
     )
   if (res .eq. 777) then
-    print *, 'pp127.F90 pass'
+    print *, 'pp127.F90 yes'
   else
-    print *, 'pp127.F90 FAIL: ', res
+    print *, 'pp127.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp128.F90 b/flang/test/Preprocessing/pp128.F90
index 84b338bc7cc98..46918e05f5c39 100644
--- a/flang/test/Preprocessing/pp128.F90
+++ b/flang/test/Preprocessing/pp128.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: res = iflm
 ! FLM call with '(' on next line (not a continuation)
 integer function IFLM(x)
   integer :: x
@@ -9,8 +11,8 @@ program main
   res = IFLM
     (666)
   if (res .eq. 777) then
-    print *, 'pp128.F90 pass'
+    print *, 'pp128.F90 yes'
   else
-    print *, 'pp128.F90 FAIL: ', res
+    print *, 'pp128.F90 no: ', res
   end if
 end
diff --git a/flang/test/Preprocessing/pp129.F90 b/flang/test/Preprocessing/pp129.F90
index a8eea86996838..ab7e7f0baa8b8 100644
--- a/flang/test/Preprocessing/pp129.F90
+++ b/flang/test/Preprocessing/pp129.F90
@@ -1,5 +1,7 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK-NOT: stop
 ! #define KWM !, then KWM works as comment line initiator
 #define KWM !
 KWM print *, 'pp129.F90 FAIL HARD!'; stop
-  print *, 'pp129.F90 pass'
+  print *, 'pp129.F90 yes'
 end
diff --git a/flang/test/Preprocessing/pp130.F90 b/flang/test/Preprocessing/pp130.F90
index c3d8079210c66..af4ad126e6fa4 100644
--- a/flang/test/Preprocessing/pp130.F90
+++ b/flang/test/Preprocessing/pp130.F90
@@ -1,3 +1,5 @@
+! RUN: %f18 -E %s 2>&1 | FileCheck %s
+! CHECK: j = j + &
 ! #define KWM &, use for continuation w/o pasting (ifort and nag seem to continue #define)
 #define KWM &
@@ -6,8 +8,8 @@
 j = j + KWM
 111
 if (j .eq. 777) then
-  print *, 'pp130.F90 pass'
+  print *, 'pp130.F90 yes'
 else
-  print *, 'pp130.F90 FAIL', j
+  print *, 'pp130.F90 no', j
 end if
 end

From 7cf4603faee366a6e5860b6fdbedadd91872e231 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Mon, 3 Aug 2020 15:11:28 -0700
Subject: [PATCH 236/600] Reland D61689 Change -gz and
 -Wa,--compress-debug-sections to use gABI compression (SHF_COMPRESSED) with
 integrated assembler

This fixes an inconsistency: clang -c -gz -fno-integrated-as means
SHF_COMPRESSED while clang -c -gz -fintegrated-as means zlib-gnu.

---

Since July 15, 2015 (binutils-gdb commit
19a7fe52ae3d0971e67a134bcb1648899e21ae1c, included in 2.26), gas
--compress-debug-sections=zlib (gcc -gz) means zlib-gabi: SHF_COMPRESSED.
Before that GCC/binutils used zlib-gnu (.zdebug).

clang's -gz was introduced in rC306115 (Jun 2017) to indicate zlib-gnu. It is
2020 now, and it is not unreasonable to assume that users of the new feature
have new linkers (ld.bfd/gold >= 2.26, lld >= rLLD273661).

Change clang's default accordingly to improve standard conformance. zlib-gnu
is falling out of fashion and has poorer toolchain support. Its mangled names
confuse tools and are more likely to cause problems.

Reviewed By: compnerd

Differential Revision: https://reviews.llvm.org/D61689
---
 clang/docs/ReleaseNotes.rst               | 5 ++++-
 clang/lib/Frontend/CompilerInvocation.cpp | 3 +--
 clang/tools/driver/cc1as_main.cpp         | 3 +--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9274081c4d62c..03eca8a26843e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -100,7 +100,10 @@ future versions of Clang.
 Modified Compiler Flags
 -----------------------
 
-- ...
+- On ELF, ``-gz`` now defaults to ``-gz=zlib`` with the integrated assembler.
+  It produces ``SHF_COMPRESSED`` style compression of debug information. GNU
+  binutils 2.26 or newer, or lld is required to link produced object files. Use
+  ``-gz=zlib-gnu`` to get the old behavior.
 
 New Pragmas in Clang
 --------------------
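To make the new default concrete, here is a minimal, self-contained C++ sketch
of the flag-to-compression mapping after this patch. The enum and function are
illustrative stand-ins, not Clang's actual API (Clang's real code uses
llvm::DebugCompressionType and llvm::StringSwitch, as the diffs below show):

```cpp
// Illustrative stand-in for the -gz mapping; not the actual Clang code.
#include <cassert>
#include <string>

enum class Compression { None, GABI, GNU }; // hypothetical enum

Compression mapGZ(const std::string &value) {
  if (value.empty())          // plain -gz, no "=value" suffix
    return Compression::GABI; // new default: SHF_COMPRESSED (was GNU)
  if (value == "none")
    return Compression::None;
  if (value == "zlib")
    return Compression::GABI; // gABI SHF_COMPRESSED sections
  if (value == "zlib-gnu")
    return Compression::GNU;  // legacy .zdebug_* sections
  return Compression::None;   // unknown values are diagnosed elsewhere
}

int main() {
  assert(mapGZ("") == Compression::GABI);
  assert(mapGZ("zlib-gnu") == Compression::GNU);
}
```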
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 0b5f33541060f..8e8bf9d9028eb 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1155,8 +1155,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
   if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections,
                                      OPT_compress_debug_sections_EQ)) {
     if (A->getOption().getID() == OPT_compress_debug_sections) {
-      // TODO: be more clever about the compression type auto-detection
-      Opts.setCompressDebugSections(llvm::DebugCompressionType::GNU);
+      Opts.setCompressDebugSections(llvm::DebugCompressionType::Z);
     } else {
       auto DCT = llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())
                      .Case("none", llvm::DebugCompressionType::None)
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index 77b99b2013640..87047be3c2bc6 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -224,8 +224,7 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
   if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections,
                                      OPT_compress_debug_sections_EQ)) {
     if (A->getOption().getID() == OPT_compress_debug_sections) {
-      // TODO: be more clever about the compression type auto-detection
-      Opts.CompressDebugSections = llvm::DebugCompressionType::GNU;
+      Opts.CompressDebugSections = llvm::DebugCompressionType::Z;
     } else {
       Opts.CompressDebugSections =
           llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())

From b43791e7016d04c0dcc0d36bb43b8e96110608c9 Mon Sep 17 00:00:00 2001
From: Christopher Tetreault
Date: Mon, 3 Aug 2020 13:35:49 -0700
Subject: [PATCH 237/600] [SVE] Remove bad calls to VectorType::getNumElements()
 from PowerPC

Differential Revision: https://reviews.llvm.org/D85154
---
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 04b3dceae4754..8434fdebe1b07 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -113,7 +113,7 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // the permutation mask with respect to 31 and reverse the order of
     // V1 and V2.
     if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
-      assert(cast<VectorType>(Mask->getType())->getNumElements() == 16 &&
+      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
              "Bad type for intrinsic!");
 
       // Check that all of the elements are integer constants or undefs.

From af3ec731d54d8ca5e5752256bee5b93a5929fa14 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht
Date: Mon, 3 Aug 2020 15:21:44 -0700
Subject: [PATCH 238/600] [NFC][ARM] Silence unused variable in release builds

---
 llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
index 8dbb8b53c8905..0523ac9447c32 100644
--- a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -462,6 +462,8 @@ bool MVEVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
                           .addImm(ARMVCC::Then)
                           .add(MI.getOperand(4))
                           .add(MI.getOperand(2));
+    // Silence unused variable warning in release builds.
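+    // (The builder is otherwise referenced only inside LLVM_DEBUG, which
+    // expands to nothing in release builds; the cast below keeps it used.)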
+    (void)MIBuilder;
     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
     DeadInstructions.push_back(&MI);

From 1d7790604c0c4e189bf8d91583cae8f1e027b1fb Mon Sep 17 00:00:00 2001
From: Julian Lettner
Date: Mon, 3 Aug 2020 13:21:08 -0700
Subject: [PATCH 239/600] [UBSan] Increase robustness of tests

These UBSan tests assert the absence of runtime errors via `count 0`, which
means "expect no output". This fails the tests unnecessarily in some
environments (e.g., iOS simulator in our case). Alter the tests to be a bit
more specific: "expect no error" instead of "expect no output".

rdar://65503408

Differential Revision: https://reviews.llvm.org/D85155
---
 compiler-rt/test/ubsan/TestCases/Misc/nonnull.cpp   | 4 +++-
 compiler-rt/test/ubsan/TestCases/Misc/nullability.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/ubsan/TestCases/Misc/nonnull.cpp b/compiler-rt/test/ubsan/TestCases/Misc/nonnull.cpp
index d5cd2bf763b7c..c612cd8247156 100644
--- a/compiler-rt/test/ubsan/TestCases/Misc/nonnull.cpp
+++ b/compiler-rt/test/ubsan/TestCases/Misc/nonnull.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx -fsanitize=returns-nonnull-attribute -w %s -O3 -o %t
-// RUN: %run %t foo 2>&1 | count 0
+// RUN: %run %t foo 2>&1 | FileCheck %s --check-prefix=NOERROR --allow-empty --implicit-check-not='runtime error'
 // RUN: %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx -fsanitize=returns-nonnull-attribute -fno-sanitize-recover=returns-nonnull-attribute -w %s -O3 -o %t.abort
 // RUN: not %run %t.abort &> /dev/null
@@ -40,3 +40,5 @@ int main(int argc, char **argv) {
 
   return 0;
 }
+
+// NOERROR-NOT: runtime error
diff --git a/compiler-rt/test/ubsan/TestCases/Misc/nullability.c b/compiler-rt/test/ubsan/TestCases/Misc/nullability.c
index a68d0f8ea3628..42837983865d8 100644
--- a/compiler-rt/test/ubsan/TestCases/Misc/nullability.c
+++ b/compiler-rt/test/ubsan/TestCases/Misc/nullability.c
@@ -1,6 +1,6 @@
 // UNSUPPORTED: android
 // RUN: %clang -w -fsanitize=nullability-arg,nullability-assign,nullability-return %s -O3 -o %t
-// RUN: %run %t foo 2>&1 | count 0
+// RUN: %run %t foo 2>&1 | FileCheck %s --check-prefix=NOERROR --allow-empty --implicit-check-not='runtime error'
 // RUN: %run %t 2>&1 | FileCheck %s
 
 // RUN: echo "nullability-arg:nullability.c" > %t.supp
@@ -67,3 +67,5 @@ int main(int argc, char **argv) {
   nonnull_init2(p);
   return 0;
 }
+
+// NOERROR-NOT: runtime error

From b5059b7140232559ed123cb94d4e8f75ca9a44dc Mon Sep 17 00:00:00 2001
From: Christopher Tetreault
Date: Mon, 3 Aug 2020 15:16:16 -0700
Subject: [PATCH 240/600] [SVE] Remove bad call to VectorType::getNumElements()
 from ARM

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D85152
---
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 9674a813a52af..68767398191aa 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -166,7 +166,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
       if (CI->getValue().trunc(16).isAllOnesValue()) {
         auto TrueVector = IC.Builder.CreateVectorSplat(
-            cast<VectorType>(II.getType())->getNumElements(),
+            cast<FixedVectorType>(II.getType())->getNumElements(),
             IC.Builder.getTrue());
         return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
       }
From 3b92db4c846ef3c7295444fa0b554905de0774b2 Mon Sep 17 00:00:00 2001
From: Christopher Tetreault
Date: Mon, 3 Aug 2020 15:42:13 -0700
Subject: [PATCH 241/600] [SVE] Remove bad call to VectorType::getNumElements()
 from AMDGPU

Differential Revision: https://reviews.llvm.org/D85151
---
 llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 372c5154acef0..e186f66a83536 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -728,7 +728,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                     APInt DemandedElts,
                                                     int DMaskIdx = -1) {
 
-  auto *IIVTy = cast<VectorType>(II.getType());
+  auto *IIVTy = cast<FixedVectorType>(II.getType());
   unsigned VWidth = IIVTy->getNumElements();
   if (VWidth == 1)
     return nullptr;

From 045e79e77c252f2c73c640e820e977ef52836d50 Mon Sep 17 00:00:00 2001
From: "Kazushi (Jam) Marukawa"
Date: Sat, 25 Jul 2020 16:31:07 +0900
Subject: [PATCH 242/600] [VE] Extend integer arguments and return values
 smaller than 64 bits

To follow the NEC Aurora SX VE ABI correctly, change clang to sign/zero-extend
integer arguments and return values smaller than 64 bits. Also update the
regression test.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D85071
---
 clang/lib/CodeGen/TargetInfo.cpp | 13 +++--
 clang/test/CodeGen/ve-abi.c      | 92 ++++++++++++++++++++++++++++++--
 2 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index f31d432eb3171..e011cfa811678 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -10743,21 +10743,24 @@ class VEABIInfo : public DefaultABIInfo {
 } // end anonymous namespace
 
 ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
-  if (Ty->isAnyComplexType()) {
+  if (Ty->isAnyComplexType())
     return ABIArgInfo::getDirect();
-  }
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size < 64 && Ty->isIntegerType())
+    return ABIArgInfo::getExtend(Ty);
   return DefaultABIInfo::classifyReturnType(Ty);
 }
 
 ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
-  if (Ty->isAnyComplexType()) {
+  if (Ty->isAnyComplexType())
    return ABIArgInfo::getDirect();
-  }
+  uint64_t Size = getContext().getTypeSize(Ty);
+  if (Size < 64 && Ty->isIntegerType())
+    return ABIArgInfo::getExtend(Ty);
   return DefaultABIInfo::classifyArgumentType(Ty);
 }
 
 void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
-
   FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (auto &Arg : FI.arguments())
     Arg.info = classifyArgumentType(Arg.type);
diff --git a/clang/test/CodeGen/ve-abi.c b/clang/test/CodeGen/ve-abi.c
index aa35095d5dea0..1c230cb616a38 100644
--- a/clang/test/CodeGen/ve-abi.c
+++ b/clang/test/CodeGen/ve-abi.c
@@ -1,14 +1,96 @@
+/// Check that ABI is correctly implemented.
+///
+/// 1. Check that all integer arguments and return values less than 64 bits
+/// are sign/zero extended.
+/// 2. Check that all complex arguments and return values are placed in
+/// registers where possible, rather than being treated as aggregates.
+/// 3. Check that a function declared without argument type declarations is
+/// treated as VARARGS (in order to place arguments in both registers and
+/// memory locations in the back end)
 
 // RUN: %clang_cc1 -triple ve-linux-gnu -emit-llvm %s -o - | FileCheck %s
 
-// CHECK-LABEL: define { float, float } @p(float %a.coerce0, float %a.coerce1, float %b.coerce0, float %b.coerce1) #0 {
-float __complex__ p(float __complex__ a, float __complex__ b) {
+// CHECK-LABEL: define signext i8 @fun_si8(i8 signext %a, i8 signext %b) #0 {
+char fun_si8(char a, char b) {
+  return a;
+}
+
+// CHECK-LABEL: define zeroext i8 @fun_zi8(i8 zeroext %a, i8 zeroext %b) #0 {
+unsigned char fun_zi8(unsigned char a, unsigned char b) {
+  return a;
+}
+
+// CHECK-LABEL: define signext i16 @fun_si16(i16 signext %a, i16 signext %b) #0 {
+short fun_si16(short a, short b) {
+  return a;
+}
+
+// CHECK-LABEL: define zeroext i16 @fun_zi16(i16 zeroext %a, i16 zeroext %b) #0 {
+unsigned short fun_zi16(unsigned short a, unsigned short b) {
+  return a;
+}
+
+// CHECK-LABEL: define signext i32 @fun_si32(i32 signext %a, i32 signext %b) #0 {
+int fun_si32(int a, int b) {
+  return a;
+}
+
+// CHECK-LABEL: define zeroext i32 @fun_zi32(i32 zeroext %a, i32 zeroext %b) #0 {
+unsigned int fun_zi32(unsigned int a, unsigned int b) {
+  return a;
+}
+
+// CHECK-LABEL: define i64 @fun_si64(i64 %a, i64 %b) #0 {
+long fun_si64(long a, long b) {
+  return a;
+}
+
+// CHECK-LABEL: define i64 @fun_zi64(i64 %a, i64 %b) #0 {
+unsigned long fun_zi64(unsigned long a, unsigned long b) {
+  return a;
+}
+
+// CHECK-LABEL: define i128 @fun_si128(i128 %a, i128 %b) #0 {
+__int128 fun_si128(__int128 a, __int128 b) {
+  return a;
+}
+
+// CHECK-LABEL: define i128 @fun_zi128(i128 %a, i128 %b) #0 {
+unsigned __int128 fun_zi128(unsigned __int128 a, unsigned __int128 b) {
+  return a;
+}
+
+// CHECK-LABEL: define float @fun_float(float %a, float %b) #0 {
+float fun_float(float a, float b) {
+  return a;
+}
+
+// CHECK-LABEL: define double @fun_double(double %a, double %b) #0 {
+double fun_double(double a, double b) {
+  return a;
+}
+
+// CHECK-LABEL: define fp128 @fun_quad(fp128 %a, fp128 %b) #0 {
+long double fun_quad(long double a, long double b) {
+  return a;
+}
+
+// CHECK-LABEL: define { float, float } @fun_fcomplex(float %a.coerce0, float %a.coerce1, float %b.coerce0, float %b.coerce1) #0 {
+float __complex__ fun_fcomplex(float __complex__ a, float __complex__ b) {
+  return a;
+}
+
+// CHECK-LABEL: define { double, double } @fun_dcomplex(double %a.coerce0, double %a.coerce1, double %b.coerce0, double %b.coerce1) #0 {
+double __complex__ fun_dcomplex(double __complex__ a, double __complex__ b) {
+  return a;
 }
 
-// CHECK-LABEL: define { double, double } @q(double %a.coerce0, double %a.coerce1, double %b.coerce0, double %b.coerce1) #0 {
-double __complex__ q(double __complex__ a, double __complex__ b) {
+// CHECK-LABEL: define { fp128, fp128 } @fun_qcomplex(fp128 %a.coerce0, fp128 %a.coerce1, fp128 %b.coerce0, fp128 %b.coerce1) #0 {
+long double __complex__ fun_qcomplex(long double __complex__ a, long double __complex__ b) {
+  return a;
 }
 
+extern int hoge();
 void func() {
-  // CHECK-LABEL: %call = call i32 (i32, i32, i32, i32, i32, i32, i32, ...) bitcast (i32 (...)* @hoge to i32 (i32, i32, i32, i32, i32, i32, i32, ...)*)(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+  // CHECK: %call = call signext i32 (i32, i32, i32, i32, i32, i32, i32, ...) bitcast (i32 (...)* @hoge to i32 (i32, i32, i32, i32, i32, i32, i32, ...)*)(i32 signext 1, i32 signext 2, i32 signext 3, i32 signext 4, i32 signext 5, i32 signext 6, i32 signext 7)
   hoge(1, 2, 3, 4, 5, 6, 7);
 }

From 509f5c4ec2dbae99713ac6292c3a6bd7ba50027c Mon Sep 17 00:00:00 2001
From: hgreving
Date: Wed, 29 Jul 2020 16:48:03 -0700
Subject: [PATCH 243/600] [MC] Fix memory leak when allocating MCInst with bump
 allocator

Adds the function createMCInst() to MCContext that creates an MCInst using
a typed bump allocator. MCInst contains a SmallVector. The SmallVector is
POD only for <= 8 operands. The default untyped bump pointer allocator of
MCContext does not delete the MCInst, so if the SmallVector grows, it's a
leak.

This fixes https://bugs.llvm.org/show_bug.cgi?id=46900.
---
 llvm/include/llvm/MC/MCContext.h                  |  6 ++++++
 llvm/lib/MC/MCContext.cpp                         |  9 +++++++++
 .../Hexagon/AsmParser/HexagonAsmParser.cpp        |  2 +-
 .../Disassembler/HexagonDisassembler.cpp          |  6 +++---
 llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp    |  2 +-
 .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp    |  4 ++--
 .../Hexagon/MCTargetDesc/HexagonMCCompound.cpp    | 18 +++++++++---------
 7 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 45be9bb3d225b..d041b06c5568c 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -97,6 +97,7 @@ namespace llvm {
     SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator;
     SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator;
     SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator;
+    SpecificBumpPtrAllocator<MCInst> MCInstAllocator;
 
     /// Bindings of names to symbols.
     SymbolTable Symbols;
@@ -380,6 +381,11 @@ namespace llvm {
 
     /// @}
 
+    /// \name MCInst Management
+
+    /// Create and return a new MC instruction.
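+    /// The instruction comes from a SpecificBumpPtrAllocator, so ~MCInst()
+    /// runs when the context is reset and any heap storage grown by its
+    /// operand SmallVector is reclaimed (see PR46900).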
+    MCInst *createMCInst();
+
     /// \name Symbol Management
 
     /// @{
 
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index a0f9212f3b142..5b0078974318a 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -90,6 +90,7 @@ void MCContext::reset() {
   ELFAllocator.DestroyAll();
   MachOAllocator.DestroyAll();
   XCOFFAllocator.DestroyAll();
+  MCInstAllocator.DestroyAll();
 
   MCSubtargetAllocator.DestroyAll();
   InlineAsmUsedLabelNames.clear();
@@ -126,6 +127,14 @@ void MCContext::reset() {
   HadError = false;
 }
 
+//===----------------------------------------------------------------------===//
+// MCInst Management
+//===----------------------------------------------------------------------===//
+
+MCInst *MCContext::createMCInst() {
+  return new (MCInstAllocator.Allocate()) MCInst;
+}
+
 //===----------------------------------------------------------------------===//
 // Symbol Manipulation
 //===----------------------------------------------------------------------===//
 
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 1e7862c36ea09..3759962c41563 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -641,7 +641,7 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
         return true;
       return finishBundle(IDLoc, Out);
     }
-    MCInst *SubInst = new (getParser().getContext()) MCInst;
+    MCInst *SubInst = getParser().getContext().createMCInst();
     if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
                             MatchingInlineAsm)) {
       if (InBrackets)
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index f3a87ef20a608..aeaeac65de960 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -175,7 +175,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   while (Result == Success && !Complete) {
     if (Bytes.size() < HEXAGON_INSTR_SIZE)
       return MCDisassembler::Fail;
-    MCInst *Inst = new (getContext()) MCInst;
+    MCInst *Inst = getContext().createMCInst();
    Result = getSingleInstruction(*Inst, MI, Bytes, Address, cs, Complete);
     MI.addOperand(MCOperand::createInst(Inst));
     Size += HEXAGON_INSTR_SIZE;
@@ -384,8 +384,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
       break;
     }
     MI.setOpcode(Hexagon::DuplexIClass0 + duplexIClass);
-    MCInst *MILow = new (getContext()) MCInst;
-    MCInst *MIHigh = new (getContext()) MCInst;
+    MCInst *MILow = getContext().createMCInst();
+    MCInst *MIHigh = getContext().createMCInst();
     auto TmpExtender = CurrentExtender;
     CurrentExtender =
         nullptr; // constant extenders in duplex must always be in slot 1
diff --git a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 188d91355a35e..9507de95231fe 100644
--- a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -104,7 +104,7 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
     HexagonMCInstrInfo::setOuterLoop(MCB);
     return;
   }
-  MCInst *MCI = new (AP.OutContext) MCInst;
+  MCInst *MCI = AP.OutContext.createMCInst();
   MCI->setOpcode(MI->getOpcode());
   assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
         "MCI opcode should have been set on construction");
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index e7069819fa575..627c53cadd84f 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -74,7 +74,7 @@ class HexagonAsmBackend : public MCAsmBackend {
 
   void setExtender(MCContext &Context) const {
     if (Extender == nullptr)
-      const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+      const_cast<HexagonAsmBackend *>(this)->Extender = Context.createMCInst();
   }
 
   MCInst *takeExtender() const {
@@ -736,7 +736,7 @@ class HexagonAsmBackend : public MCAsmBackend {
           auto &Inst = const_cast<MCInst &>(RF.getInst());
           while (Size > 0 &&
                  HexagonMCInstrInfo::bundleSize(Inst) < MaxPacketSize) {
-            MCInst *Nop = new (Context) MCInst;
+            MCInst *Nop = Context.createMCInst();
             Nop->setOpcode(Hexagon::A2_nop);
             Inst.addOperand(MCOperand::createInst(Nop));
             Size -= 4;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 82b2074c5cd86..e7ade7834a9f4 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -210,7 +210,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
   case Hexagon::A2_tfrsi:
     Rt = L.getOperand(0);
     compoundOpcode = J4_jumpseti;
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
 
     CompoundInsn->addOperand(Rt);
@@ -223,7 +223,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     Rs = L.getOperand(1);
 
     compoundOpcode = J4_jumpsetr;
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rt);
     CompoundInsn->addOperand(Rs);
@@ -237,7 +237,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     Rt = L.getOperand(2);
 
     compoundOpcode = cmpeqBitOpcode[getCompoundOp(R)];
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(Rt);
@@ -250,7 +250,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     Rt = L.getOperand(2);
 
     compoundOpcode = cmpgtBitOpcode[getCompoundOp(R)];
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(Rt);
@@ -263,7 +263,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     Rt = L.getOperand(2);
 
     compoundOpcode = cmpgtuBitOpcode[getCompoundOp(R)];
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(Rt);
@@ -281,7 +281,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
 
     compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
     Rs = L.getOperand(1);
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(L.getOperand(2));
@@ -299,7 +299,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
 
     compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
     Rs = L.getOperand(1);
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(L.getOperand(2));
@@ -310,7 +310,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     LLVM_DEBUG(dbgs() << "CX: C2_cmpgtui\n");
     Rs = L.getOperand(1);
     compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)];
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(L.getOperand(2));
@@ -321,7 +321,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
     LLVM_DEBUG(dbgs() << "CX: S2_tstbit_i\n");
     Rs = L.getOperand(1);
     compoundOpcode = tstBitOpcode[getCompoundOp(R)];
-    CompoundInsn = new (Context) MCInst;
+    CompoundInsn = Context.createMCInst();
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
     CompoundInsn->addOperand(R.getOperand(1));

From 7c5630fe9908a8bf10be2e9d26054406fac8de87 Mon Sep 17 00:00:00 2001
From: peter klausler
Date: Mon, 3 Aug 2020 11:20:35 -0700
Subject: [PATCH 244/600] [flang] Handle spaces (more) correctly in REAL input

Fixes problems in FCVS test fm110.f. Add more comments, too.

Differential Revision: https://reviews.llvm.org/D85163
---
 flang/runtime/edit-input.cpp | 56 +++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index ebbe41b49b2ce..998edc954ba75 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -13,6 +13,8 @@
 
 namespace Fortran::runtime::io {
 
+// For fixed-width fields, initialize the number of remaining characters.
+// Skip over leading blanks, then return the first non-blank character (if any).
 static std::optional<char32_t> PrepareInput(
     IoStatementState &io, const DataEdit &edit, std::optional<int> &remaining) {
   remaining.reset();
@@ -61,7 +63,8 @@ static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
   return true;
 }
 
-// Returns false if there's a '-' sign
+// Prepares input from a field, and consumes the sign, if any.
+// Returns true if there's a '-' sign.
 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
     std::optional<char32_t> &next, std::optional<int> &remaining) {
   next = PrepareInput(io, edit, remaining);
   if (next) {
     negative = *next == '-';
     if (negative || *next == '+') {
+      io.SkipSpaces(remaining);
       next = io.NextInField(remaining);
     }
   }
@@ -126,39 +130,44 @@ bool EditIntegerInput(
   return true;
 }
 
+// Parses a REAL input number from the input source as a normalized
+// fraction into a supplied buffer -- there's an optional '-', a
+// decimal point, and at least one digit.  The adjusted exponent value
+// is returned in a reference argument.  The returned value is the number
+// of characters that (should) have been written to the buffer -- this can
+// be larger than the buffer size and can indicate overflow.  Replaces
+// blanks with zeroes if appropriate.
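+// (For example, under this scheme an input field of 1.25E1 ends up
+// buffered as the fraction digits ".125" with an adjusted decimal
+// exponent of 2, since .125e2 == 12.5.)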
 static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     const DataEdit &edit, int &exponent) {
   std::optional<int> remaining;
   std::optional<char32_t> next;
   int got{0};
   std::optional<int> decimalPoint;
-  if (ScanNumericPrefix(io, edit, next, remaining) && next) {
+  auto Put{[&](char ch) -> void {
     if (got < bufferSize) {
-      buffer[got++] = '-';
+      buffer[got] = ch;
     }
+    ++got;
+  }};
+  if (ScanNumericPrefix(io, edit, next, remaining)) {
+    Put('-');
   }
   if (!next) { // empty field means zero
-    if (got < bufferSize) {
-      buffer[got++] = '0';
-    }
+    Put('0');
    return got;
   }
-  if (got < bufferSize) {
-    buffer[got++] = '.'; // input field is normalized to a fraction
-  }
   char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
-  auto start{got};
-  if ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')) {
+  char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
+  if (first == 'N' || first == 'I') {
     // NaN or infinity - convert to upper case
+    // Subtle: a blank field of digits could be followed by 'E' or 'D',
     for (; next &&
         ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
        next = io.NextInField(remaining)) {
-      if (got < bufferSize) {
-        if (*next >= 'a' && *next <= 'z') {
-          buffer[got++] = *next - 'a' + 'A';
-        } else {
-          buffer[got++] = *next;
-        }
+      if (*next >= 'a' && *next <= 'z') {
+        Put(*next - 'a' + 'A');
+      } else {
+        Put(*next);
       }
     }
     if (next && *next == '(') { // NaN(...)
@@ -167,7 +176,10 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       }
     }
     exponent = 0;
-  } else if (*next == decimal || (*next >= '0' && *next <= '9')) {
+  } else if (first == decimal || (first >= '0' && first <= '9') ||
+      first == 'E' || first == 'D' || first == 'Q') {
+    Put('.'); // input field is normalized to a fraction
+    auto start{got};
     for (; next; next = io.NextInField(remaining)) {
       char32_t ch{*next};
       if (ch == ' ' || ch == '\t') {
@@ -180,9 +192,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       if (ch == '0' && got == start && !decimalPoint) {
         // omit leading zeroes before the decimal
       } else if (ch >= '0' && ch <= '9') {
-        if (got < bufferSize) {
-          buffer[got++] = ch;
-        }
+        Put(ch);
       } else if (ch == decimal && !decimalPoint) {
         // the decimal point is *not* copied to the buffer
         decimalPoint = got - start; // # of digits before the decimal point
      } else {
         break;
       }
     }
-    if (got == start && got < bufferSize) {
-      buffer[got++] = '0'; // all digits were zeroes
+    if (got == start) {
+      Put('0'); // emit at least one digit
     }
     if (next &&
         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||

From 49bbb8b60e451d173c7dd42993592e8aa4d95f24 Mon Sep 17 00:00:00 2001
From: peter klausler
Date: Mon, 3 Aug 2020 11:11:39 -0700
Subject: [PATCH 245/600] Remove unneeded RoundDefault enumerator, and fix
 spelling in comments

---
 flang/include/flang/Decimal/decimal.h        | 3 +--
 flang/lib/Decimal/big-radix-floating-point.h | 6 +++---
 flang/lib/Decimal/binary-to-decimal.cpp      | 1 -
 flang/lib/Decimal/decimal-to-binary.cpp      | 2 --
 flang/lib/Evaluate/host.h                    | 6 +++---
 5 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/flang/include/flang/Decimal/decimal.h b/flang/include/flang/Decimal/decimal.h
index fa687e92d35b4..214b1d15d44b3 100644
--- a/flang/include/flang/Decimal/decimal.h
+++ b/flang/include/flang/Decimal/decimal.h
@@ -44,12 +44,11 @@ struct ConversionToDecimalResult {
 };
 
 enum FortranRounding {
-  RoundNearest, /* RN */
+  RoundNearest, /* RN and RP */
   RoundUp, /* RU */
   RoundDown, /* RD */
   RoundToZero, /* RZ - no rounding */
   RoundCompatible, /* RC: like RN, but ties go away from 0 */
-  RoundDefault, /* RP: maps to one of the above */
 };
 
 /* The "minimize" flag causes the fewest number of output digits
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 2fbb777104d6d..b0ee69ad5e426 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -66,12 +66,12 @@ template <int PREC, int LOG10RADIX> class BigRadixFloatingPointNumber {
 public:
   explicit BigRadixFloatingPointNumber(
-      enum FortranRounding rounding = RoundDefault)
+      enum FortranRounding rounding = RoundNearest)
       : rounding_{rounding} {}
 
   // Converts a binary floating point value.
   explicit BigRadixFloatingPointNumber(
-      Real, enum FortranRounding = RoundDefault);
+      Real, enum FortranRounding = RoundNearest);
 
   BigRadixFloatingPointNumber &SetToZero() {
     isNegative_ = false;
@@ -355,7 +355,7 @@ template <int PREC, int LOG10RADIX> class BigRadixFloatingPointNumber {
   int digitLimit_{maxDigits}; // precision clamp
   int exponent_{0}; // signed power of ten
   bool isNegative_{false};
-  enum FortranRounding rounding_ { RoundDefault };
+  enum FortranRounding rounding_ { RoundNearest };
 };
 } // namespace Fortran::decimal
 #endif
diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
index bcc0f08558aa4..c89bffc8ccd4c 100644
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -143,7 +143,6 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToDecimal(char *buffer,
       bool incr{false};
       switch (rounding_) {
       case RoundNearest:
-      case RoundDefault:
         incr = *end > '5' ||
             (*end == '5' && (p > end + 1 || ((end[-1] - '0') & 1) != 0));
         break;
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index 502f0a003d641..5e927e93b3bbe 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -150,7 +150,6 @@ void BigRadixFloatingPointNumber<PREC, LOG10RADIX>::LoseLeastSignificantDigit()
   bool incr{false};
   switch (rounding_) {
   case RoundNearest:
-  case RoundDefault:
     incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
     break;
   case RoundUp:
@@ -260,7 +259,6 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
   bool incr{false};
   switch (rounding) {
   case RoundNearest:
-  case RoundDefault:
    incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
     break;
   case RoundUp:
diff --git a/flang/lib/Evaluate/host.h b/flang/lib/Evaluate/host.h
index 2fac0424f17cc..1fc2423f4f0c3 100644
--- a/flang/lib/Evaluate/host.h
+++ b/flang/lib/Evaluate/host.h
@@ -140,14 +140,14 @@ template <> struct HostTypeHelper<Type<TypeCategory::Real, 3>> {
 // It should be defined when gcc/clang have a better support for it.
 
 template <> struct HostTypeHelper<Type<TypeCategory::Real, 4>> {
-  // IEE 754 64bits
template <> struct HostTypeHelper> { - // IEE 754 64bits + // IEEE 754 64bits using Type = std::conditional_t::is_iec559, float, UnsupportedType>; }; template <> struct HostTypeHelper> { - // IEE 754 64bits + // IEEE 754 64bits using Type = std::conditional_t::is_iec559, double, UnsupportedType>; @@ -162,7 +162,7 @@ template <> struct HostTypeHelper> { }; template <> struct HostTypeHelper> { - // IEE 754 128bits + // IEEE 754 128bits using Type = std::conditional_t::digits == 113 && std::numeric_limits::max_exponent == 16384, From 3b44b6c900d1b71e6a6590e376d11dc303ac5159 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 4 Aug 2020 00:31:19 +0100 Subject: [PATCH 246/600] [clang-tidy][NFC] Use correct size call for reserve --- .../clang-tidy/readability/IdentifierNamingCheck.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index e7fe25d8e2214..c2a32474b2a8b 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -124,7 +124,7 @@ static StringRef const StyleNames[] = { static std::vector> getNamingStyles(const ClangTidyCheck::OptionsView &Options) { std::vector> Styles; - Styles.reserve(StyleNames->size()); + Styles.reserve(array_lengthof(StyleNames)); for (auto const &StyleName : StyleNames) { auto CaseOptional = Options.getOptional( (StyleName + "Case").str()); From adb5c23f8c0d60eeec41dcbe21d1b26184e1c97d Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Mon, 3 Aug 2020 16:30:41 -0700 Subject: [PATCH 247/600] [test] Exit with an error if no tests are run. If the test suite is misconfigured when it's run (a bad regexp, wrong test directory, etc.), the test suite may not discover any tests. When this happens, the test runner exits happily because no tests failed: ``` Ran 0 tests in 0.000s RESULT: PASSED (0 passes, 0 failures, 0 errors, 0 skipped, 0 expected failures, 0 unexpected successes) ``` Change this to return an error so the misconfiguration can be more easily detected. Verified that `lldb-dotest -p TestDoesNotExist.py` successfully fails. Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D85169 --- lldb/packages/Python/lldbsuite/test/dotest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 3fb802f1c1aa5..6607f52c49dbd 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -1039,6 +1039,10 @@ def run_suite(): (configuration.suite.countTestCases(), configuration.suite.countTestCases() != 1 and "s" or "")) + if configuration.suite.countTestCases() == 0: + logging.error("did not discover any matching tests") + exitTestSuite(1) + # Invoke the test runner. 
     if configuration.count == 1:
         result = unittest2.TextTestRunner(

From c9e6887f837933aa7e2bb2511a6f883739e30faa Mon Sep 17 00:00:00 2001
From: Christopher Tetreault
Date: Mon, 3 Aug 2020 16:15:45 -0700
Subject: [PATCH 248/600] [SVE] Remove bad calls to VectorType::getNumElements()
 from X86

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D85156
---
 .../Target/X86/X86InstCombineIntrinsic.cpp | 51 ++++++++++---------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index c9e092e5deca7..3632305aa5cbf 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -202,7 +202,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
   auto Vec = II.getArgOperand(0);
   auto Amt = II.getArgOperand(1);
-  auto VT = cast<VectorType>(Vec->getType());
+  auto VT = cast<FixedVectorType>(Vec->getType());
   auto SVT = VT->getElementType();
   auto AmtVT = Amt->getType();
   unsigned VWidth = VT->getNumElements();
@@ -234,7 +234,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
     assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
            cast<VectorType>(AmtVT)->getElementType() == SVT &&
            "Unexpected shift-by-scalar type");
-    unsigned NumAmtElts = cast<VectorType>(AmtVT)->getNumElements();
+    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
     APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
     APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
     KnownBits KnownLowerBits = llvm::computeKnownBits(
@@ -350,7 +350,7 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
   auto Vec = II.getArgOperand(0);
   auto Amt = II.getArgOperand(1);
-  auto VT = cast<VectorType>(II.getType());
+  auto VT = cast<FixedVectorType>(II.getType());
   auto SVT = VT->getElementType();
   int NumElts = VT->getNumElements();
   int BitWidth = SVT->getIntegerBitWidth();
@@ -448,10 +448,10 @@ static Value *simplifyX86pack(IntrinsicInst &II,
   if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
     return UndefValue::get(ResTy);
 
-  auto *ArgTy = cast<VectorType>(Arg0->getType());
+  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
   unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
   unsigned NumSrcElts = ArgTy->getNumElements();
-  assert(cast<VectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
+  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
          "Unexpected packing types");
 
   unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
@@ -513,7 +513,7 @@ static Value *simplifyX86movmsk(const IntrinsicInst &II,
   if (isa<UndefValue>(Arg))
     return Constant::getNullValue(ResTy);
 
-  auto *ArgTy = dyn_cast<VectorType>(Arg->getType());
+  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
   // We can't easily peek through x86_mmx types.
   if (!ArgTy)
     return nullptr;
 
@@ -567,7 +567,7 @@ static Value *simplifyX86insertps(const IntrinsicInst &II,
   if (!CInt)
     return nullptr;
 
-  VectorType *VecTy = cast<VectorType>(II.getType());
+  auto *VecTy = cast<FixedVectorType>(II.getType());
   assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
 
   // The immediate permute control byte looks like this:
@@ -810,7 +810,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
   if (!V)
     return nullptr;
 
-  auto *VecTy = cast<VectorType>(II.getType());
+  auto *VecTy = cast<FixedVectorType>(II.getType());
   unsigned NumElts = VecTy->getNumElements();
   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
          "Unexpected number of elements in shuffle mask!");
@@ -855,7 +855,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
   if (!V)
     return nullptr;
 
-  auto *VecTy = cast<VectorType>(II.getType());
+  auto *VecTy = cast<FixedVectorType>(II.getType());
   unsigned NumElts = VecTy->getNumElements();
   bool IsPD = VecTy->getScalarType()->isDoubleTy();
   unsigned NumLaneElts = IsPD ? 2 : 4;
@@ -903,7 +903,7 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
   if (!V)
     return nullptr;
 
-  auto *VecTy = cast<VectorType>(II.getType());
+  auto *VecTy = cast<FixedVectorType>(II.getType());
   unsigned Size = VecTy->getNumElements();
   assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
          "Unexpected shuffle mask size");
@@ -1084,7 +1084,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // These intrinsics only demand the 0th element of their input vectors. If
     // we can simplify the input based on that, do so now.
     Value *Arg = II.getArgOperand(0);
-    unsigned VWidth = cast<VectorType>(Arg->getType())->getNumElements();
+    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
     if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
       return IC.replaceOperand(II, 0, V);
     }
@@ -1136,7 +1136,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     bool MadeChange = false;
     Value *Arg0 = II.getArgOperand(0);
     Value *Arg1 = II.getArgOperand(1);
-    unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();
+    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
     if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
       IC.replaceOperand(II, 0, V);
       MadeChange = true;
@@ -1362,7 +1362,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     Value *Arg1 = II.getArgOperand(1);
     assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
            "Unexpected packed shift size");
-    unsigned VWidth = cast<VectorType>(Arg1->getType())->getNumElements();
+    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
 
     if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
       return IC.replaceOperand(II, 1, V);
@@ -1433,7 +1433,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     bool MadeChange = false;
     Value *Arg0 = II.getArgOperand(0);
     Value *Arg1 = II.getArgOperand(1);
-    unsigned VWidth = cast<VectorType>(Arg0->getType())->getNumElements();
+    unsigned VWidth =
+        cast<FixedVectorType>(Arg0->getType())->getNumElements();
 
     APInt UndefElts1(VWidth, 0);
     APInt DemandedElts1 =
@@ -1476,8 +1477,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   case Intrinsic::x86_sse4a_extrq: {
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);
-    unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements();
-    unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();
+    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
+    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
            VWidth1 == 16 && "Unexpected operand sizes");
@@ -1517,7 +1518,7 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
     // bits of the lower 64-bits. The upper 64-bits are undefined.
     Value *Op0 = II.getArgOperand(0);
-    unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();
+    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
            "Unexpected operand size");
@@ -1541,10 +1542,10 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   case Intrinsic::x86_sse4a_insertq: {
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);
-    unsigned VWidth = cast<VectorType>(Op0->getType())->getNumElements();
+    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
-           cast<VectorType>(Op1->getType())->getNumElements() == 2 &&
+           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
            "Unexpected operand size");
 
     // See if we're dealing with constant values.
@@ -1577,8 +1578,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     // undefined.
     Value *Op0 = II.getArgOperand(0);
     Value *Op1 = II.getArgOperand(1);
-    unsigned VWidth0 = cast<VectorType>(Op0->getType())->getNumElements();
-    unsigned VWidth1 = cast<VectorType>(Op1->getType())->getNumElements();
+    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
+    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
            VWidth1 == 2 && "Unexpected operand sizes");
@@ -1650,9 +1651,9 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
              "Not expecting mask and operands with different sizes");
 
       unsigned NumMaskElts =
-          cast<VectorType>(Mask->getType())->getNumElements();
+          cast<FixedVectorType>(Mask->getType())->getNumElements();
       unsigned NumOperandElts =
-          cast<VectorType>(II.getType())->getNumElements();
+          cast<FixedVectorType>(II.getType())->getNumElements();
       if (NumMaskElts == NumOperandElts) {
         return SelectInst::Create(BoolVec, Op1, Op0);
       }
@@ -1768,7 +1769,7 @@ Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
       ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
     } else {
       auto Arg = II.getArgOperand(0);
-      auto ArgType = cast<VectorType>(Arg->getType());
+      auto ArgType = cast<FixedVectorType>(Arg->getType());
       ArgWidth = ArgType->getNumElements();
     }
@@ -1934,7 +1935,7 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
   case Intrinsic::x86_avx512_packusdw_512:
   case Intrinsic::x86_avx512_packuswb_512: {
     auto *Ty0 = II.getArgOperand(0)->getType();
-    unsigned InnerVWidth = cast<VectorType>(Ty0)->getNumElements();
+    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
     assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
 
     unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;

From d8334c43606a08dc13a69d0993dc7a52d5c0fe56 Mon Sep 17 00:00:00 2001
From: peter klausler
Date: Mon, 3 Aug 2020 11:29:15 -0700
Subject: [PATCH 249/600] [flang] Acquire file accessibility, size, positioning

Extend the raw file wrapper to get accessibility, positioning, and size
information. This is needed for INQUIRE (to follow).

Differential Revision: https://reviews.llvm.org/D85160
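As a rough, self-contained illustration of the POSIX calls the new wrappers
below are built on (access(2) and fstat(2)/stat(2)), a file's accessibility,
positionability, and size can be probed like this; the path is only an
example and error handling is elided:

```cpp
// Standalone sketch of the underlying POSIX probes; not the flang runtime.
#include <sys/stat.h>
#include <unistd.h>
#include <cstdio>

int main() {
  const char *path = "/etc/hosts"; // illustrative path
  bool extant = ::access(path, F_OK) == 0;
  bool readable = ::access(path, R_OK) == 0;
  struct stat buf;
  long size = -1;
  bool positionable = false;
  if (::stat(path, &buf) == 0) {
    positionable = S_ISREG(buf.st_mode); // only regular files may seek
    size = static_cast<long>(buf.st_size);
  }
  std::printf("extant=%d readable=%d seekable=%d size=%ld\n",
              extant, readable, positionable, size);
}
```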
---
 flang/runtime/file.cpp | 21 +++++++++++++++++++--
 flang/runtime/file.h   |  4 ++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp
index 341702df995b8..6823b19e57912 100644
--- a/flang/runtime/file.cpp
+++ b/flang/runtime/file.cpp
@@ -18,6 +18,7 @@
 #include <io.h>
 #include <windows.h>
 #else
+#include <sys/stat.h>
 #include <unistd.h>
 #endif
@@ -84,8 +85,7 @@ void OpenFile::Open(OpenStatus status, std::optional<Action> action,
     fd_ = openfile_mkstemp(handler);
   } else {
     if (!path_.get()) {
-      handler.SignalError(
-          "FILE= is required unless STATUS='OLD' and unit is connected");
+      handler.SignalError("FILE= is required");
       return;
     }
     int flags{0};
@@ -134,8 +134,18 @@ void OpenFile::Open(OpenStatus status, std::optional<Action> action,
   mayWrite_ = *action != Action::Read;
   if (status == OpenStatus::Old || status == OpenStatus::Unknown) {
     knownSize_.reset();
+#ifndef _WIN32
+    struct stat buf;
+    if (::fstat(fd_, &buf) == 0) {
+      mayPosition_ = S_ISREG(buf.st_mode);
+      knownSize_ = buf.st_size;
+    }
+#else // TODO: _WIN32
+    mayPosition_ = true;
+#endif
   } else {
     knownSize_ = 0;
+    mayPosition_ = true;
   }
 }
@@ -385,4 +395,11 @@ int OpenFile::PendingResult(const Terminator &terminator, int iostat) {
 }
 
 bool IsATerminal(int fd) { return ::isatty(fd); }
+
+bool IsExtant(const char *path) { return ::access(path, F_OK) == 0; }
+bool MayRead(const char *path) { return ::access(path, R_OK) == 0; }
+bool MayWrite(const char *path) { return ::access(path, W_OK) == 0; }
+bool MayReadAndWrite(const char *path) {
+  return ::access(path, R_OK | W_OK) == 0;
+}
 } // namespace Fortran::runtime::io
diff --git a/flang/runtime/file.h b/flang/runtime/file.h
index 1d25a91558a4c..7e7b27c4be2a4 100644
--- a/flang/runtime/file.h
+++ b/flang/runtime/file.h
@@ -95,5 +95,9 @@ class OpenFile {
 };
 
 bool IsATerminal(int fd);
+bool IsExtant(const char *path);
+bool MayRead(const char *path);
+bool MayWrite(const char *path);
+bool MayReadAndWrite(const char *path);
 } // namespace Fortran::runtime::io
 #endif // FORTRAN_RUNTIME_FILE_H_

From d879ac8a6eef683392a02dd0aa62c69d61b894eb Mon Sep 17 00:00:00 2001
From: peter klausler
Date: Mon, 3 Aug 2020 11:31:13 -0700
Subject: [PATCH 250/600] [flang] Defer "next input record" processing until
 handlers established

External input was detecting "end of file" conditions in
BeginExternal...Input() and BeginUnformattedInput() routines before
EnableHandlers() could have been called. Defer the "start next record"
processing to the input data item handlers (and EndIoStatement() for when
there are no data items).

Differential Revision: https://reviews.llvm.org/D85161
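The shape of the fix is a deferral latch: postpone a side-effecting setup
step until the first operation that actually needs it, so error handlers
enabled in between are in place when the step runs. A self-contained sketch
of the pattern follows; the class and method names are illustrative, not the
runtime's actual types:

```cpp
// Generic sketch of deferring setup until first use; illustrative names.
#include <iostream>

class Statement {
public:
  void EnableHandlers() { handlersEnabled_ = true; }
  void InputItem() {
    BeginReadingRecord(); // deferred: runs at the first data item...
    std::cout << "read item (handlers=" << handlersEnabled_ << ")\n";
  }
  void End() {
    BeginReadingRecord(); // ...or at statement end when there were no items
  }

private:
  void BeginReadingRecord() {
    if (!beganReading_) { // latch: the record is started at most once
      beganReading_ = true;
      std::cout << "start record; end-of-file now reaches the handlers\n";
    }
  }
  bool beganReading_{false};
  bool handlersEnabled_{false};
};

int main() {
  Statement s;
  s.EnableHandlers(); // handlers are registered before the record starts
  s.InputItem();
  s.End();
}
```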
Differential Revision: https://reviews.llvm.org/D85161 --- flang/runtime/io-api.cpp | 17 ++++++++--------- flang/runtime/io-stmt.cpp | 19 ++++++++++++++++++- flang/runtime/io-stmt.h | 6 ++++++ 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 708090ac68561..f36144d0c3c46 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -149,9 +149,6 @@ Cookie BeginExternalListIO( unit.SetDirection(DIR, handler); IoStatementState &io{unit.BeginIoStatement>( unit, sourceFile, sourceLine)}; - if constexpr (DIR == Direction::Input) { - unit.BeginReadingRecord(handler); - } return &io; } @@ -185,9 +182,6 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, IoStatementState &io{ unit.BeginIoStatement>( unit, format, formatLength, sourceFile, sourceLine)}; - if constexpr (DIR == Direction::Input) { - unit.BeginReadingRecord(handler); - } return &io; } @@ -218,9 +212,7 @@ Cookie BeginUnformattedIO( unit, sourceFile, sourceLine)}; IoErrorHandler handler{terminator}; unit.SetDirection(DIR, handler); - if constexpr (DIR == Direction::Input) { - unit.BeginReadingRecord(handler); - } else { + if constexpr (DIR == Direction::Output) { if (unit.access == Access::Sequential && !unit.isFixedRecordLength) { // Create space for (sub)record header to be completed by // UnformattedIoStatementState::EndIoStatement() @@ -838,6 +830,7 @@ bool IONAME(OutputDescriptor)(Cookie cookie, const Descriptor &) { bool IONAME(InputDescriptor)(Cookie cookie, const Descriptor &) { IoStatementState &io{*cookie}; + io.BeginReadingRecord(); io.GetIoErrorHandler().Crash("InputDescriptor: not yet implemented"); // TODO } @@ -855,6 +848,7 @@ bool IONAME(OutputUnformattedBlock)(Cookie cookie, const char *x, bool IONAME(InputUnformattedBlock)( Cookie cookie, char *x, std::size_t length, std::size_t elementBytes) { IoStatementState &io{*cookie}; + io.BeginReadingRecord(); if (auto *unf{io.get_if>()}) { return unf->Receive(x, length, elementBytes); } @@ -883,6 +877,7 @@ bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { "InputInteger64() called for a non-input I/O statement"); return false; } + io.BeginReadingRecord(); if (auto edit{io.GetNextDataEdit()}) { if (edit->descriptor == DataEdit::ListDirectedNullValue) { return true; @@ -922,6 +917,7 @@ static bool InputReal(Cookie cookie, REAL &x) { "InputReal() called for a non-input I/O statement"); return false; } + io.BeginReadingRecord(); if (auto edit{io.GetNextDataEdit()}) { if (edit->descriptor == DataEdit::ListDirectedNullValue) { return true; @@ -968,6 +964,7 @@ static bool InputComplex(Cookie cookie, REAL x[2]) { "InputComplex() called for a non-input I/O statement"); return false; } + io.BeginReadingRecord(); for (int j{0}; j < 2; ++j) { if (auto edit{io.GetNextDataEdit()}) { if (edit->descriptor == DataEdit::ListDirectedNullValue) { @@ -1012,6 +1009,7 @@ bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { "InputAscii() called for a non-input I/O statement"); return false; } + io.BeginReadingRecord(); if (auto edit{io.GetNextDataEdit()}) { if (edit->descriptor == DataEdit::ListDirectedNullValue) { return true; @@ -1044,6 +1042,7 @@ bool IONAME(InputLogical)(Cookie cookie, bool &truth) { "InputLogical() called for a non-input I/O statement"); return false; } + io.BeginReadingRecord(); if (auto edit{io.GetNextDataEdit()}) { if (edit->descriptor == DataEdit::ListDirectedNullValue) { return true; diff --git a/flang/runtime/io-stmt.cpp 
b/flang/runtime/io-stmt.cpp index b8e7781f235b0..a903f708bc627 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -185,7 +185,10 @@ int NoopCloseStatementState::EndIoStatement() { } template int ExternalIoStatementState::EndIoStatement() { - if (!unit().nonAdvancing) { + if constexpr (DIR == Direction::Input) { + BeginReadingRecord(); // in case of READ with no data items + } + if (!unit().nonAdvancing && GetIoStat() != IostatEnd) { unit().AdvanceRecord(*this); } if constexpr (DIR == Direction::Output) { @@ -260,6 +263,16 @@ void ExternalIoStatementState::HandleRelativePosition(std::int64_t n) { return unit().HandleRelativePosition(n); } +template +void ExternalIoStatementState::BeginReadingRecord() { + if constexpr (DIR == Direction::Input) { + if (!beganReading_) { + beganReading_ = true; + unit().BeginReadingRecord(*this); + } + } +} + template ExternalFormattedIoStatementState::ExternalFormattedIoStatementState( ExternalFileUnit &unit, const CHAR *format, std::size_t formatLength, @@ -315,6 +328,10 @@ MutableModes &IoStatementState::mutableModes() { [](auto &x) -> MutableModes & { return x.get().mutableModes(); }, u_); } +void IoStatementState::BeginReadingRecord() { + std::visit([](auto &x) { return x.get().BeginReadingRecord(); }, u_); +} + IoErrorHandler &IoStatementState::GetIoErrorHandler() const { return std::visit( [](auto &x) -> IoErrorHandler & { diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 755e5946ff3b9..ddc264aea3605 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -59,6 +59,7 @@ class IoStatementState { IoErrorHandler &GetIoErrorHandler() const; ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit MutableModes &mutableModes(); + void BeginReadingRecord(); // N.B.: this also works with base classes template A *get_if() const { @@ -108,6 +109,7 @@ struct IoStatementBase : public DefaultFormatControlCallbacks { int EndIoStatement(); std::optional GetNextDataEdit(IoStatementState &, int = 1); ExternalFileUnit *GetExternalFileUnit() const { return nullptr; } + void BeginReadingRecord() {} }; struct InputStatementState {}; @@ -247,6 +249,10 @@ class ExternalIoStatementState : public ExternalIoStatementBase, void BackspaceRecord(); void HandleRelativePosition(std::int64_t); void HandleAbsolutePosition(std::int64_t); + void BeginReadingRecord(); + +private: + bool beganReading_{false}; }; template From 1beb00db1f5197efb73f839da681b8e439f37628 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 3 Aug 2020 16:53:41 -0700 Subject: [PATCH 251/600] Fix use-after-scope in 7209f83112db caught by the sanitizer bots --- clang/lib/Driver/Driver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 35263fbe1b2d8..e6a267621d8b4 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4601,12 +4601,12 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, } SmallString<128> BasePath(BaseInput); + SmallString<128> ExternalPath(""); StringRef BaseName; // Dsymutil actions should use the full path. 
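// [Editorial aside, written as C++ comments since it falls inside the
// quoted hunk; these are not lines of the patch. The fix hoists ExternalPath
// into the enclosing scope: previously the SmallString was declared inside
// the if block below, and storage it owned was apparently still referenced
// after the block ended, which is the use-after-scope the sanitizer bots
// caught.]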
if (isa<DsymutilJobAction>(JA) && C.getArgs().hasArg(options::OPT_dsym_dir)) { - SmallString<128> ExternalPath( - C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue()); + ExternalPath += C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue(); // We use posix style here because the tests (specifically // darwin-dsymutil.c) demonstrate that posix style paths are acceptable // even on Windows and if we don't then the similar test covering this From 81eeabbd97f32f7cd7dbe403e2b15db6fd23ad81 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 23 Jun 2020 20:00:04 -0700 Subject: [PATCH 252/600] [ELF] Add --dependency-file option Clang and GCC have a feature (-MD flag) to create a dependency file in a format that build systems such as Make or Ninja can read, which specifies all the additional inputs such as .h files. This change introduces the same functionality to lld, bringing it to feature parity with ld and gold, which gained this feature recently. See https://sourceware.org/bugzilla/show_bug.cgi?id=22843 for more details and discussion. The implementation corresponds to the -MD -MP compiler flags, where the generated dependency file also includes phony targets; this works around the errors that arise when a dependency file is removed. This matches the format used by ld and gold. Fixes PR42806 Differential Revision: https://reviews.llvm.org/D82437 --- lld/ELF/Config.h | 6 ++- lld/ELF/Driver.cpp | 75 ++++++++++++++++++++++++++++++++++ lld/ELF/InputFiles.cpp | 1 + lld/ELF/Options.td | 3 ++ lld/test/ELF/dependency-file.s | 21 ++++++++++ 5 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/dependency-file.s diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index e74a4a0c5b223..1afeee02efb30 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -10,7 +10,9 @@ #define LLD_ELF_CONFIG_H #include "lld/Common/ErrorHandler.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/ELF.h" @@ -90,11 +92,13 @@ struct Configuration { uint8_t osabi = 0; uint32_t andFeatures = 0; llvm::CachePruningPolicy thinLTOCachePolicy; + llvm::SetVector<llvm::CachedHashString> dependencyFiles; // for --dependency-file llvm::StringMap<uint64_t> sectionStartMap; llvm::StringRef bfdname; llvm::StringRef chroot; - llvm::StringRef dynamicLinker; + llvm::StringRef dependencyFile; llvm::StringRef dwoDir; + llvm::StringRef dynamicLinker; llvm::StringRef entry; llvm::StringRef emulation; llvm::StringRef fini; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 3e60ffdb1dc1d..012da1485acb1 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -918,6 +918,7 @@ static void readConfigs(opt::InputArgList &args) { config->optimizeBBJumps = args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false); config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true); + config->dependencyFile = args.getLastArgValue(OPT_dependency_file); config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true); config->disableVerify = args.hasArg(OPT_disable_verify); config->discard = getDiscard(args); @@ -1564,6 +1565,75 @@ static void handleLibcall(StringRef name) { sym->fetch(); } +// Handle --dependency-file=<path>. If that option is given, lld creates a +// file at a given path with the following contents: +// +// <output-file>: <input-file> ... +// +// <input-file>: +// +// where <output-file> is a pathname of an output file and <input-file> +// ... is a list of pathnames of all input files.
`make` command can read a +// file in the above format and interpret it as a dependency info. We write +// phony targets for every <input-file> to avoid an error when that file is +// removed. +// +// This option is useful if you want to make your final executable to depend +// on all input files including system libraries. Here is why. +// +// When you write a Makefile, you usually write it so that the final +// executable depends on all user-generated object files. Normally, you +// don't make your executable to depend on system libraries (such as libc) +// because you don't know the exact paths of libraries, even though system +// libraries that are linked to your executable statically are technically a +// part of your program. By using --dependency-file option, you can make +// lld to dump dependency info so that you can maintain exact dependencies +// easily. +static void writeDependencyFile() { + std::error_code ec; + raw_fd_ostream os(config->dependencyFile, ec, sys::fs::F_None); + if (ec) { + error("cannot open " + config->dependencyFile + ": " + ec.message()); + return; + } + + // We use the same escape rules as Clang/GCC which are accepted by Make/Ninja: + // * A space is escaped by a backslash which itself must be escaped. + // * A hash sign is escaped by a single backslash. + // * $ is escaped as $$. + auto printFilename = [](raw_fd_ostream &os, StringRef filename) { + llvm::SmallString<256> nativePath; + llvm::sys::path::native(filename.str(), nativePath); + llvm::sys::path::remove_dots(nativePath, /*remove_dot_dot=*/true); + for (unsigned i = 0, e = nativePath.size(); i != e; ++i) { + if (nativePath[i] == '#') { + os << '\\'; + } else if (nativePath[i] == ' ') { + os << '\\'; + unsigned j = i; + while (j > 0 && nativePath[--j] == '\\') + os << '\\'; + } else if (nativePath[i] == '$') { + os << '$'; + } + os << nativePath[i]; + } + }; + + os << config->outputFile << ":"; + for (StringRef path : config->dependencyFiles) { + os << " \\\n "; + printFilename(os, path); + } + os << "\n"; + + for (StringRef path : config->dependencyFiles) { + os << "\n"; + printFilename(os, path); + os << ":\n"; + } +} + // Replaces common symbols with defined symbols reside in .bss sections. // This function is called after all symbol names are resolved. As a // result, the passes after the symbol resolution won't see any @@ -2065,6 +2135,11 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { return false; }); + // Since we now have a complete set of input files, we can create + // a .d file to record build dependencies. + if (!config->dependencyFile.empty()) + writeDependencyFile(); + // Now that the number of partitions is fixed, save a pointer to the main // partition.
mainPart = &partitions[0]; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index c142c00517ccf..6199f43b466a2 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -110,6 +110,7 @@ Optional<MemoryBufferRef> elf::readFile(StringRef path) { path = saver.save(config->chroot + path); log(path); + config->dependencyFiles.insert(llvm::CachedHashString(path)); auto mbOrErr = MemoryBuffer::getFile(path, -1, false); if (auto ec = mbOrErr.getError()) { diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 18bc612f6af45..c3cadafdccd28 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -132,6 +132,9 @@ defm demangle: B<"demangle", "Demangle symbol names (default)", "Do not demangle symbol names">; +defm dependency_file: EEq<"dependency-file", "Write a dependency file">, + MetaVarName<"<file>">; + def disable_new_dtags: F<"disable-new-dtags">, HelpText<"Disable new dynamic tags">; diff --git a/lld/test/ELF/dependency-file.s b/lld/test/ELF/dependency-file.s new file mode 100644 index 0000000000000..e7dbf9c7695f7 --- /dev/null +++ b/lld/test/ELF/dependency-file.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o "%t/bar baz.o" +# RUN: llvm-mc -filetype=obj -triple=x86_64 /dev/null -o "%t/#quux$.o" +# RUN: ld.lld -o %t/foo.exe %t/foo.o %t/"bar baz.o" "%t/#quux$.o" --dependency-file=%t/foo.d +# RUN: FileCheck --match-full-lines -DFILE=%t %s < %t/foo.d + +# CHECK: [[FILE]]{{/|(\\)+}}foo.exe: \ +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}foo.o \ +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}bar\ baz.o \ +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}\#quux$$.o +# CHECK-EMPTY: +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}foo.o: +# CHECK-EMPTY: +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}bar\ baz.o: +# CHECK-EMPTY: +# CHECK-NEXT: [[FILE]]{{/|(\\)+}}\#quux$$.o: + +.global _start +_start: From 0c938a8dd80ad707ec1f20d936cc5c9d73df8de5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 3 Aug 2020 16:37:25 -0700 Subject: [PATCH 253/600] OpenMP: Fix typo variabls -> variables --- openmp/runtime/tools/lib/Platform.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm index a6b158d907535..38593a154d03f 100644 --- a/openmp/runtime/tools/lib/Platform.pm +++ b/openmp/runtime/tools/lib/Platform.pm @@ -450,7 +450,7 @@ C<--target-architecture=I> and C<--target-os=I> options. Typical usage Platform::target_options(), # Let script recognize --target-os and --target-arch options. ... ); - # Initialize variabls after parsing command line. + # Initialize variables after parsing command line.
( $os, $arch, $platform ) = ( Platform::target_os(), Platform::target_arch(), Platform::target_platform() ); =back From ffe0066b62e989ca3e59f1ed211324ca7ec37b5a Mon Sep 17 00:00:00 2001 From: Shinji Okumura Date: Tue, 4 Aug 2020 08:59:23 +0900 Subject: [PATCH 254/600] [Attributor][NFC] Clang format --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5cd0c711ddde1..89d359bcbe034 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2006,7 +2006,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { if (idx >= Callee->arg_size()) break; Value *ArgVal = CB.getArgOperand(idx); - if(!ArgVal) + if (!ArgVal) continue; IRPosition CalleeArgumentIRP = IRPosition::argument(*Callee->getArg(idx)); From 675ad1bc6a96d3c7ef1909c91695189cd818a143 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Mon, 3 Aug 2020 11:35:29 -0700 Subject: [PATCH 255/600] [flang] Implement runtime support for INQUIRE statements Differential Revision: https://reviews.llvm.org/D85166 --- flang/runtime/io-api.cpp | 126 ++++++--- flang/runtime/io-api.h | 38 ++- flang/runtime/io-error.h | 2 +- flang/runtime/io-stmt.cpp | 506 ++++++++++++++++++++++++++++++++++++- flang/runtime/io-stmt.h | 88 ++++++- flang/runtime/memory.h | 4 +- flang/runtime/tools.cpp | 7 + flang/runtime/tools.h | 2 + flang/runtime/unit-map.cpp | 14 + flang/runtime/unit-map.h | 7 + flang/runtime/unit.cpp | 33 ++- flang/runtime/unit.h | 8 +- 12 files changed, 760 insertions(+), 75 deletions(-) diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index f36144d0c3c46..f64fe97b2d233 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -23,6 +23,23 @@ namespace Fortran::runtime::io { +const char *InquiryKeywordHashDecode( + char *buffer, std::size_t n, InquiryKeywordHash hash) { + if (n < 1) { + return nullptr; + } + char *p{buffer + n}; + *--p = '\0'; + while (hash > 1) { + if (p < buffer) { + return nullptr; + } + *--p = 'A' + (hash % 26); + hash /= 26; + } + return hash == 1 ? 
p : nullptr; +} + template Cookie BeginInternalArrayListIO(const Descriptor &descriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, @@ -289,8 +306,8 @@ Cookie IONAME(BeginBackspace)( Cookie IONAME(BeginEndfile)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; - ExternalFileUnit &unit{ - ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)}; + ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( + unitNumber, Direction::Output, true /*formatted*/, terminator)}; return &unit.BeginIoStatement( unit, ExternalMiscIoStatementState::Endfile, sourceFile, sourceLine); } @@ -298,12 +315,50 @@ Cookie IONAME(BeginEndfile)( Cookie IONAME(BeginRewind)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; - ExternalFileUnit &unit{ - ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)}; + ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous( + unitNumber, Direction::Input, true /*formatted*/, terminator)}; return &unit.BeginIoStatement( unit, ExternalMiscIoStatementState::Rewind, sourceFile, sourceLine); } +Cookie IONAME(BeginInquireUnit)( + ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { + if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { + return &unit->BeginIoStatement( + *unit, sourceFile, sourceLine); + } else { + // INQUIRE(UNIT=unrecognized unit) + Terminator oom{sourceFile, sourceLine}; + return &New{oom}(sourceFile, sourceLine) + .release() + ->ioStatementState(); + } +} + +Cookie IONAME(BeginInquireFile)(const char *path, std::size_t pathLength, + const char *sourceFile, int sourceLine) { + Terminator oom{sourceFile, sourceLine}; + auto trimmed{ + SaveDefaultCharacter(path, TrimTrailingSpaces(path, pathLength), oom)}; + if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(trimmed.get())}) { + // INQUIRE(FILE=) to a connected unit + return &unit->BeginIoStatement( + *unit, sourceFile, sourceLine); + } else { + return &New{oom}( + std::move(trimmed), sourceFile, sourceLine) + .release() + ->ioStatementState(); + } +} + +Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { + Terminator oom{sourceFile, sourceLine}; + return &New{oom}(sourceFile, sourceLine) + .release() + ->ioStatementState(); +} + // Control list items void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, @@ -522,29 +577,21 @@ bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { io.GetIoErrorHandler().Crash( "SetAccess() called when not in an OPEN statement"); } - ConnectionState &connection{open->GetConnectionState()}; - Access access{connection.access}; static const char *keywords[]{"SEQUENTIAL", "DIRECT", "STREAM", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: - access = Access::Sequential; + open->set_access(Access::Sequential); break; case 1: - access = Access::Direct; + open->set_access(Access::Direct); break; case 2: - access = Access::Stream; + open->set_access(Access::Stream); break; default: open->SignalError(IostatErrorInKeyword, "Invalid ACCESS='%.*s'", static_cast(length), keyword); } - if (access != connection.access) { - if (open->wasExtant()) { - open->SignalError("ACCESS= may not be changed on an open unit"); - } - connection.access = access; - } return true; } @@ -661,25 +708,18 @@ bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { io.GetIoErrorHandler().Crash( "SetEncoding() called when 
not in an OPEN statement"); } - bool isUnformatted{false}; static const char *keywords[]{"FORMATTED", "UNFORMATTED", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: - isUnformatted = false; + open->set_isUnformatted(false); break; case 1: - isUnformatted = true; + open->set_isUnformatted(true); break; default: open->SignalError(IostatErrorInKeyword, "Invalid FORM='%.*s'", static_cast(length), keyword); } - if (isUnformatted != open->unit().isUnformatted) { - if (open->wasExtant()) { - open->SignalError("FORM= may not be changed on an open unit"); - } - open->unit().isUnformatted = isUnformatted; - } return true; } @@ -777,11 +817,10 @@ bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { "SetStatus() called when not in an OPEN or CLOSE statement"); } -bool IONAME(SetFile)( - Cookie cookie, const char *path, std::size_t chars, int kind) { +bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { - open->set_path(path, chars, kind); + open->set_path(path, chars); return true; } io.GetIoErrorHandler().Crash( @@ -789,7 +828,8 @@ bool IONAME(SetFile)( return false; } -static bool SetInteger(int &x, int kind, int value) { +template +static bool SetInteger(INT &x, int kind, std::int64_t value) { switch (kind) { case 1: reinterpret_cast(x) = value; @@ -798,7 +838,7 @@ static bool SetInteger(int &x, int kind, int value) { reinterpret_cast(x) = value; return true; case 4: - x = value; + reinterpret_cast(x) = value; return true; case 8: reinterpret_cast(x) = value; @@ -1059,6 +1099,34 @@ void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { } } +bool IONAME(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, + char *result, std::size_t length) { + IoStatementState &io{*cookie}; + return io.Inquire(inquiry, result, length); +} + +bool IONAME(InquireLogical)( + Cookie cookie, InquiryKeywordHash inquiry, bool &result) { + IoStatementState &io{*cookie}; + return io.Inquire(inquiry, result); +} + +bool IONAME(InquirePendingId)(Cookie cookie, std::int64_t id, bool &result) { + IoStatementState &io{*cookie}; + return io.Inquire(HashInquiryKeyword("PENDING"), id, result); +} + +bool IONAME(InquireInteger64)( + Cookie cookie, InquiryKeywordHash inquiry, std::int64_t &result, int kind) { + IoStatementState &io{*cookie}; + std::int64_t n; + if (io.Inquire(inquiry, n)) { + SetInteger(result, kind, n); + return true; + } + return false; +} + enum Iostat IONAME(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); diff --git a/flang/runtime/io-api.h b/flang/runtime/io-api.h index f6ebc63e3f3d2..a38152d6ec1c1 100644 --- a/flang/runtime/io-api.h +++ b/flang/runtime/io-api.h @@ -29,6 +29,26 @@ using ExternalUnit = int; using AsynchronousId = int; static constexpr ExternalUnit DefaultUnit{-1}; // READ(*), WRITE(*), PRINT +// INQUIRE specifiers are encoded as simple base-26 packings of +// the spellings of their keywords. 
+using InquiryKeywordHash = std::uint64_t; +constexpr InquiryKeywordHash HashInquiryKeyword(const char *p) { + InquiryKeywordHash hash{1}; + while (char ch{*p++}) { + std::uint64_t letter{0}; + if (ch >= 'a' && ch <= 'z') { + letter = ch - 'a'; + } else { + letter = ch - 'A'; + } + hash = 26 * hash + letter; + } + return hash; +} + +const char *InquiryKeywordHashDecode( + char *buffer, std::size_t, InquiryKeywordHash); + extern "C" { #define IONAME(name) RTNAME(io##name) @@ -150,7 +170,7 @@ Cookie IONAME(BeginOpenNewUnit)( // BeginInquireIoLength() is basically a no-op output statement. Cookie IONAME(BeginInquireUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInquireFile)(const char *, std::size_t, int kind = 1, +Cookie IONAME(BeginInquireFile)(const char *, std::size_t, const char *sourceFile = nullptr, int sourceLine = 0); Cookie IONAME(BeginInquireIoLength)( const char *sourceFile = nullptr, int sourceLine = 0); @@ -255,10 +275,7 @@ bool IONAME(SetRecl)(Cookie, std::size_t); // RECL= // For CLOSE: STATUS=KEEP, DELETE bool IONAME(SetStatus)(Cookie, const char *, std::size_t); -// SetFile() may pass a CHARACTER argument of non-default kind, -// and such filenames are converted to UTF-8 before being -// presented to the filesystem. -bool IONAME(SetFile)(Cookie, const char *, std::size_t chars, int kind = 1); +bool IONAME(SetFile)(Cookie, const char *, std::size_t chars); // Acquires the runtime-created unit number for OPEN(NEWUNIT=) bool IONAME(GetNewUnit)(Cookie, int &, int kind = 4); @@ -275,18 +292,17 @@ void IONAME(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= // INQUIRE() specifiers are mostly identified by their NUL-terminated // case-insensitive names. -// ACCESS, ACTION, ASYNCHRONOUS, BLANK, DECIMAL, DELIM, DIRECT, ENCODING, -// FORM, FORMATTED, NAME, PAD, POSITION, READ, READWRITE, ROUND, +// ACCESS, ACTION, ASYNCHRONOUS, BLANK, CONVERT, DECIMAL, DELIM, DIRECT, +// ENCODING, FORM, FORMATTED, NAME, PAD, POSITION, READ, READWRITE, ROUND, // SEQUENTIAL, SIGN, STREAM, UNFORMATTED, WRITE: -bool IONAME(InquireCharacter)( - Cookie, const char *specifier, char *, std::size_t); +bool IONAME(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); // EXIST, NAMED, OPENED, and PENDING (without ID): -bool IONAME(InquireLogical)(Cookie, const char *specifier, bool &); +bool IONAME(InquireLogical)(Cookie, InquiryKeywordHash, bool &); // PENDING with ID bool IONAME(InquirePendingId)(Cookie, std::int64_t, bool &); // NEXTREC, NUMBER, POS, RECL, SIZE bool IONAME(InquireInteger64)( - Cookie, const char *specifier, std::int64_t &, int kind = 8); + Cookie, InquiryKeywordHash, std::int64_t &, int kind = 8); // This function must be called to end an I/O statement, and its // cookie value may not be used afterwards unless it is recycled diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index 8d43c40ef103c..5dd7f5e03d080 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -38,7 +38,7 @@ class IoErrorHandler : public Terminator { void SignalError(int iostatOrErrno, const char *msg, ...); void SignalError(int iostatOrErrno); - template void SignalError(const char *msg, X &&... 
xs) { + template void SignalError(const char *msg, X &&...xs) { SignalError(IostatGenericError, msg, std::forward(xs)...); } diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index a903f708bc627..8300b1ea3c27b 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -26,6 +26,37 @@ std::optional IoStatementBase::GetNextDataEdit( return std::nullopt; } +bool IoStatementBase::Inquire(InquiryKeywordHash, char *, std::size_t) { + Crash( + "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); + return false; +} + +bool IoStatementBase::Inquire(InquiryKeywordHash, bool &) { + Crash( + "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); + return false; +} + +bool IoStatementBase::Inquire(InquiryKeywordHash, std::int64_t, bool &) { + Crash( + "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); + return false; +} + +bool IoStatementBase::Inquire(InquiryKeywordHash, std::int64_t &) { + Crash( + "IoStatementBase::Inquire() called for I/O statement other than INQUIRE"); + return false; +} + +void IoStatementBase::BadInquiryKeywordHashCrash(InquiryKeywordHash inquiry) { + char buffer[16]; + const char *decode{InquiryKeywordHashDecode(buffer, sizeof buffer, inquiry)}; + Crash("bad InquiryKeywordHash 0x%x (%s)", inquiry, + decode ? decode : "(cannot decode)"); +} + template InternalIoStatementState::InternalIoStatementState( Buffer scalar, std::size_t length, const char *sourceFile, int sourceLine) @@ -151,14 +182,9 @@ int ExternalIoStatementBase::EndIoStatement() { return result; } -void OpenStatementState::set_path( - const char *path, std::size_t length, int kind) { - if (kind != 1) { // TODO - Crash("OPEN: FILE= with unimplemented: CHARACTER(KIND=%d)", kind); - } - std::size_t bytes{length * kind}; // TODO: UTF-8 encoding of Unicode path - path_ = SaveDefaultCharacter(path, bytes, *this); - pathLength_ = length; +void OpenStatementState::set_path(const char *path, std::size_t length) { + pathLength_ = TrimTrailingSpaces(path, length); + path_ = SaveDefaultCharacter(path, pathLength_, *this); } int OpenStatementState::EndIoStatement() { @@ -166,8 +192,31 @@ int OpenStatementState::EndIoStatement() { SignalError("OPEN statement for connected unit may not have STATUS= other " "than 'OLD'"); } - unit().OpenUnit(status_.value_or(OpenStatus::Unknown), action_, position_, - std::move(path_), pathLength_, convert_, *this); + if (path_.get() || wasExtant_ || + (status_ && *status_ == OpenStatus::Scratch)) { + unit().OpenUnit(status_.value_or(OpenStatus::Unknown), action_, position_, + std::move(path_), pathLength_, convert_, *this); + } else { + unit().OpenAnonymousUnit(status_.value_or(OpenStatus::Unknown), action_, + position_, convert_, *this); + } + if (access_) { + if (*access_ != unit().access) { + if (wasExtant_) { + SignalError("ACCESS= may not be changed on an open unit"); + } + } + unit().access = *access_; + } + if (!isUnformatted_) { + isUnformatted_ = unit().access != Access::Sequential; + } + if (*isUnformatted_ != unit().isUnformatted) { + if (wasExtant_) { + SignalError("FORM= may not be changed on an open unit"); + } + unit().isUnformatted = *isUnformatted_; + } return ExternalIoStatementBase::EndIoStatement(); } @@ -178,7 +227,7 @@ int CloseStatementState::EndIoStatement() { return result; } -int NoopCloseStatementState::EndIoStatement() { +int NoUnitIoStatementState::EndIoStatement() { auto result{IoStatementBase::EndIoStatement()}; FreeMemory(this); return result; @@ -454,6 +503,26 @@ bool 
ListDirectedStatementState::NeedAdvance( width > connection.RemainingSpaceInRecord(); } +bool IoStatementState::Inquire( + InquiryKeywordHash inquiry, char *out, std::size_t chars) { + return std::visit( + [&](auto &x) { return x.get().Inquire(inquiry, out, chars); }, u_); +} + +bool IoStatementState::Inquire(InquiryKeywordHash inquiry, bool &out) { + return std::visit([&](auto &x) { return x.get().Inquire(inquiry, out); }, u_); +} + +bool IoStatementState::Inquire( + InquiryKeywordHash inquiry, std::int64_t id, bool &out) { + return std::visit( + [&](auto &x) { return x.get().Inquire(inquiry, id, out); }, u_); +} + +bool IoStatementState::Inquire(InquiryKeywordHash inquiry, std::int64_t &n) { + return std::visit([&](auto &x) { return x.get().Inquire(inquiry, n); }, u_); +} + bool ListDirectedStatementState::EmitLeadingSpaceOrAdvance( IoStatementState &io, std::size_t length, bool isCharacter) { if (length == 0) { @@ -678,4 +747,419 @@ int ExternalMiscIoStatementState::EndIoStatement() { return ExternalIoStatementBase::EndIoStatement(); } +InquireUnitState::InquireUnitState( + ExternalFileUnit &unit, const char *sourceFile, int sourceLine) + : ExternalIoStatementBase{unit, sourceFile, sourceLine} {} + +bool InquireUnitState::Inquire( + InquiryKeywordHash inquiry, char *result, std::size_t length) { + const char *str{nullptr}; + switch (inquiry) { + case HashInquiryKeyword("ACCESS"): + switch (unit().access) { + case Access::Sequential: + str = "SEQUENTIAL"; + break; + case Access::Direct: + str = "DIRECT"; + break; + case Access::Stream: + str = "STREAM"; + break; + } + break; + case HashInquiryKeyword("ACTION"): + str = unit().mayWrite() ? unit().mayRead() ? "READWRITE" : "WRITE" : "READ"; + break; + case HashInquiryKeyword("ASYNCHRONOUS"): + str = unit().mayAsynchronous() ? "YES" : "NO"; + break; + case HashInquiryKeyword("BLANK"): + str = unit().isUnformatted ? "UNDEFINED" + : unit().modes.editingFlags & blankZero ? "ZERO" + : "NULL"; + break; + case HashInquiryKeyword("CONVERT"): + str = unit().swapEndianness() ? "SWAP" : "NATIVE"; + break; + case HashInquiryKeyword("DECIMAL"): + str = unit().isUnformatted ? "UNDEFINED" + : unit().modes.editingFlags & decimalComma ? "COMMA" + : "POINT"; + break; + case HashInquiryKeyword("DELIM"): + if (unit().isUnformatted) { + str = "UNDEFINED"; + } else { + switch (unit().modes.delim) { + case '\'': + str = "APOSTROPHE"; + break; + case '"': + str = "QUOTE"; + break; + default: + str = "NONE"; + break; + } + } + break; + case HashInquiryKeyword("DIRECT"): + str = unit().mayPosition() ? "YES" : "NO"; + break; + case HashInquiryKeyword("ENCODING"): + str = unit().isUnformatted ? "UNDEFINED" + : unit().isUTF8 ? "UTF-8" + : "ASCII"; + break; + case HashInquiryKeyword("FORM"): + str = unit().isUnformatted ? "UNFORMATTED" : "FORMATTED"; + break; + case HashInquiryKeyword("FORMATTED"): + str = "YES"; + break; + case HashInquiryKeyword("NAME"): + str = unit().path(); + if (!str) { + return true; // result is undefined + } + break; + case HashInquiryKeyword("PAD"): + str = unit().isUnformatted ? "UNDEFINED" : unit().modes.pad ? 
"YES" : "NO"; + break; + case HashInquiryKeyword("POSITION"): + if (unit().access == Access::Direct) { + str = "UNDEFINED"; + } else { + auto size{unit().knownSize()}; + auto pos{unit().position()}; + if (pos == size.value_or(pos + 1)) { + str = "APPEND"; + } else if (pos == 0) { + str = "REWIND"; + } else { + str = "ASIS"; // processor-dependent & no common behavior + } + } + break; + case HashInquiryKeyword("READ"): + str = unit().mayRead() ? "YES" : "NO"; + break; + case HashInquiryKeyword("READWRITE"): + str = unit().mayRead() && unit().mayWrite() ? "YES" : "NO"; + break; + case HashInquiryKeyword("ROUND"): + if (unit().isUnformatted) { + str = "UNDEFINED"; + } else { + switch (unit().modes.round) { + case decimal::FortranRounding::RoundNearest: + str = "NEAREST"; + break; + case decimal::FortranRounding::RoundUp: + str = "UP"; + break; + case decimal::FortranRounding::RoundDown: + str = "DOWN"; + break; + case decimal::FortranRounding::RoundToZero: + str = "ZERO"; + break; + case decimal::FortranRounding::RoundCompatible: + str = "COMPATIBLE"; + break; + } + } + break; + case HashInquiryKeyword("SEQUENTIAL"): + str = "YES"; + break; + case HashInquiryKeyword("SIGN"): + str = unit().isUnformatted ? "UNDEFINED" + : unit().modes.editingFlags & signPlus ? "PLUS" + : "SUPPRESS"; + break; + case HashInquiryKeyword("STREAM"): + str = "YES"; + break; + case HashInquiryKeyword("WRITE"): + str = unit().mayWrite() ? "YES" : "NO"; + break; + case HashInquiryKeyword("UNFORMATTED"): + str = "YES"; + break; + } + if (str) { + ToFortranDefaultCharacter(result, length, str); + return true; + } else { + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnitState::Inquire(InquiryKeywordHash inquiry, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("EXIST"): + result = true; + return true; + case HashInquiryKeyword("NAMED"): + result = unit().path() != nullptr; + return true; + case HashInquiryKeyword("OPENED"): + result = true; + return true; + case HashInquiryKeyword("PENDING"): + result = false; // asynchronous I/O is not implemented + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnitState::Inquire( + InquiryKeywordHash inquiry, std::int64_t, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("PENDING"): + result = false; // asynchronous I/O is not implemented + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnitState::Inquire( + InquiryKeywordHash inquiry, std::int64_t &result) { + switch (inquiry) { + case HashInquiryKeyword("NEXTREC"): + if (unit().access == Access::Direct) { + result = unit().currentRecordNumber; + } + return true; + case HashInquiryKeyword("NUMBER"): + result = unit().unitNumber(); + return true; + case HashInquiryKeyword("POS"): + result = unit().position(); + return true; + case HashInquiryKeyword("RECL"): + if (unit().access == Access::Stream) { + result = -2; + } else if (unit().isFixedRecordLength && unit().recordLength) { + result = *unit().recordLength; + } else { + result = std::numeric_limits::max(); + } + return true; + case HashInquiryKeyword("SIZE"): + if (auto size{unit().knownSize()}) { + result = *size; + } else { + result = -1; + } + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +InquireNoUnitState::InquireNoUnitState(const char *sourceFile, int sourceLine) + : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} + +bool InquireNoUnitState::Inquire( + 
InquiryKeywordHash inquiry, char *result, std::size_t length) { switch (inquiry) { case HashInquiryKeyword("ACCESS"): case HashInquiryKeyword("ACTION"): case HashInquiryKeyword("ASYNCHRONOUS"): case HashInquiryKeyword("BLANK"): case HashInquiryKeyword("CONVERT"): case HashInquiryKeyword("DECIMAL"): case HashInquiryKeyword("DELIM"): case HashInquiryKeyword("FORM"): case HashInquiryKeyword("NAME"): case HashInquiryKeyword("PAD"): case HashInquiryKeyword("POSITION"): case HashInquiryKeyword("ROUND"): case HashInquiryKeyword("SIGN"): + ToFortranDefaultCharacter(result, length, "UNDEFINED"); + return true; + case HashInquiryKeyword("DIRECT"): + case HashInquiryKeyword("ENCODING"): + case HashInquiryKeyword("FORMATTED"): + case HashInquiryKeyword("READ"): + case HashInquiryKeyword("READWRITE"): + case HashInquiryKeyword("SEQUENTIAL"): + case HashInquiryKeyword("STREAM"): + case HashInquiryKeyword("WRITE"): + case HashInquiryKeyword("UNFORMATTED"): + ToFortranDefaultCharacter(result, length, "UNKNOWN"); + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireNoUnitState::Inquire(InquiryKeywordHash inquiry, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("EXIST"): + result = true; + return true; + case HashInquiryKeyword("NAMED"): + case HashInquiryKeyword("OPENED"): + case HashInquiryKeyword("PENDING"): + result = false; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireNoUnitState::Inquire( + InquiryKeywordHash inquiry, std::int64_t, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("PENDING"): + result = false; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireNoUnitState::Inquire( + InquiryKeywordHash inquiry, std::int64_t &result) { + switch (inquiry) { + case HashInquiryKeyword("NEXTREC"): + case HashInquiryKeyword("NUMBER"): + case HashInquiryKeyword("POS"): + case HashInquiryKeyword("RECL"): + case HashInquiryKeyword("SIZE"): + result = -1; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +InquireUnconnectedFileState::InquireUnconnectedFileState( + OwningPtr<char> &&path, const char *sourceFile, int sourceLine) + : NoUnitIoStatementState{sourceFile, sourceLine, *this}, path_{std::move( path)} {} + +bool InquireUnconnectedFileState::Inquire( + InquiryKeywordHash inquiry, char *result, std::size_t length) { + const char *str{nullptr}; + switch (inquiry) { + case HashInquiryKeyword("ACCESS"): + case HashInquiryKeyword("ACTION"): + case HashInquiryKeyword("ASYNCHRONOUS"): + case HashInquiryKeyword("BLANK"): + case HashInquiryKeyword("CONVERT"): + case HashInquiryKeyword("DECIMAL"): + case HashInquiryKeyword("DELIM"): + case HashInquiryKeyword("FORM"): + case HashInquiryKeyword("PAD"): + case HashInquiryKeyword("POSITION"): + case HashInquiryKeyword("ROUND"): + case HashInquiryKeyword("SIGN"): + str = "UNDEFINED"; + break; + case HashInquiryKeyword("DIRECT"): + case HashInquiryKeyword("ENCODING"): + str = "UNKNOWN"; + break; + case HashInquiryKeyword("READ"): + str = MayRead(path_.get()) ? "YES" : "NO"; + break; + case HashInquiryKeyword("READWRITE"): + str = MayReadAndWrite(path_.get()) ? "YES" : "NO"; + break; + case HashInquiryKeyword("WRITE"): + str = MayWrite(path_.get()) ?
"YES" : "NO"; + break; + case HashInquiryKeyword("FORMATTED"): + case HashInquiryKeyword("SEQUENTIAL"): + case HashInquiryKeyword("STREAM"): + case HashInquiryKeyword("UNFORMATTED"): + str = "YES"; + break; + case HashInquiryKeyword("NAME"): + str = path_.get(); + return true; + } + if (str) { + ToFortranDefaultCharacter(result, length, str); + return true; + } else { + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnconnectedFileState::Inquire( + InquiryKeywordHash inquiry, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("EXIST"): + result = IsExtant(path_.get()); + return true; + case HashInquiryKeyword("NAMED"): + result = true; + return true; + case HashInquiryKeyword("OPENED"): + result = false; + return true; + case HashInquiryKeyword("PENDING"): + result = false; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnconnectedFileState::Inquire( + InquiryKeywordHash inquiry, std::int64_t, bool &result) { + switch (inquiry) { + case HashInquiryKeyword("PENDING"): + result = false; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +bool InquireUnconnectedFileState::Inquire( + InquiryKeywordHash inquiry, std::int64_t &result) { + switch (inquiry) { + case HashInquiryKeyword("NEXTREC"): + case HashInquiryKeyword("NUMBER"): + case HashInquiryKeyword("POS"): + case HashInquiryKeyword("RECL"): + case HashInquiryKeyword("SIZE"): + result = -1; + return true; + default: + BadInquiryKeywordHashCrash(inquiry); + return false; + } +} + +InquireIOLengthState::InquireIOLengthState( + const char *sourceFile, int sourceLine) + : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} + +bool InquireIOLengthState::Emit( + const char *, std::size_t n, std::size_t /*elementBytes*/) { + bytes_ += n; + return true; +} + } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index ddc264aea3605..9e68deab2e641 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -16,6 +16,7 @@ #include "file.h" #include "format.h" #include "internal-unit.h" +#include "io-api.h" #include "io-error.h" #include #include @@ -26,6 +27,11 @@ namespace Fortran::runtime::io { class ExternalFileUnit; class OpenStatementState; +class InquireUnitState; +class InquireNoUnitState; +class InquireUnconnectedFileState; +class InquireIOLengthState; +class ExternalMiscIoStatementState; class CloseStatementState; class NoopCloseStatementState; @@ -36,7 +42,6 @@ template class ExternalFormattedIoStatementState; template class ExternalListIoStatementState; template class UnformattedIoStatementState; -class ExternalMiscIoStatementState; // The Cookie type in the I/O API is a pointer (for C) to this class. 
class IoStatementState { @@ -60,6 +65,10 @@ class IoStatementState { ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit MutableModes &mutableModes(); void BeginReadingRecord(); + bool Inquire(InquiryKeywordHash, char *, std::size_t); + bool Inquire(InquiryKeywordHash, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t, bool &); // PENDING= + bool Inquire(InquiryKeywordHash, std::int64_t &); // N.B.: this also works with base classes template A *get_if() const { @@ -98,6 +107,10 @@ class IoStatementState { std::reference_wrapper>, std::reference_wrapper>, std::reference_wrapper>, + std::reference_wrapper, + std::reference_wrapper, + std::reference_wrapper, + std::reference_wrapper, std::reference_wrapper> u_; }; @@ -110,6 +123,12 @@ struct IoStatementBase : public DefaultFormatControlCallbacks { std::optional GetNextDataEdit(IoStatementState &, int = 1); ExternalFileUnit *GetExternalFileUnit() const { return nullptr; } void BeginReadingRecord() {} + + bool Inquire(InquiryKeywordHash, char *, std::size_t); + bool Inquire(InquiryKeywordHash, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t &); + void BadInquiryKeywordHashCrash(InquiryKeywordHash); }; struct InputStatementState {}; @@ -303,10 +322,12 @@ class OpenStatementState : public ExternalIoStatementBase { wasExtant} {} bool wasExtant() const { return wasExtant_; } void set_status(OpenStatus status) { status_ = status; } // STATUS= - void set_path(const char *, std::size_t, int kind); // FILE= + void set_path(const char *, std::size_t); // FILE= void set_position(Position position) { position_ = position; } // POSITION= void set_action(Action action) { action_ = action; } // ACTION= void set_convert(Convert convert) { convert_ = convert; } // CONVERT= + void set_access(Access access) { access_ = access; } // ACCESS= + void set_isUnformatted(bool yes = true) { isUnformatted_ = yes; } // FORM= int EndIoStatement(); private: @@ -317,6 +338,8 @@ class OpenStatementState : public ExternalIoStatementBase { Convert convert_{Convert::Native}; OwningPtr path_; std::size_t pathLength_; + std::optional isUnformatted_; + std::optional access_; }; class CloseStatementState : public ExternalIoStatementBase { @@ -331,21 +354,31 @@ class CloseStatementState : public ExternalIoStatementBase { CloseStatus status_{CloseStatus::Keep}; }; -class NoopCloseStatementState : public IoStatementBase { +// For CLOSE(bad unit) and INQUIRE(unconnected unit) +class NoUnitIoStatementState : public IoStatementBase { public: - NoopCloseStatementState(const char *sourceFile, int sourceLine) - : IoStatementBase{sourceFile, sourceLine}, ioStatementState_{*this} {} IoStatementState &ioStatementState() { return ioStatementState_; } - void set_status(CloseStatus) {} // discards MutableModes &mutableModes() { return connection_.modes; } ConnectionState &GetConnectionState() { return connection_; } int EndIoStatement(); +protected: + template + NoUnitIoStatementState(const char *sourceFile, int sourceLine, A &stmt) + : IoStatementBase{sourceFile, sourceLine}, ioStatementState_{stmt} {} + private: IoStatementState ioStatementState_; // points to *this ConnectionState connection_; }; +class NoopCloseStatementState : public NoUnitIoStatementState { +public: + NoopCloseStatementState(const char *sourceFile, int sourceLine) + : NoUnitIoStatementState{sourceFile, sourceLine, *this} {} + void set_status(CloseStatus) {} // discards +}; + extern template class InternalIoStatementState; extern 
template class InternalIoStatementState; extern template class InternalFormattedIoStatementState; @@ -369,6 +402,49 @@ extern template class FormatControl< extern template class FormatControl< ExternalFormattedIoStatementState>; +class InquireUnitState : public ExternalIoStatementBase { +public: + InquireUnitState(ExternalFileUnit &unit, const char *sourceFile = nullptr, + int sourceLine = 0); + bool Inquire(InquiryKeywordHash, char *, std::size_t); + bool Inquire(InquiryKeywordHash, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t &); +}; + +class InquireNoUnitState : public NoUnitIoStatementState { +public: + InquireNoUnitState(const char *sourceFile = nullptr, int sourceLine = 0); + bool Inquire(InquiryKeywordHash, char *, std::size_t); + bool Inquire(InquiryKeywordHash, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t &); +}; + +class InquireUnconnectedFileState : public NoUnitIoStatementState { +public: + InquireUnconnectedFileState(OwningPtr &&path, + const char *sourceFile = nullptr, int sourceLine = 0); + bool Inquire(InquiryKeywordHash, char *, std::size_t); + bool Inquire(InquiryKeywordHash, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t, bool &); + bool Inquire(InquiryKeywordHash, std::int64_t &); + +private: + OwningPtr path_; // trimmed and NUL terminated +}; + +class InquireIOLengthState : public NoUnitIoStatementState, + public OutputStatementState { +public: + InquireIOLengthState(const char *sourceFile = nullptr, int sourceLine = 0); + std::size_t bytes() const { return bytes_; } + bool Emit(const char *, std::size_t, std::size_t elementBytes = 0); + +private: + std::size_t bytes_{0}; +}; + class ExternalMiscIoStatementState : public ExternalIoStatementBase { public: enum Which { Flush, Backspace, Endfile, Rewind }; diff --git a/flang/runtime/memory.h b/flang/runtime/memory.h index f21b237f3905b..4b09fe80772ed 100644 --- a/flang/runtime/memory.h +++ b/flang/runtime/memory.h @@ -42,7 +42,7 @@ template class SizedNew { public: explicit SizedNew(const Terminator &terminator) : terminator_{terminator} {} template - [[nodiscard]] OwningPtr operator()(std::size_t bytes, X &&... x) { + [[nodiscard]] OwningPtr operator()(std::size_t bytes, X &&...x) { return OwningPtr{new (AllocateMemoryOrCrash(terminator_, bytes)) A{std::forward(x)...}}; } @@ -53,7 +53,7 @@ template class SizedNew { template struct New : public SizedNew { using SizedNew::SizedNew; - template [[nodiscard]] OwningPtr operator()(X &&... 
x) { + template [[nodiscard]] OwningPtr operator()(X &&...x) { return SizedNew::operator()(sizeof(A), std::forward(x)...); } }; diff --git a/flang/runtime/tools.cpp b/flang/runtime/tools.cpp index ea9ad9063344b..219daaf2880b1 100644 --- a/flang/runtime/tools.cpp +++ b/flang/runtime/tools.cpp @@ -12,6 +12,13 @@ namespace Fortran::runtime { +std::size_t TrimTrailingSpaces(const char *s, std::size_t n) { + while (n > 0 && s[n - 1] == ' ') { + --n; + } + return n; +} + OwningPtr SaveDefaultCharacter( const char *s, std::size_t length, const Terminator &terminator) { if (s) { diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h index fad19f607c683..6c5eb63cc8c11 100644 --- a/flang/runtime/tools.h +++ b/flang/runtime/tools.h @@ -18,6 +18,8 @@ namespace Fortran::runtime { class Terminator; +std::size_t TrimTrailingSpaces(const char *, std::size_t); + OwningPtr SaveDefaultCharacter( const char *, std::size_t, const Terminator &); diff --git a/flang/runtime/unit-map.cpp b/flang/runtime/unit-map.cpp index 905beb4d084fa..1cd2115f4aa1b 100644 --- a/flang/runtime/unit-map.cpp +++ b/flang/runtime/unit-map.cpp @@ -72,6 +72,20 @@ void UnitMap::FlushAll(IoErrorHandler &handler) { } } +ExternalFileUnit *UnitMap::Find(const char *path) { + if (path) { + // TODO: Faster data structure + for (int j{0}; j < buckets_; ++j) { + for (Chain *p{bucket_[j].get()}; p; p = p->next.get()) { + if (p->unit.path() && std::strcmp(p->unit.path(), path) == 0) { + return &p->unit; + } + } + } + } + return nullptr; +} + ExternalFileUnit &UnitMap::Create(int n, const Terminator &terminator) { Chain &chain{*New{terminator}(n).release()}; chain.next.reset(&chain); diff --git a/flang/runtime/unit-map.h b/flang/runtime/unit-map.h index be244f5ae463a..961962a2d635c 100644 --- a/flang/runtime/unit-map.h +++ b/flang/runtime/unit-map.h @@ -34,6 +34,12 @@ class UnitMap { return p ? *p : Create(n, terminator); } + // Unit look-up by name is needed for INQUIRE(FILE="...") + ExternalFileUnit *LookUp(const char *path) { + CriticalSection critical{lock_}; + return Find(path); + } + ExternalFileUnit &NewUnit(const Terminator &terminator) { CriticalSection critical{lock_}; return Create(nextNewUnit_--, terminator); @@ -72,6 +78,7 @@ class UnitMap { } return nullptr; } + ExternalFileUnit *Find(const char *path); ExternalFileUnit &Create(int, const Terminator &); diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index a4c69df8d6a9a..be36666f66e46 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -59,20 +59,19 @@ ExternalFileUnit &ExternalFileUnit::LookUpOrCreateAnonymous( ExternalFileUnit &result{ GetUnitMap().LookUpOrCreate(unit, terminator, exists)}; if (!exists) { - // I/O to an unconnected unit reads/creates a local file, e.g. fort.7 - std::size_t pathMaxLen{32}; - auto path{SizedNew{terminator}(pathMaxLen)}; - std::snprintf(path.get(), pathMaxLen, "fort.%d", unit); IoErrorHandler handler{terminator}; - result.OpenUnit( - dir == Direction::Input ? OpenStatus::Old : OpenStatus::Replace, - Action::ReadWrite, Position::Rewind, std::move(path), - std::strlen(path.get()), Convert::Native, handler); + result.OpenAnonymousUnit( + dir == Direction::Input ? 
OpenStatus::Unknown : OpenStatus::Replace, + Action::ReadWrite, Position::Rewind, Convert::Native, handler); result.isUnformatted = isUnformatted; } return result; } +ExternalFileUnit *ExternalFileUnit::LookUp(const char *path) { + return GetUnitMap().LookUp(path); +} + ExternalFileUnit &ExternalFileUnit::CreateNew( int unit, const Terminator &terminator) { bool wasExtant{false}; @@ -125,10 +124,7 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional action, handler.SignalError(IostatOpenBadRecl, "OPEN(UNIT=%d,ACCESS='DIRECT',RECL=%jd): record length is invalid", unitNumber(), static_cast(*recordLength)); - } else if (!totalBytes) { - handler.SignalError(IostatOpenUnknownSize, - "OPEN(UNIT=%d,ACCESS='DIRECT'): file size is not known"); - } else if (*totalBytes % *recordLength != 0) { + } else if (totalBytes && (*totalBytes % *recordLength != 0)) { handler.SignalError(IostatOpenBadAppend, "OPEN(UNIT=%d,ACCESS='DIRECT',RECL=%jd): record length is not an " "even divisor of the file size %jd", @@ -137,7 +133,7 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional action, } } if (position == Position::Append) { - if (*totalBytes && recordLength && *recordLength) { + if (totalBytes && recordLength && *recordLength) { endfileRecordNumber = 1 + (*totalBytes / *recordLength); } else { // Fake it so that we can backspace relative from the end @@ -149,6 +145,17 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional action, } } +void ExternalFileUnit::OpenAnonymousUnit(OpenStatus status, + std::optional action, Position position, Convert convert, + IoErrorHandler &handler) { + // I/O to an unconnected unit reads/creates a local file, e.g. fort.7 + std::size_t pathMaxLen{32}; + auto path{SizedNew{handler}(pathMaxLen)}; + std::snprintf(path.get(), pathMaxLen, "fort.%d", unitNumber_); + OpenUnit(status, action, position, std::move(path), std::strlen(path.get()), + convert, handler); +} + void ExternalFileUnit::CloseUnit(CloseStatus status, IoErrorHandler &handler) { DoImpliedEndfile(handler); Flush(handler); diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index f94e4229cd4c8..9d66d962bc56d 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -35,6 +35,7 @@ class ExternalFileUnit : public ConnectionState, public: explicit ExternalFileUnit(int unitNumber) : unitNumber_{unitNumber} {} int unitNumber() const { return unitNumber_; } + bool swapEndianness() const { return swapEndianness_; } static ExternalFileUnit *LookUp(int unit); static ExternalFileUnit &LookUpOrCrash(int unit, const Terminator &); @@ -42,6 +43,7 @@ class ExternalFileUnit : public ConnectionState, int unit, const Terminator &, bool &wasExtant); static ExternalFileUnit &LookUpOrCreateAnonymous( int unit, Direction, bool isUnformatted, const Terminator &); + static ExternalFileUnit *LookUp(const char *path); static ExternalFileUnit &CreateNew(int unit, const Terminator &); static ExternalFileUnit *LookUpForClose(int unit); static int NewUnit(const Terminator &); @@ -51,13 +53,15 @@ class ExternalFileUnit : public ConnectionState, void OpenUnit(OpenStatus, std::optional, Position, OwningPtr &&path, std::size_t pathLength, Convert, IoErrorHandler &); + void OpenAnonymousUnit( + OpenStatus, std::optional, Position, Convert, IoErrorHandler &); void CloseUnit(CloseStatus, IoErrorHandler &); void DestroyClosed(); bool SetDirection(Direction, IoErrorHandler &); template - IoStatementState &BeginIoStatement(X &&... 
xs) { + IoStatementState &BeginIoStatement(X &&...xs) { // TODO: Child data transfer statements vs. locking lock_.Take(); // dropped in EndIoStatement() A &state{u_.emplace(std::forward(xs)...)}; @@ -111,7 +115,7 @@ class ExternalFileUnit : public ConnectionState, ExternalListIoStatementState, ExternalListIoStatementState, UnformattedIoStatementState, - UnformattedIoStatementState, + UnformattedIoStatementState, InquireUnitState, ExternalMiscIoStatementState> u_; From ba955397ac44e5df6135469d76c645fdcac256da Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Mon, 3 Aug 2020 08:37:52 -0400 Subject: [PATCH 256/600] [SCEVExpander][PowerPC]clear scev rewriter before deleting instructions. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D85130 --- llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index a7546d2be5d83..0068ad7174ee7 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -606,6 +606,10 @@ bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, NewBasePtr = NewPHI; } + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted below, causing the AssertingVH in the cache to trigger. + SCEVE.clear(); + if (Instruction *IDel = dyn_cast(BasePtr)) BBChanged.insert(IDel->getParent()); BasePtr->replaceAllUsesWith(NewBasePtr); From a06c28df3e8c85ceb665d3d9a1ebc2853dfd87a9 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Mon, 3 Aug 2020 18:37:50 -0700 Subject: [PATCH 257/600] Temporarily revert "[test] Exit with an error if no tests are run." This reverts commit adb5c23f8c0d60eeec41dcbe21d1b26184e1c97d. It surprisingly fails on a windows build bot: http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/18009 Will reland after some investigation and/or after adding some extra logging to help debug the issue. --- lldb/packages/Python/lldbsuite/test/dotest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 6607f52c49dbd..3fb802f1c1aa5 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -1039,10 +1039,6 @@ def run_suite(): (configuration.suite.countTestCases(), configuration.suite.countTestCases() != 1 and "s" or "")) - if configuration.suite.countTestCases() == 0: - logging.error("did not discover any matching tests") - exitTestSuite(1) - # Invoke the test runner. if configuration.count == 1: result = unittest2.TextTestRunner( From d6a5cce0e7d65562f081569a61595e53cdb8d5d0 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 3 Aug 2020 19:56:10 -0700 Subject: [PATCH 258/600] [lldb/Test] Fix skipTestIfFn for fucntions that return a value Sometimes the decorator is used on a common function rather than the test method, which can return a value. This fails with decorators that use skipTestIfFn under the hood. 
--- lldb/packages/Python/lldbsuite/test/decorators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 8c8f2509a8639..873952e4c91ab 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -132,7 +132,7 @@ def wrapper(*args, **kwargs): if reason is not None: self.skipTest(reason) else: - func(*args, **kwargs) + return func(*args, **kwargs) return wrapper # Some decorators can be called both with no arguments (e.g. @expectedFailureWindows) From 7647c2716e383c091b7063e150d48d5821bcaa67 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 4 Aug 2020 10:16:40 +0700 Subject: [PATCH 259/600] [SimpleLoopUnswitch][NFC] Add option to always drop make.implicit metadata in non-trivial unswitching and save compile time We might want this if we find out that using MustExecute analysis is too expensive. By default we do the analysis, because its complexity does not exceed the complexity of whole-loop copying in unswitching. Follow-up for D84925. Differential Revision: https://reviews.llvm.org/D85001 Reviewed By: asbirlea --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b1cbc714ec8e1..ab1945a1aff11 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -94,6 +94,11 @@ static cl::opt<bool> UnswitchGuards( "simple-loop-unswitch-guards", cl::init(true), cl::Hidden, cl::desc("If enabled, simple loop unswitching will also consider " "llvm.experimental.guard intrinsics as unswitch candidates.")); +static cl::opt<bool> DropNonTrivialImplicitNullChecks( + "simple-loop-unswitch-drop-non-trivial-implicit-null-checks", + cl::init(false), cl::Hidden, + cl::desc("If enabled, drop make.implicit metadata in unswitched implicit " + "null checks to save time analyzing if we can keep it.")); /// Collect all of the loop invariant input values transitively used by the /// homogeneous instruction graph from a given root. @@ -2074,12 +2079,18 @@ static void unswitchNontrivialInvariants( // Drop metadata if we may break its semantics by moving this instr into the // split block. if (TI.getMetadata(LLVMContext::MD_make_implicit)) { - // It is only legal to preserve make.implicit metadata if we are guaranteed - // to reach implicit null check block after following this branch. - ICFLoopSafetyInfo SafetyInfo; - SafetyInfo.computeLoopSafetyInfo(&L); - if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + if (DropNonTrivialImplicitNullChecks) + // Do not spend time trying to understand if we can keep it, just drop it + // to save compile time. TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); + else { + // It is only legal to preserve make.implicit metadata if we are + // guaranteed to reach the implicit null check block after following this branch.
+ ICFLoopSafetyInfo SafetyInfo; + SafetyInfo.computeLoopSafetyInfo(&L); + if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); + } } // The stitching of the branched code back together depends on whether we're From e56626e43826c9d7c35113635d62b57c905ef3c0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 20:35:50 -0700 Subject: [PATCH 260/600] [PGO] Move __profc_ and __profvp_ from their own comdat groups to __profd_'s comdat group D68041 placed `__profc_`, `__profd_` and (if exists) `__profvp_` in different comdat groups. There are some issues: * Cost: one or two additional section headers (`.group` section(s)): 64 or 128 bytes on ELF64. * `__profc_`, `__profd_` and (if exists) `__profvp_` should be retained or discarded. Placing them into separate comdat groups is conceptually inferior. * If the prevailing group does not include `__profvp_` (value profiling not used) but a non-prevailing group from another translation unit has `__profvp_` (the function is inlined into another and triggers value profiling), there will be a stray `__profvp_` if --gc-sections is not enabled. This has been fixed by 3d6f53018f845e893ad34f64ff2851a2e5c3ba1d. Actually, we can reuse an existing symbol (we choose `__profd_`) as the group signature to avoid a string in the string table (the sole reason that D68041 could improve code size is that `__profv_` was an otherwise unused symbol which wasted string table space). This saves one or two section headers. For a -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_INSTRUMENTED=IR build, `ninja clang lld`, the patch has saved 10.5MiB (2.2%) for the total .o size. Reviewed By: davidxl Differential Revision: https://reviews.llvm.org/D84723 --- .../Transforms/Instrumentation/InstrProfiling.cpp | 9 +++++---- .../Instrumentation/InstrProfiling/PR23499.ll | 4 ++-- .../test/Instrumentation/InstrProfiling/comdat.ll | 8 ++++---- llvm/test/Instrumentation/InstrProfiling/icall.ll | 15 ++++++++++++++- .../Instrumentation/InstrProfiling/linkage.ll | 4 ++-- .../test/Transforms/PGOProfile/comdat_internal.ll | 3 ++- 6 files changed, 29 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 75988893fdb85..623f463293cd9 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -882,9 +882,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Visibility = GlobalValue::HiddenVisibility; } } + std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix()); auto MaybeSetComdat = [=](GlobalVariable *GV) { if (NeedComdat) - GV->setComdat(M->getOrInsertComdat(GV->getName())); + GV->setComdat(M->getOrInsertComdat(DataVarName)); }; uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); @@ -949,9 +950,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, #include "llvm/ProfileData/InstrProfData.inc" }; - auto *Data = new GlobalVariable(*M, DataTy, false, Linkage, - ConstantStruct::get(DataTy, DataVals), - getVarName(Inc, getInstrProfDataVarPrefix())); + auto *Data = + new GlobalVariable(*M, DataTy, false, Linkage, + ConstantStruct::get(DataTy, DataVals), DataVarName); Data->setVisibility(Visibility); Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); diff 
--git a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll index 1a4c04947a2b1..098153f39e1db 100644 --- a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll +++ b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll @@ -14,13 +14,13 @@ $_Z3barIvEvv = comdat any @__profn__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", align 1 ; CHECK-NOT: __profn__Z3barIvEvv -; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat, align 8 +; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profd__Z3barIvEvv), align 8 ; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data{{.*}}", comdat, align 8 ; CHECK: @__llvm_prf_nm = private constant [{{.*}} x i8] c"{{.*}}", section "{{.*}}__llvm_prf_names" ; COFF-NOT: __profn__Z3barIvEvv -; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat, align 8 +; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat($__profd__Z3barIvEvv), align 8 ; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}prfd{{.*}}", comdat, align 8 diff --git a/llvm/test/Instrumentation/InstrProfiling/comdat.ll b/llvm/test/Instrumentation/InstrProfiling/comdat.ll index 3f169c4b73423..dfcd71fd06291 100644 --- a/llvm/test/Instrumentation/InstrProfiling/comdat.ll +++ b/llvm/test/Instrumentation/InstrProfiling/comdat.ll @@ -15,9 +15,9 @@ $foo_inline = comdat any @__profn_foo_inline = linkonce_odr hidden constant [10 x i8] c"foo_inline" -; ELF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat, align 8 +; ELF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_inline), align 8 ; ELF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_inline), align 8 ; COFF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define weak_odr void @foo_inline() comdat { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0) @@ -28,9 +28,9 @@ $foo_extern = comdat any @__profn_foo_extern = linkonce_odr hidden constant [10 x i8] c"foo_extern" -; ELF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat, align 8 +; ELF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8 ; ELF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_extern = linkonce_odr hidden 
global{{.*}}, section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_extern), align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) diff --git a/llvm/test/Instrumentation/InstrProfiling/icall.ll b/llvm/test/Instrumentation/InstrProfiling/icall.ll index d92de47421d40..9d45d7cd56358 100644 --- a/llvm/test/Instrumentation/InstrProfiling/icall.ll +++ b/llvm/test/Instrumentation/InstrProfiling/icall.ll @@ -14,6 +14,7 @@ @__profn_foo = private constant [3 x i8] c"foo" +@__profn_bar = private constant [3 x i8] c"bar" define i32 @foo(i32 ()* ) { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0) @@ -23,6 +24,17 @@ define i32 @foo(i32 ()* ) { ret i32 %3 } +$bar = comdat any + +define i32 @bar(i32 ()* ) comdat { +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + %1 = ptrtoint i32 ()* %0 to i64 + call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 12884901887, i64 %1, i32 0, i32 0) + %2 = tail call i32 %0() + ret i32 %2 +} + ; Function Attrs: nounwind declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #0 @@ -31,7 +43,8 @@ declare void @llvm.instrprof.value.profile(i8*, i64, i64, i32, i32) #0 attributes #0 = { nounwind } -; STATIC: @__profvp_foo +; STATIC: @__profvp_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8 +; STATIC: @__profvp_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8 ; STATIC: @__llvm_prf_vnodes ; DYN-NOT: @__profvp_foo diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll index be3af8e51a5f8..5e55ef716820e 100644 --- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll @@ -55,11 +55,11 @@ define linkonce_odr void @foo_inline() { ret void } -; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat, align 8 +; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8 ; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_foo_extern = linkonce_odr hidden global ; MACHO: @__profd_foo_extern = linkonce_odr hidden global -; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat, align 8 +; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat($__profd_foo_extern), align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) diff --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll index f0906bf2b3720..e5915d906a086 100644 --- 
a/llvm/test/Transforms/PGOProfile/comdat_internal.ll +++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll @@ -7,12 +7,13 @@ $foo = comdat any ; CHECK: $foo = comdat any ; CHECK: $__llvm_profile_raw_version = comdat any +; CHECK: $__profd__stdin__foo.[[FOO_HASH:[0-9]+]] = comdat any @bar = global i32 ()* @foo, align 8 ; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat ; CHECK-NOT: __profn__stdin__foo -; CHECK: @__profc__stdin__foo.[[FOO_HASH:[0-9]+]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 +; CHECK: @__profc__stdin__foo.[[FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profd__stdin__foo.[[FOO_HASH]]), align 8 ; CHECK: @__profd__stdin__foo.[[FOO_HASH]] = private global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[FOO_HASH]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo.[[FOO_HASH]], i32 0, i32 0), i8* null ; CHECK-NOT: bitcast (i32 ()* @foo to i8*) ; CHECK-SAME: , i8* null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat, align 8 From bcea3a7a288e0b5ac977f90c46e4eef7946467e7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 10:17:55 -0700 Subject: [PATCH 261/600] Add test utility 'split-file' See https://lists.llvm.org/pipermail/llvm-dev/2020-July/143373.html "[llvm-dev] Multiple documents in one test file" for some discussions. This patch has explored several alternatives. The current semantics are similar to what @dblaikie proposed. `split-file filename output` splits the input file into multiple parts separated by the regex `^(.|//)--- filename` and writes each part to the file `output/filename` (`filename` can include path separators). Use case A (organizing input of different formats (e.g. linker script+assembly) in one file). ``` # RUN: split-file %s %t # RUN: llvm-mc %t/asm -o %t.o # RUN: ld.lld -T %t/lds %t.o -o %t This is sometimes better than the %S/Inputs/ approach because the user can see the auxiliary files immediately and doesn't have to open another file. #--- asm ... #--- lds ... ``` Use case B (for utilities which don't have a built-in input splitting feature): ``` // RUN: split-file %s %t // RUN: llc < %t/1.ll | FileCheck %s --check-prefix=CASE1 // RUN: llc < %t/2.ll | FileCheck %s --check-prefix=CASE2 Combining tests prudently can improve readability. For example, when testing parsing errors where recovery isn't possible, grouping the tests in one file makes the test coverage/strategy easier to see. //--- 1.ll ... //--- 2.ll ... ``` Since this is a new utility, there are no git history concerns for UpperCase variable names. I use lowerCase variable names, as mlir/lld do.
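To make the splitting semantics concrete, here is a hypothetical input `in.txt` (file names invented for illustration):

```
#--- a.txt
aa
#--- sub/b.txt
bb
```

`split-file in.txt out` writes `out/a.txt` and `out/sub/b.txt`. By default each extracted file is padded with leading empty lines so its content keeps the line numbers it had in the combined input (here `bb` stays on line 4), which keeps `[[#@LINE]]`-style FileCheck references stable; `--no-leading-lines` drops the padding.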
Reviewed By: jhenderson, lattner Differential Revision: https://reviews.llvm.org/D83834 --- lld/test/CMakeLists.txt | 2 +- lld/test/ELF/linkerscript/noload.s | 21 ++- llvm/docs/TestingGuide.rst | 23 ++- llvm/test/CMakeLists.txt | 1 + llvm/test/lit.cfg.py | 1 + llvm/test/tools/gold/X86/multiple-sections.ll | 14 +- llvm/test/tools/llvm-strings/radix.test | 45 ++--- .../test/tools/split-file/Inputs/basic-aa.txt | 2 + .../test/tools/split-file/Inputs/basic-bb.txt | 6 + .../test/tools/split-file/Inputs/basic-cc.txt | 8 + llvm/test/tools/split-file/basic.test | 40 ++++ llvm/test/tools/split-file/empty.test | 4 + llvm/test/tools/split-file/error.test | 16 ++ llvm/test/tools/split-file/help.test | 6 + .../tools/split-file/no-leading-lines.test | 10 + .../tools/split-file/output-is-special.test | 8 + llvm/tools/split-file/.clang-tidy | 19 ++ llvm/tools/split-file/CMakeLists.txt | 7 + llvm/tools/split-file/split-file.cpp | 172 ++++++++++++++++++ llvm/utils/gn/secondary/lld/test/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/test/BUILD.gn | 1 + .../secondary/llvm/tools/split-file/BUILD.gn | 4 + 22 files changed, 374 insertions(+), 37 deletions(-) create mode 100644 llvm/test/tools/split-file/Inputs/basic-aa.txt create mode 100644 llvm/test/tools/split-file/Inputs/basic-bb.txt create mode 100644 llvm/test/tools/split-file/Inputs/basic-cc.txt create mode 100644 llvm/test/tools/split-file/basic.test create mode 100644 llvm/test/tools/split-file/empty.test create mode 100644 llvm/test/tools/split-file/error.test create mode 100644 llvm/test/tools/split-file/help.test create mode 100644 llvm/test/tools/split-file/no-leading-lines.test create mode 100644 llvm/test/tools/split-file/output-is-special.test create mode 100644 llvm/tools/split-file/.clang-tidy create mode 100644 llvm/tools/split-file/CMakeLists.txt create mode 100644 llvm/tools/split-file/split-file.cpp create mode 100644 llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index e7d1133307393..52e6118ba876b 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -28,7 +28,7 @@ if (NOT LLD_BUILT_STANDALONE) FileCheck count llc llvm-ar llvm-as llvm-bcanalyzer llvm-config llvm-cvtres llvm-dis llvm-dwarfdump llvm-lib llvm-lipo llvm-mc llvm-nm llvm-objcopy llvm-objdump llvm-pdbutil llvm-readelf llvm-readobj llvm-strip not obj2yaml - opt yaml2obj + opt split-file yaml2obj ) endif() diff --git a/lld/test/ELF/linkerscript/noload.s b/lld/test/ELF/linkerscript/noload.s index 2f52b465854e2..20b07b2b185a2 100644 --- a/lld/test/ELF/linkerscript/noload.s +++ b/lld/test/ELF/linkerscript/noload.s @@ -1,12 +1,8 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: echo "SECTIONS { \ -# RUN: .data_noload_a (NOLOAD) : { *(.data_noload_a) } \ -# RUN: .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } \ -# RUN: .no_input_sec_noload (NOLOAD) : { . 
+= 1; } \ -# RUN: .text (0x20000) : { *(.text) } };" > %t.script -# RUN: ld.lld -o %t --script %t.script %t.o -# RUN: llvm-readelf -S -l %t | FileCheck %s +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/asm -o %t.o +# RUN: ld.lld --script %t/lds %t.o -o %t/out +# RUN: llvm-readelf -S -l %t/out | FileCheck %s # CHECK: Name Type Address Off Size # CHECK: .data_noload_a NOBITS 0000000000000000 [[OFF:[0-9a-f]+]] 001000 @@ -16,6 +12,7 @@ # CHECK: Type Offset VirtAddr PhysAddr # CHECK-NEXT: LOAD 0x001000 0x0000000000020000 0x0000000000020000 +#--- asm .section .text,"ax",@progbits nop @@ -24,3 +21,11 @@ .section .data_noload_b,"aw",@progbits .zero 4096 + +#--- lds +SECTIONS { + .data_noload_a (NOLOAD) : { *(.data_noload_a) } + .data_noload_b (0x10000) (NOLOAD) : { *(.data_noload_b) } + .no_input_sec_noload (NOLOAD) : { . += 1; } + .text (0x20000) : { *(.text) } +} diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 2e937f0006272..4ca1a359b64c9 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -271,8 +271,27 @@ adding your code there instead of creating a new file. Extra files ----------- -If your test requires extra files besides the file containing the ``RUN:`` -lines, the idiomatic place to put them is in a subdirectory ``Inputs``. +If your test requires extra files besides the file containing the ``RUN:`` lines +and the extra files are small, consider specifying them in the same file and +using ``split-file`` to extract them. For example, + +.. code-block:: llvm + + ; RUN: split-file %s %t + ; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s + + ; CHECK: ... + + ;--- a.ll + ... + ;--- b.ll + ... + +The parts are separated by the regex ``^(.|//)--- ``. By default the +extracted content has leading empty lines to preserve line numbers. Specify +``--no-leading-lines`` to drop leading lines. + +If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``. You can then refer to the extra files as ``%S/Inputs/foo.bar``. For example, consider ``test/Linker/ident.ll``. The directory structure is diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt index 91215b3ca0ef0..cde80035a09bf 100644 --- a/llvm/test/CMakeLists.txt +++ b/llvm/test/CMakeLists.txt @@ -119,6 +119,7 @@ set(LLVM_TEST_DEPENDS opt sancov sanstats + split-file verify-uselistorder yaml-bench yaml2obj diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 0a3289fcc4ad4..4502ac58c45ac 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -141,6 +141,7 @@ def get_asan_rtlib(): ToolSubst('%llvm-objcopy', FindTool('llvm-objcopy')), ToolSubst('%llvm-strip', FindTool('llvm-strip')), ToolSubst('%llvm-install-name-tool', FindTool('llvm-install-name-tool')), + ToolSubst('%split-file', FindTool('split-file')), ] # FIXME: Why do we have both `lli` and `%lli` that do slightly different things? 
diff --git a/llvm/test/tools/gold/X86/multiple-sections.ll b/llvm/test/tools/gold/X86/multiple-sections.ll index facbd8d992ed7..575fb81fcd6f4 100644 --- a/llvm/test/tools/gold/X86/multiple-sections.ll +++ b/llvm/test/tools/gold/X86/multiple-sections.ll @@ -1,10 +1,8 @@ -; RUN: echo ".text.tin" > %t_order_lto.txt -; RUN: echo ".text._start" >> %t_order_lto.txt -; RUN: echo ".text.pat" >> %t_order_lto.txt -; RUN: llvm-as %s -o %t.o +; RUN: split-file %s %t +; RUN: llvm-as %t/a.ll -o %t.o ; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \ ; RUN: -m elf_x86_64 -o %t.exe %t.o \ -; RUN: --section-ordering-file=%t_order_lto.txt +; RUN: --section-ordering-file=%t/order ; RUN: llvm-readelf -s %t.exe | FileCheck %s ; Check that the order of the sections is tin -> _start -> pat. @@ -13,6 +11,12 @@ ; CHECK: 00000000004000b0 1 FUNC LOCAL DEFAULT 1 tin ; CHECK: 00000000004000c0 15 FUNC GLOBAL DEFAULT 1 _start +;--- order +.text.tin +.text._start +.text.pat + +;--- a.ll target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/tools/llvm-strings/radix.test b/llvm/test/tools/llvm-strings/radix.test index d23fb3cddc8f8..4dafbd1c84fc9 100644 --- a/llvm/test/tools/llvm-strings/radix.test +++ b/llvm/test/tools/llvm-strings/radix.test @@ -1,29 +1,32 @@ ## Show that llvm-strings can handle the -t/--radix switch properly. -RUN: echo one > %t -RUN: echo two >> %t -RUN: echo three >> %t -RUN: echo four >> %t -RUN: echo five >> %t -RUN: echo six >> %t -RUN: echo seven >> %t -RUN: echo eight >> %t -RUN: echo nine >> %t -RUN: echo ten >> %t - -RUN: llvm-strings %t | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}} -RUN: llvm-strings -t d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}} -RUN: llvm-strings -t o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}} -RUN: llvm-strings -t x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}} +RUN: split-file --no-leading-lines %s %t +#--- a.txt +one +two +three +four +five +six +seven +eight +nine +ten +#--- end + +RUN: llvm-strings %t/a.txt | FileCheck %s -check-prefix CHECK-NONE --implicit-check-not={{.}} +RUN: llvm-strings -t d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace --implicit-check-not={{.}} +RUN: llvm-strings -t o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace --implicit-check-not={{.}} +RUN: llvm-strings -t x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace --implicit-check-not={{.}} ## Show --radix works too. -RUN: llvm-strings --radix d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace -RUN: llvm-strings --radix o %t | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace -RUN: llvm-strings --radix x %t | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace +RUN: llvm-strings --radix d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace +RUN: llvm-strings --radix o %t/a.txt | FileCheck %s -check-prefix CHECK-OCT --strict-whitespace +RUN: llvm-strings --radix x %t/a.txt | FileCheck %s -check-prefix CHECK-HEX --strict-whitespace ## Show different syntaxes work. 
-RUN: llvm-strings --radix=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace -RUN: llvm-strings -t=d %t | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace +RUN: llvm-strings --radix=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace +RUN: llvm-strings -t=d %t/a.txt | FileCheck %s -check-prefix CHECK-DEC --strict-whitespace CHECK-NONE: {{^}}three CHECK-NONE: {{^}}four @@ -54,5 +57,5 @@ CHECK-HEX: {{^}} 22 eight CHECK-HEX: {{^}} 28 nine ## Show that an invalid value is rejected. -RUN: not llvm-strings --radix z %t 2>&1 | FileCheck %s --check-prefix=INVALID +RUN: not llvm-strings --radix z %t/a.txt 2>&1 | FileCheck %s --check-prefix=INVALID INVALID: llvm-strings{{.*}}: for the --radix option: Cannot find option named 'z'! diff --git a/llvm/test/tools/split-file/Inputs/basic-aa.txt b/llvm/test/tools/split-file/Inputs/basic-aa.txt new file mode 100644 index 0000000000000..0b9ddeb2fc12a --- /dev/null +++ b/llvm/test/tools/split-file/Inputs/basic-aa.txt @@ -0,0 +1,2 @@ + +aa diff --git a/llvm/test/tools/split-file/Inputs/basic-bb.txt b/llvm/test/tools/split-file/Inputs/basic-bb.txt new file mode 100644 index 0000000000000..0f20b8cf755bc --- /dev/null +++ b/llvm/test/tools/split-file/Inputs/basic-bb.txt @@ -0,0 +1,6 @@ + + + +; Comments are preserved. +bb + diff --git a/llvm/test/tools/split-file/Inputs/basic-cc.txt b/llvm/test/tools/split-file/Inputs/basic-cc.txt new file mode 100644 index 0000000000000..dc815bf4b7dc2 --- /dev/null +++ b/llvm/test/tools/split-file/Inputs/basic-cc.txt @@ -0,0 +1,8 @@ + + + + + + + +cc diff --git a/llvm/test/tools/split-file/basic.test b/llvm/test/tools/split-file/basic.test new file mode 100644 index 0000000000000..5d32c3429ed37 --- /dev/null +++ b/llvm/test/tools/split-file/basic.test @@ -0,0 +1,40 @@ +#--- aa +aa +;--- bb +; Comments are preserved. +bb + +//--- subdir/cc +cc +//--- end + +# RUN: rm -rf %t +# RUN: split-file %s %t +# RUN: diff %S/Inputs/basic-aa.txt %t/aa +# RUN: diff %S/Inputs/basic-bb.txt %t/bb +# RUN: diff %S/Inputs/basic-cc.txt %t/subdir/cc +# RUN: FileCheck %s --check-prefix=END < %t/end + +## Can be called on a non-empty directory. +# RUN: split-file %s %t +# RUN: diff %S/Inputs/basic-aa.txt %t/aa + +## Test that we will delete the output if it is a file, so that we can create +## a directory. 
+# RUN: rm -rf %t && touch %t +# RUN: split-file %s %t +# RUN: diff %S/Inputs/basic-aa.txt %t/aa + +# END: RUN: split-file %s %t + +# RUN: not %split-file 2>&1 | FileCheck %s --check-prefix=NO_INPUT + +# NO_INPUT: split-file: error: input filename is not specified + +# RUN: not %split-file %s '' 2>&1 | FileCheck %s --check-prefix=NO_OUTPUT + +# NO_OUTPUT: split-file: error: output directory is not specified + +# RUN: not %split-file %S/Inputs/basic-aa.txt %t 2>&1 | FileCheck %s --check-prefix=NOT_EXIST + +# NOT_EXIST: split-file: error: {{.*}}.txt: no part separator was found diff --git a/llvm/test/tools/split-file/empty.test b/llvm/test/tools/split-file/empty.test new file mode 100644 index 0000000000000..e76bea93bc2e1 --- /dev/null +++ b/llvm/test/tools/split-file/empty.test @@ -0,0 +1,4 @@ +# RUN: split-file --no-leading-lines %s %t +# RUN: count 0 < %t/empty + +#--- empty diff --git a/llvm/test/tools/split-file/error.test b/llvm/test/tools/split-file/error.test new file mode 100644 index 0000000000000..9efa5adca49ac --- /dev/null +++ b/llvm/test/tools/split-file/error.test @@ -0,0 +1,16 @@ +# RUN: not %split-file %s %t 2>&1 | FileCheck %s +# RUN: not ls %t/dup + +# CHECK: {{.*}}.test:[[#@LINE+1]]: error: empty part name +//--- + +# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space +//--- leading_space + +# CHECK: {{.*}}.test:[[#@LINE+1]]: error: part name cannot have leading or trailing space +//--- trailing_space + +;--- dup + +# CHECK: {{.*}}.test:[[#@LINE+1]]: error: ';--- dup' occurs more than once +;--- dup diff --git a/llvm/test/tools/split-file/help.test b/llvm/test/tools/split-file/help.test new file mode 100644 index 0000000000000..27c450aeac3a7 --- /dev/null +++ b/llvm/test/tools/split-file/help.test @@ -0,0 +1,6 @@ +RUN: split-file --help 2>&1 | FileCheck --implicit-check-not='General Options:' %s +CHECK: OVERVIEW: Split input {{.*}} +CHECK: USAGE: split-file [options] filename directory +CHECK: Generic Options: +CHECK: split-file Options: +CHECK: --no-leading-lines diff --git a/llvm/test/tools/split-file/no-leading-lines.test b/llvm/test/tools/split-file/no-leading-lines.test new file mode 100644 index 0000000000000..d4de34f33d1e8 --- /dev/null +++ b/llvm/test/tools/split-file/no-leading-lines.test @@ -0,0 +1,10 @@ +## With --no-leading-lines, don't add leading lines (which is used to preserve line numbers). + +# RUN: split-file --no-leading-lines %s %t +# RUN: count 1 < %t/a.txt +# RUN: FileCheck %s < %t/a.txt + +# CHECK: input + +#--- a.txt +input diff --git a/llvm/test/tools/split-file/output-is-special.test b/llvm/test/tools/split-file/output-is-special.test new file mode 100644 index 0000000000000..98bb4d36a4ff3 --- /dev/null +++ b/llvm/test/tools/split-file/output-is-special.test @@ -0,0 +1,8 @@ +# UNSUPPORTED: system-windows +# REQUIRES: shell + +## Don't delete the output if it is special, otherwise root may accidentally +## remove important special files. +# RUN: not split-file %s /dev/null 2>&1 | FileCheck %s + +# CHECK: error: /dev/null: output cannot be a special file diff --git a/llvm/tools/split-file/.clang-tidy b/llvm/tools/split-file/.clang-tidy new file mode 100644 index 0000000000000..87ec2ff53af6e --- /dev/null +++ b/llvm/tools/split-file/.clang-tidy @@ -0,0 +1,19 @@ +# Almost identical to the top-level .clang-tidy, except that {Member,Parameter,Variable}Case use camelBack. 
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming' +CheckOptions: + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.EnumCase + value: CamelCase + - key: readability-identifier-naming.FunctionCase + value: camelBack + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + - key: readability-identifier-naming.UnionCase + value: CamelCase + - key: readability-identifier-naming.VariableCase + value: camelBack + - key: readability-identifier-naming.IgnoreMainLikeFunctions + value: 1 diff --git a/llvm/tools/split-file/CMakeLists.txt b/llvm/tools/split-file/CMakeLists.txt new file mode 100644 index 0000000000000..ba998483c22aa --- /dev/null +++ b/llvm/tools/split-file/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_llvm_tool(split-file + split-file.cpp + ) diff --git a/llvm/tools/split-file/split-file.cpp b/llvm/tools/split-file/split-file.cpp new file mode 100644 index 0000000000000..772a19164dc48 --- /dev/null +++ b/llvm/tools/split-file/split-file.cpp @@ -0,0 +1,172 @@ +//===- split-file.cpp - Input splitting utility ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Split input into multiple parts separated by regex '^(.|//)--- ' and extract +// the specified part. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/WithColor.h" +#include <string> +#include <system_error> + +using namespace llvm; + +static cl::OptionCategory cat("split-file Options"); + +static cl::opt<std::string> input(cl::Positional, cl::desc("filename"), + cl::cat(cat)); + +static cl::opt<std::string> output(cl::Positional, cl::desc("directory"), + cl::value_desc("directory"), cl::cat(cat)); + +static cl::opt<bool> noLeadingLines("no-leading-lines", + cl::desc("Don't preserve line numbers"), + cl::cat(cat)); + +static StringRef toolName; +static int errorCount; + +LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename, + const Twine &message) { + if (filename.empty()) + WithColor::error(errs(), toolName) << message << '\n'; + else + WithColor::error(errs(), toolName) << filename << ": " << message << '\n'; + exit(1); +} + +static void error(StringRef filename, int64_t line, const Twine &message) { + ++errorCount; + errs() << filename << ':' << line << ": "; + WithColor::error(errs()) << message << '\n'; +} + +namespace { +struct Part { + const char *begin = nullptr; + const char *end = nullptr; + int64_t leadingLines = 0; +}; +} // namespace + +static int handle(MemoryBuffer &inputBuf, StringRef input) { + DenseMap<StringRef, Part> partToBegin; + StringRef lastPart, separator; + for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { + const int64_t lineNo = i.line_number(); + const StringRef line = *i++; + const size_t markerLen =
line.startswith("//") ? 6 : 5; + if (!(line.size() >= markerLen && + line.substr(markerLen - 4).startswith("--- "))) + continue; + separator = line.substr(0, markerLen); + const StringRef partName = line.substr(markerLen); + if (partName.empty()) { + error(input, lineNo, "empty part name"); + continue; + } + if (isSpace(partName.front()) || isSpace(partName.back())) { + error(input, lineNo, "part name cannot have leading or trailing space"); + continue; + } + + auto res = partToBegin.try_emplace(partName); + if (!res.second) { + error(input, lineNo, + "'" + separator + partName + "' occurs more than once"); + continue; + } + if (!lastPart.empty()) + partToBegin[lastPart].end = line.data(); + Part &cur = res.first->second; + if (!i.is_at_eof()) + cur.begin = i->data(); + // If --no-leading-lines is specified, leadingLines stays 0. Otherwise + // append newlines so that the extracted part preserves line numbers. + cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1; + + lastPart = partName; + } + if (lastPart.empty()) + fatal(input, "no part separator was found"); + if (errorCount) + return 1; + partToBegin[lastPart].end = inputBuf.getBufferEnd(); + + std::vector<std::unique_ptr<ToolOutputFile>> outputFiles; + SmallString<256> partPath; + for (auto &keyValue : partToBegin) { + partPath.clear(); + sys::path::append(partPath, output, keyValue.first); + std::error_code ec = + sys::fs::create_directories(sys::path::parent_path(partPath)); + if (ec) + fatal(input, ec.message()); + auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec, + llvm::sys::fs::OF_None); + if (!f) + fatal(input, ec.message()); + + Part &part = keyValue.second; + for (int64_t i = 0; i != part.leadingLines; ++i) + (*f).os().write('\n'); + if (part.begin) + (*f).os().write(part.begin, part.end - part.begin); + outputFiles.push_back(std::move(f)); + } + + for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles) + outputFile->keep(); + return 0; +} + +int main(int argc, const char **argv) { + toolName = sys::path::stem(argv[0]); + cl::HideUnrelatedOptions({&cat}); + cl::ParseCommandLineOptions( + argc, argv, + "Split input into multiple parts separated by regex '^(.|//)--- ' and " + "extract the part specified by '^(.|//)--- '\n", + nullptr, + /*EnvVar=*/nullptr, + /*LongOptionsUseDoubleDash=*/true); + + if (input.empty()) + fatal("", "input filename is not specified"); + if (output.empty()) + fatal("", "output directory is not specified"); + ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr = + MemoryBuffer::getFileOrSTDIN(input); + if (std::error_code ec = bufferOrErr.getError()) + fatal(input, ec.message()); + + // Delete output if it is a file or an empty directory, so that we can create + // a directory.
+ sys::fs::file_status status; + if (std::error_code ec = sys::fs::status(output, status)) + if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory)) + fatal(output, ec.message()); + if (status.type() != sys::fs::file_type::file_not_found && + status.type() != sys::fs::file_type::directory_file && + status.type() != sys::fs::file_type::regular_file) + fatal(output, "output cannot be a special file"); + if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true)) + if (ec.value() != static_cast<int>(std::errc::directory_not_empty)) + fatal(output, ec.message()); + return handle(**bufferOrErr, input); +} diff --git a/llvm/utils/gn/secondary/lld/test/BUILD.gn b/llvm/utils/gn/secondary/lld/test/BUILD.gn index 581cc5482578c..bfb63a39ba65a 100644 --- a/llvm/utils/gn/secondary/lld/test/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/test/BUILD.gn @@ -94,6 +94,7 @@ group("test") { "//llvm/tools/llvm-readobj:symlinks", "//llvm/tools/obj2yaml", "//llvm/tools/opt", + "//llvm/tools/split-file", "//llvm/tools/yaml2obj", "//llvm/utils/FileCheck", "//llvm/utils/count", diff --git a/llvm/utils/gn/secondary/llvm/test/BUILD.gn b/llvm/utils/gn/secondary/llvm/test/BUILD.gn index 2c4a23ffbaacb..c714d9b5ba7b1 100644 --- a/llvm/utils/gn/secondary/llvm/test/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/test/BUILD.gn @@ -260,6 +260,7 @@ group("test") { "//llvm/tools/opt", "//llvm/tools/sancov", "//llvm/tools/sanstats", + "//llvm/tools/split-file", "//llvm/tools/verify-uselistorder", "//llvm/tools/yaml2obj", "//llvm/unittests", diff --git a/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn new file mode 100644 index 0000000000000..4bf9269c3c38d --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/tools/split-file/BUILD.gn @@ -0,0 +1,4 @@ +executable("split-file") { + deps = [ "//llvm/lib/Support" ] + sources = [ "split-file.cpp" ] +} From 57899934eab18bbcab3482cc3ef862b0a1617ad0 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 4 Aug 2020 12:20:12 +0900 Subject: [PATCH 262/600] [AMDGPU] Make GCNRegBankReassign assign based on subreg banks When scavenging, consider the sub-register of the source operand to determine the bank of a candidate register (not just sub0). Without this, it is possible to introduce an infinite loop, e.g. $sgpr15_sgpr16_sgpr17 can be assigned for a conflict between $sgpr0 and SGPR_96:sub1.
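For intuition, the bank model the pass reasons about can be sketched as follows. This is a simplified stand-alone illustration, not the pass's code; it assumes the bank counts the pass uses (4 VGPR banks, and 8 SGPR banks each spanning an aligned pair of 32-bit SGPRs):

```cpp
#include <cstdio>

constexpr unsigned NumVgprBanks = 4;
constexpr unsigned NumSgprBanks = 8;

// The lane an operand actually reads is base + channel(subreg), so the
// conflicting bank depends on the sub-register, not just on sub0.
unsigned vgprBank(unsigned BaseIdx, unsigned SubRegChannel) {
  return (BaseIdx + SubRegChannel) % NumVgprBanks;
}

unsigned sgprBank(unsigned BaseIdx, unsigned SubRegChannel) {
  return ((BaseIdx + SubRegChannel) / 2) % NumSgprBanks;
}

int main() {
  // $sgpr15_sgpr16_sgpr17 read through sub1 hits the bank of sgpr16:
  // ((15 + 1) / 2) % 8 == 0, the same bank as $sgpr0 -- the conflict from
  // the commit message. Using sub0 instead would report bank 7 and miss it.
  std::printf("sub1 bank: %u, sgpr0 bank: %u\n", sgprBank(15, 1), sgprBank(0, 0));
}
```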
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D84910 --- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp | 97 ++++++++++++------- .../AMDGPU/GlobalISel/insertelement.ll | 74 +++++++------- .../AMDGPU/regbank-reassign-wave64.mir | 69 +++++++++++++ llvm/test/CodeGen/AMDGPU/regbank-reassign.mir | 78 +++++++++++++++ 4 files changed, 245 insertions(+), 73 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index 98d971630ca4f..79b33e24c8302 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -83,9 +83,10 @@ class GCNRegBankReassign : public MachineFunctionPass { class Candidate { public: - Candidate(MachineInstr *mi, unsigned reg, unsigned freebanks, - unsigned weight) - : MI(mi), Reg(reg), FreeBanks(freebanks), Weight(weight) {} + Candidate(MachineInstr *mi, unsigned reg, unsigned subreg, + unsigned freebanks, unsigned weight) + : MI(mi), Reg(reg), SubReg(subreg), FreeBanks(freebanks), + Weight(weight) {} bool operator< (const Candidate& RHS) const { return Weight < RHS.Weight; } @@ -100,6 +101,7 @@ class GCNRegBankReassign : public MachineFunctionPass { MachineInstr *MI; unsigned Reg; + unsigned SubReg; unsigned FreeBanks; unsigned Weight; }; @@ -162,7 +164,7 @@ class GCNRegBankReassign : public MachineFunctionPass { const MCPhysReg *CSRegs; // Returns bank for a phys reg. - unsigned getPhysRegBank(unsigned Reg) const; + unsigned getPhysRegBank(unsigned Reg, unsigned SubReg) const; // Return a bit set for each register bank used. 4 banks for VGPRs and // 8 banks for SGPRs. @@ -176,7 +178,7 @@ class GCNRegBankReassign : public MachineFunctionPass { // a register chosen from Bank. std::pair analyzeInst(const MachineInstr &MI, unsigned Reg = AMDGPU::NoRegister, - int Bank = -1); + unsigned SubReg = 0, int Bank = -1); // Return true if register is regular VGPR or SGPR or their tuples. // Returns false for special registers like m0, vcc etc. @@ -216,11 +218,12 @@ class GCNRegBankReassign : public MachineFunctionPass { // candidates are collected and added to work list. unsigned computeStallCycles(unsigned SrcReg, unsigned Reg = AMDGPU::NoRegister, - int Bank = -1, bool Collect = false); + unsigned SubReg = 0, int Bank = -1, + bool Collect = false); // Search for a register in Bank unused within LI. // Returns phys reg or NoRegister. - unsigned scavengeReg(LiveInterval& LI, unsigned Bank) const; + unsigned scavengeReg(LiveInterval &LI, unsigned Bank, unsigned SubReg) const; // Try to reassign candidate. Returns number or stall cycles saved. 
unsigned tryReassign(Candidate &C); @@ -277,15 +280,24 @@ char GCNRegBankReassign::ID = 0; char &llvm::GCNRegBankReassignID = GCNRegBankReassign::ID; -unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const { +unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg, + unsigned SubReg) const { assert(Register::isPhysicalRegister(Reg)); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); unsigned Size = TRI->getRegSizeInBits(*RC); if (Size == 16) Reg = TRI->get32BitRegister(Reg); - else if (Size > 32) - Reg = TRI->getSubReg(Reg, AMDGPU::sub0); + else if (Size > 32) { + if (SubReg) { + const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg); + Reg = TRI->getSubReg(Reg, SubReg); + if (TRI->getRegSizeInBits(*SubRC) > 32) + Reg = TRI->getSubReg(Reg, AMDGPU::sub0); + } else { + Reg = TRI->getSubReg(Reg, AMDGPU::sub0); + } + } if (TRI->hasVGPRs(RC)) { Reg -= AMDGPU::VGPR0; @@ -360,7 +372,7 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg, std::pair GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg, - int Bank) { + unsigned SubReg, int Bank) { unsigned StallCycles = 0; unsigned UsedBanks = 0; @@ -375,26 +387,39 @@ GCNRegBankReassign::analyzeInst(const MachineInstr &MI, unsigned Reg, if (!Op.isReg() || Op.isUndef()) continue; - Register R = Op.getReg(); - if (TRI->hasAGPRs(TRI->getRegClassForReg(*MRI, R))) - continue; + const Register R = Op.getReg(); + const TargetRegisterClass *RC = TRI->getRegClassForReg(*MRI, R); - unsigned ShiftedBank = Bank; + // Do not compute stalls for AGPRs + if (TRI->hasAGPRs(RC)) + continue; - if (Bank != -1 && R == Reg && Op.getSubReg()) { - unsigned Offset = TRI->getChannelFromSubReg(Op.getSubReg()); + // Do not compute stalls if sub-register covers all banks + if (Op.getSubReg()) { LaneBitmask LM = TRI->getSubRegIndexLaneMask(Op.getSubReg()); - if (Offset && Bank < NUM_VGPR_BANKS) { - // If a register spans all banks we cannot shift it to avoid conflict. + if (TRI->hasVGPRs(RC)) { if (TRI->getNumCoveredRegs(LM) >= NUM_VGPR_BANKS) continue; - ShiftedBank = (Bank + Offset) % NUM_VGPR_BANKS; - } else if (Offset > 1 && Bank >= SGPR_BANK_OFFSET) { - // If a register spans all banks we cannot shift it to avoid conflict. + } else { if (TRI->getNumCoveredRegs(LM) / 2 >= NUM_SGPR_BANKS) continue; + } + } + + unsigned ShiftedBank = Bank; + + if (Bank != -1 && R == Reg && (Op.getSubReg() || SubReg)) { + unsigned RegOffset = + TRI->getChannelFromSubReg(SubReg ? SubReg : (unsigned)AMDGPU::sub0); + unsigned Offset = TRI->getChannelFromSubReg( + Op.getSubReg() ? Op.getSubReg() : (unsigned)AMDGPU::sub0); + if (Bank < NUM_VGPR_BANKS) { + unsigned Shift = ((NUM_VGPR_BANKS + Offset) - RegOffset); + ShiftedBank = (Bank + Shift) % NUM_VGPR_BANKS; + } else if (Bank >= SGPR_BANK_OFFSET) { + unsigned Shift = (NUM_SGPR_BANKS + (Offset >> 1)) - (RegOffset >> 1); ShiftedBank = SGPR_BANK_OFFSET + - (Bank - SGPR_BANK_OFFSET + (Offset >> 1)) % NUM_SGPR_BANKS; + (Bank - SGPR_BANK_OFFSET + Shift) % NUM_SGPR_BANKS; } } @@ -576,17 +601,17 @@ void GCNRegBankReassign::collectCandidates(MachineInstr& MI, unsigned FreeBanks1 = getFreeBanks(Reg1, SubReg1, Mask1, UsedBanks); unsigned FreeBanks2 = getFreeBanks(Reg2, SubReg2, Mask2, UsedBanks); if (FreeBanks1) - Candidates.push(Candidate(&MI, Reg1, FreeBanks1, Weight - + ((Size2 > Size1) ? 1 : 0))); + Candidates.push(Candidate(&MI, Reg1, SubReg1, FreeBanks1, + Weight + ((Size2 > Size1) ? 
1 : 0))); if (FreeBanks2) - Candidates.push(Candidate(&MI, Reg2, FreeBanks2, Weight - + ((Size1 > Size2) ? 1 : 0))); + Candidates.push(Candidate(&MI, Reg2, SubReg2, FreeBanks2, + Weight + ((Size1 > Size2) ? 1 : 0))); } } } -unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg, - unsigned Reg, int Bank, +unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg, unsigned Reg, + unsigned SubReg, int Bank, bool Collect) { unsigned TotalStallCycles = 0; SmallSet Visited; @@ -598,7 +623,7 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg, continue; unsigned StallCycles; unsigned UsedBanks; - std::tie(StallCycles, UsedBanks) = analyzeInst(MI, Reg, Bank); + std::tie(StallCycles, UsedBanks) = analyzeInst(MI, Reg, SubReg, Bank); TotalStallCycles += StallCycles; if (Collect) collectCandidates(MI, UsedBanks, StallCycles); @@ -607,8 +632,8 @@ unsigned GCNRegBankReassign::computeStallCycles(unsigned SrcReg, return TotalStallCycles; } -unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI, - unsigned Bank) const { +unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, + unsigned SubReg) const { const TargetRegisterClass *RC = MRI->getRegClass(LI.reg); unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs : MaxNumSGPRs; @@ -620,7 +645,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval& LI, if (TRI->isSubRegisterEq(Reg, MaxReg)) break; - if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg) != Bank) + if (!MRI->isAllocatable(Reg) || getPhysRegBank(Reg, SubReg) != Bank) continue; for (unsigned I = 0; CSRegs[I]; ++I) @@ -669,7 +694,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { for (int Bank = 0; Bank < NUM_BANKS; ++Bank) { if (C.FreeBanks & (1 << Bank)) { LLVM_DEBUG(dbgs() << "Trying bank " << printBank(Bank) << '\n'); - unsigned Stalls = computeStallCycles(C.Reg, C.Reg, Bank); + unsigned Stalls = computeStallCycles(C.Reg, C.Reg, C.SubReg, Bank); if (Stalls < OrigStalls) { LLVM_DEBUG(dbgs() << "With bank " << printBank(Bank) << " -> " << Stalls << '\n'); @@ -683,7 +708,7 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { LRM->unassign(LI); while (!BankStalls.empty()) { BankStall BS = BankStalls.pop_back_val(); - unsigned Reg = scavengeReg(LI, BS.Bank); + unsigned Reg = scavengeReg(LI, BS.Bank, C.SubReg); if (Reg == AMDGPU::NoRegister) { LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank) << '\n'); @@ -801,7 +826,7 @@ bool GCNRegBankReassign::runOnMachineFunction(MachineFunction &MF) { Candidates.pop_back(); if (LocalCyclesSaved) { removeCandidates(C.Reg); - computeStallCycles(C.Reg, AMDGPU::NoRegister, -1, true); + computeStallCycles(C.Reg, AMDGPU::NoRegister, 0, -1, true); Candidates.sort(); LLVM_DEBUG(dbgs() << "\nCandidates:\n\n"; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index 8e4a071701b35..b4afc48f98ba8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -1492,7 +1492,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double % ; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 5, v18 ; MOVREL-NEXT: v_mov_b32_e32 v19, v0 ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 -; MOVREL-NEXT: v_mov_b32_e32 v20, v1 +; MOVREL-NEXT: v_mov_b32_e32 v23, v1 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, 2, v18 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s2, 3, v18 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 7, v18 @@ -1501,7 +1501,7 @@ define amdgpu_ps void 
@dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double % ; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 ; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v16, s3 ; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v20, v17, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v23, v17, vcc_lo ; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 ; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v17, s3 ; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v17, s4 @@ -2123,7 +2123,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: v_add_nc_u32_e32 v18, 1, v18 ; MOVREL-NEXT: v_mov_b32_e32 v19, v0 -; MOVREL-NEXT: v_mov_b32_e32 v20, v1 +; MOVREL-NEXT: v_mov_b32_e32 v23, v1 ; MOVREL-NEXT: ; implicit-def: $vcc_hi ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v18 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v18 @@ -2137,7 +2137,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do ; MOVREL-NEXT: v_cndmask_b32_e64 v2, v2, v16, s0 ; MOVREL-NEXT: v_cndmask_b32_e64 v8, v8, v16, s3 ; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 -; MOVREL-NEXT: v_cndmask_b32_e32 v1, v20, v17, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v1, v23, v17, vcc_lo ; MOVREL-NEXT: v_cndmask_b32_e64 v3, v3, v17, s0 ; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v17, s3 ; MOVREL-NEXT: v_cndmask_b32_e64 v11, v11, v17, s4 @@ -4111,7 +4111,7 @@ define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, ; MOVREL-NEXT: v_cmp_eq_u32_e64 s3, 4, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s4, 5, v16 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s5, 6, v16 -; MOVREL-NEXT: v_mov_b32_e32 v17, v2 +; MOVREL-NEXT: v_mov_b32_e32 v19, v2 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, 1, v16 ; MOVREL-NEXT: v_mov_b32_e32 v18, v3 ; MOVREL-NEXT: v_cndmask_b32_e64 v6, v6, v14, s2 @@ -4119,7 +4119,7 @@ define amdgpu_ps <7 x double> @dyn_insertelement_v7f64_v_v_v(<7 x double> %vec, ; MOVREL-NEXT: v_cndmask_b32_e64 v10, v10, v14, s4 ; MOVREL-NEXT: v_cndmask_b32_e64 v12, v12, v14, s5 ; MOVREL-NEXT: v_cndmask_b32_e64 v7, v7, v15, s2 -; MOVREL-NEXT: v_cndmask_b32_e64 v2, v17, v14, s0 +; MOVREL-NEXT: v_cndmask_b32_e64 v2, v19, v14, s0 ; MOVREL-NEXT: v_cndmask_b32_e64 v9, v9, v15, s3 ; MOVREL-NEXT: v_cndmask_b32_e64 v3, v18, v15, s0 ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v14, vcc_lo @@ -4251,42 +4251,42 @@ define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_s_v_s(<5 x double> inreg ; MOVREL-NEXT: s_mov_b32 s7, s9 ; MOVREL-NEXT: s_mov_b32 s8, s10 ; MOVREL-NEXT: s_mov_b32 s9, s11 -; MOVREL-NEXT: v_mov_b32_e32 v18, s15 -; MOVREL-NEXT: v_mov_b32_e32 v17, s14 -; MOVREL-NEXT: v_mov_b32_e32 v16, s13 -; MOVREL-NEXT: v_mov_b32_e32 v15, s12 -; MOVREL-NEXT: v_mov_b32_e32 v14, s11 -; MOVREL-NEXT: v_mov_b32_e32 v13, s10 -; MOVREL-NEXT: v_mov_b32_e32 v12, s9 -; MOVREL-NEXT: v_mov_b32_e32 v11, s8 -; MOVREL-NEXT: v_mov_b32_e32 v10, s7 -; MOVREL-NEXT: v_mov_b32_e32 v9, s6 -; MOVREL-NEXT: v_mov_b32_e32 v8, s5 -; MOVREL-NEXT: v_mov_b32_e32 v7, s4 -; MOVREL-NEXT: v_mov_b32_e32 v6, s3 -; MOVREL-NEXT: v_mov_b32_e32 v5, s2 -; MOVREL-NEXT: v_mov_b32_e32 v4, s1 -; MOVREL-NEXT: v_mov_b32_e32 v3, s0 +; MOVREL-NEXT: v_mov_b32_e32 v20, s15 +; MOVREL-NEXT: v_mov_b32_e32 v19, s14 +; MOVREL-NEXT: v_mov_b32_e32 v18, s13 +; MOVREL-NEXT: v_mov_b32_e32 v17, s12 +; MOVREL-NEXT: v_mov_b32_e32 v16, s11 +; MOVREL-NEXT: v_mov_b32_e32 v15, s10 +; MOVREL-NEXT: v_mov_b32_e32 v14, s9 +; MOVREL-NEXT: v_mov_b32_e32 v13, s8 +; MOVREL-NEXT: v_mov_b32_e32 v12, s7 +; MOVREL-NEXT: v_mov_b32_e32 v11, s6 +; MOVREL-NEXT: 
v_mov_b32_e32 v10, s5 +; MOVREL-NEXT: v_mov_b32_e32 v9, s4 +; MOVREL-NEXT: v_mov_b32_e32 v8, s3 +; MOVREL-NEXT: v_mov_b32_e32 v7, s2 +; MOVREL-NEXT: v_mov_b32_e32 v6, s1 +; MOVREL-NEXT: v_mov_b32_e32 v5, s0 ; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 0 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, s12, 1 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s1, s12, 4 ; MOVREL-NEXT: ; implicit-def: $vcc_hi -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v3, v0, vcc_lo -; MOVREL-NEXT: v_cndmask_b32_e32 v3, v4, v1, vcc_lo -; MOVREL-NEXT: v_cndmask_b32_e64 v4, v5, v0, s0 -; MOVREL-NEXT: v_cndmask_b32_e64 v5, v6, v1, s0 +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e64 v4, v7, v0, s0 ; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s12, 2 +; MOVREL-NEXT: v_cndmask_b32_e64 v5, v8, v1, s0 ; MOVREL-NEXT: v_cmp_eq_u32_e64 s0, s12, 3 ; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 -; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 -; MOVREL-NEXT: v_cndmask_b32_e32 v6, v7, v0, vcc_lo -; MOVREL-NEXT: v_cndmask_b32_e32 v7, v8, v1, vcc_lo -; MOVREL-NEXT: v_cndmask_b32_e64 v8, v9, v0, s0 -; MOVREL-NEXT: v_cndmask_b32_e64 v9, v10, v1, s0 -; MOVREL-NEXT: v_cndmask_b32_e64 v0, v11, v0, s1 -; MOVREL-NEXT: v_cndmask_b32_e64 v1, v12, v1, s1 +; MOVREL-NEXT: v_cndmask_b32_e32 v6, v9, v0, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v7, v10, v1, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e64 v8, v11, v0, s0 +; MOVREL-NEXT: v_cndmask_b32_e64 v9, v12, v1, s0 +; MOVREL-NEXT: v_cndmask_b32_e64 v0, v13, v0, s1 +; MOVREL-NEXT: v_cndmask_b32_e64 v1, v14, v1, s1 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 ; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 ; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 ; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 @@ -4448,7 +4448,7 @@ define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, ; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_s: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 0 -; MOVREL-NEXT: v_mov_b32_e32 v13, v2 +; MOVREL-NEXT: v_mov_b32_e32 v15, v2 ; MOVREL-NEXT: v_mov_b32_e32 v14, v3 ; MOVREL-NEXT: ; implicit-def: $vcc_hi ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo @@ -4457,7 +4457,7 @@ define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_s(<5 x double> %vec, ; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 ; MOVREL-NEXT: v_cndmask_b32_e32 v3, v14, v11, vcc_lo -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v13, v10, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v15, v10, vcc_lo ; MOVREL-NEXT: v_cmp_eq_u32_e64 vcc_lo, s2, 2 ; MOVREL-NEXT: v_readfirstlane_b32 s3, v3 ; MOVREL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc_lo @@ -4514,7 +4514,7 @@ define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, ; MOVREL-LABEL: dyn_insertelement_v5f64_v_v_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v12 -; MOVREL-NEXT: v_mov_b32_e32 v13, v2 +; MOVREL-NEXT: v_mov_b32_e32 v15, v2 ; MOVREL-NEXT: v_mov_b32_e32 v14, v3 ; MOVREL-NEXT: ; implicit-def: $vcc_hi ; MOVREL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc_lo @@ -4522,7 +4522,7 @@ define amdgpu_ps <5 x double> @dyn_insertelement_v5f64_v_v_v(<5 x double> %vec, ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12 ; MOVREL-NEXT: v_readfirstlane_b32 s0, v0 ; MOVREL-NEXT: v_readfirstlane_b32 s1, v1 -; MOVREL-NEXT: v_cndmask_b32_e32 v2, v13, v10, vcc_lo +; MOVREL-NEXT: v_cndmask_b32_e32 v2, v15, v10, vcc_lo ; MOVREL-NEXT: v_cndmask_b32_e32 v3, 
v14, v11, vcc_lo ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 2, v12 ; MOVREL-NEXT: v_readfirstlane_b32 s2, v2 diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir new file mode 100644 index 0000000000000..49d6a9ad19717 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir @@ -0,0 +1,69 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64 -verify-machineinstrs -run-pass greedy,amdgpu-regbanks-reassign,virtregrewriter -o - %s | FileCheck -check-prefix=GCN %s + + +# Test that subreg reassignments are correctly handled when whole register also +# conflicts. If this is mishandled stall counts will be incorrect and cause an +# infinite loop. +# GCN-LABEL: vgpr64_mixed_use{{$}} +# GCN: $vgpr0_vgpr1 = IMPLICIT_DEF +# GCN: $vgpr4_vgpr5 = IMPLICIT_DEF +# GCN: $vcc = IMPLICIT_DEF +# GCN: $vgpr2_vgpr3 = IMPLICIT_DEF +# GCN: $vgpr6_vgpr7 = IMPLICIT_DEF +# GCN: $vgpr8_vgpr9_vgpr10_vgpr11 = IMPLICIT_DEF +# GCN: $vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF +# GCN: $vgpr16_vgpr17_vgpr18_vgpr19 = IMPLICIT_DEF +# GCN: $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF +# GCN: $vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF +# GCN: $vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF +# GCN: $vgpr32_vgpr33_vgpr34_vgpr35 = IMPLICIT_DEF +# GCN: $vgpr36_vgpr37_vgpr38_vgpr39 = IMPLICIT_DEF +# GCN: $vgpr40_vgpr41_vgpr42_vgpr43 = IMPLICIT_DEF +# GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF +# GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr5, $vcc, implicit $exec +# GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr4, killed $vcc, implicit $exec +# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 $vgpr4_vgpr5, $vgpr0_vgpr1, implicit $exec +--- +name: vgpr64_mixed_use +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_64, preferred-register: '$vgpr0_vgpr1' } + - { id: 1, class: vreg_64, preferred-register: '$vgpr4_vgpr5' } + - { id: 2, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: sreg_64_xexec } + - { id: 6, class: vreg_64, preferred-register: '$vgpr2_vgpr3' } + - { id: 7, class: vreg_64, preferred-register: '$vgpr6_vgpr7' } + - { id: 8, class: vreg_128, preferred-register: '$vgpr8_vgpr9_vgpr10_vgpr11' } + - { id: 9, class: vreg_128, preferred-register: '$vgpr12_vgpr13_vgpr14_vgpr15' } + - { id: 10, class: vreg_128, preferred-register: '$vgpr16_vgpr17_vgpr18_vgpr19' } + - { id: 11, class: vreg_128, preferred-register: '$vgpr20_vgpr21_vgpr22_vgpr23' } + - { id: 12, class: vreg_128, preferred-register: '$vgpr24_vgpr25_vgpr26_vgpr27' } + - { id: 13, class: vreg_128, preferred-register: '$vgpr28_vgpr29_vgpr30_vgpr31' } + - { id: 14, class: vreg_128, preferred-register: '$vgpr32_vgpr33_vgpr34_vgpr35' } + - { id: 15, class: vreg_128, preferred-register: '$vgpr36_vgpr37_vgpr38_vgpr39' } + - { id: 16, class: vreg_128, preferred-register: '$vgpr40_vgpr41_vgpr42_vgpr43' } + - { id: 17, class: vreg_128, preferred-register: '$vgpr44_vgpr45_vgpr46_vgpr47' } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + %6 = IMPLICIT_DEF + %7 = IMPLICIT_DEF + %8 = IMPLICIT_DEF + %9 = IMPLICIT_DEF + %10 = IMPLICIT_DEF + %11 = IMPLICIT_DEF + %12 = IMPLICIT_DEF + %13 = IMPLICIT_DEF + %14 = IMPLICIT_DEF + %15 = IMPLICIT_DEF + %16 = IMPLICIT_DEF + %17 = IMPLICIT_DEF + %3 = V_CNDMASK_B32_e64 0, %0.sub1, 0, %1.sub1, %2, implicit $exec + %4 = V_CNDMASK_B32_e64 0, %0.sub0, 0, %1.sub0, %2, implicit $exec + %5 = V_CMP_LT_U64_e64 %1, %0, 
implicit $exec + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir index 0020e17a0b6fe..2078d8c22922b 100644 --- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir +++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir @@ -494,3 +494,81 @@ body: | %2 = V_AND_B32_e32 %1, %0, implicit $exec S_ENDPGM 0 ... + +# Test that bank of subreg is considered during scavenging. +# If handled incorrectly an infinite loop occurs. +# GCN-LABEL: s0_vs_s15_16_17_sub1{{$}} +# GCN: S_AND_B32 renamable $sgpr13, $sgpr0, +--- +name: s0_vs_s15_16_17_sub1 +tracksRegLiveness: true +registers: + - { id: 0, class: sgpr_96, preferred-register: '$sgpr15_sgpr16_sgpr17' } + - { id: 1, class: sgpr_32 } +body: | + bb.0: + %0 = IMPLICIT_DEF + $sgpr0 = IMPLICIT_DEF + %1 = S_AND_B32 %0.sub1, $sgpr0, implicit-def $scc + S_ENDPGM 0 +... + +# Test that the size of subreg is correctly handled in bank calculation. +# If handled incorrectly an infinite loop occurs. +# GCN-LABEL: vgpr_sub_dependence{{$}} +# GCN: $vgpr9_vgpr10_vgpr11_vgpr12 = IMPLICIT_DEF +# GCN: $vgpr16_vgpr17 = IMPLICIT_DEF +# GCN: $vgpr14_vgpr15 = IMPLICIT_DEF +# GCN: $vgpr0_vgpr1 = IMPLICIT_DEF +# GCN: $vgpr7_vgpr8 = IMPLICIT_DEF +# GCN: $vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF +# GCN: $vgpr18_vgpr19 = IMPLICIT_DEF +# GCN: $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF +# GCN: $vgpr24_vgpr25_vgpr26_vgpr27 = IMPLICIT_DEF +# GCN: $vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF +# GCN: $vgpr32_vgpr33_vgpr34_vgpr35 = IMPLICIT_DEF +# GCN: $vgpr36_vgpr37_vgpr38_vgpr39 = IMPLICIT_DEF +# GCN: $vgpr40_vgpr41_vgpr42_vgpr43 = IMPLICIT_DEF +# GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF +# GCN: $vgpr0_vgpr1 = V_ADD_F64 0, $vgpr11_vgpr12, 0, killed $vgpr16_vgpr17, 0, 0, implicit $mode, implicit $exec +# GCN: $vgpr0_vgpr1 = V_ADD_F64 0, $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec +--- +name: vgpr_sub_dependence +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128, preferred-register: '$vgpr10_vgpr11_vgpr12_vgpr13' } + - { id: 1, class: vreg_64, preferred-register: '$vgpr16_vgpr17' } + - { id: 2, class: vreg_64, preferred-register: '$vgpr14_vgpr15' } + - { id: 3, class: vreg_64 } + - { id: 4, class: vreg_64 } + - { id: 5, class: vreg_64, preferred-register: '$vgpr0_vgpr1' } + - { id: 6, class: vreg_64, preferred-register: '$vgpr7_vgpr8' } + - { id: 7, class: vreg_128, preferred-register: '$vgpr3_vgpr4_vgpr5_vgpr6' } + - { id: 8, class: vreg_64, preferred-register: '$vgpr18_vgpr19' } + - { id: 9, class: vreg_128, preferred-register: '$vgpr20_vgpr21_vgpr22_vgpr23' } + - { id: 10, class: vreg_128, preferred-register: '$vgpr24_vgpr25_vgpr26_vgpr27' } + - { id: 11, class: vreg_128, preferred-register: '$vgpr28_vgpr29_vgpr30_vgpr31' } + - { id: 12, class: vreg_128, preferred-register: '$vgpr32_vgpr33_vgpr34_vgpr35' } + - { id: 13, class: vreg_128, preferred-register: '$vgpr36_vgpr37_vgpr38_vgpr39' } + - { id: 14, class: vreg_128, preferred-register: '$vgpr40_vgpr41_vgpr42_vgpr43' } + - { id: 15, class: vreg_128, preferred-register: '$vgpr44_vgpr45_vgpr46_vgpr47' } +body: | + bb.0: + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + %5 = IMPLICIT_DEF + %6 = IMPLICIT_DEF + %7 = IMPLICIT_DEF + %8 = IMPLICIT_DEF + %9 = IMPLICIT_DEF + %10 = IMPLICIT_DEF + %11 = IMPLICIT_DEF + %12 = IMPLICIT_DEF + %13 = IMPLICIT_DEF + %14 = IMPLICIT_DEF + %15 = IMPLICIT_DEF + %3 = V_ADD_F64 0, %0.sub2_sub3:vreg_128, 0, %1:vreg_64, 0, 0, implicit $mode, implicit $exec + %4 = V_ADD_F64 0, 
%0.sub0_sub1:vreg_128, 0, %2:vreg_64, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0 +... From 45c46d180e15da9974fb0177f8e4b5a293ac241a Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Mon, 3 Aug 2020 23:46:52 -0400 Subject: [PATCH 263/600] [PowerPC] mark r+i as legal address mode for vector type after pwr9 Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D84735 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 ++++++-- llvm/test/CodeGen/PowerPC/prefer-dqform.ll | 33 +++++++++++---------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 85f1630d8e223..dfe0c2dd5c9cc 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15128,9 +15128,15 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS, Instruction *I) const { - // PPC does not allow r+i addressing modes for vectors! - if (Ty->isVectorTy() && AM.BaseOffs != 0) + unsigned AS, + Instruction *I) const { + // Vector type r+i form is supported since power9 as DQ form. We don't check + // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC, + // imm form is preferred and the offset can be adjusted to use imm form later + // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and + // max offset to check legal addressing mode, we should be a little aggressive + // to contain other offsets for that LSRUse. + if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) return false; // PPC allows a sign-extended 16-bit immediate field. 
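
The effect of the relaxed check can be sketched in isolation. The snippet below is a minimal standalone model, not the actual PPCTargetLowering interface: AddrModeSketch, isVectorRPlusIAllowed, and the two boolean parameters are illustrative stand-ins for AddrMode, Type::isVectorTy() and Subtarget.hasP9Vector().

  // Minimal model of the gating above: r+i on a vector access is rejected
  // only when the subtarget lacks Power9 vector support. The DQ-form
  // alignment requirement (offset % 16 == 0) is deliberately not enforced
  // here, matching the comment above: PPCLoopInstrFormPrep can adjust
  // offsets into imm form later.
  struct AddrModeSketch {
    long BaseOffs; // constant displacement folded into the address
  };

  bool isVectorRPlusIAllowed(const AddrModeSketch &AM, bool IsVectorTy,
                             bool HasP9Vector) {
    if (IsVectorTy && AM.BaseOffs != 0 && !HasP9Vector)
      return false; // pre-Power9: vector loads/stores only get r+r forms
    return true;    // Power9 onward: DQ-form lxv/stxv makes r+i legal
  }
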
diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll index 0d1992763d0ff..79e6026365ba7 100644 --- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll +++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll @@ -12,26 +12,27 @@ target triple = "powerpc64le-unknown-linux-gnu" define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) %.m, i32* noalias dereferenceable(4) %.n, [0 x %_elem_type_of_a]* %.a, i32* noalias dereferenceable(4) %.lda, [0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_y]* noalias %.y) { ; CHECK-P9-LABEL: test: ; CHECK-P9: .LBB0_2: # %_loop_2_do_ -; CHECK-P9: lxvx -; CHECK-P9: lxvx -; CHECK-P9-DAG: lxvx -; CHECK-P9-DAG: lxvx -; CHECK-P9-DAG: xvmaddadp -; CHECK-P9-DAG: xvmaddadp -; CHECK-P9-DAG: stxvx -; CHECK-P9: stxvx +; CHECK-P9: lxv vs1, -16(r4) +; CHECK-P9: lxv vs2, 0(r4) +; CHECK-P9-DAG: lxv vs3, -16(r3) +; CHECK-P9-DAG: lxv vs4, 0(r3) +; CHECK-P9-DAG: xvmaddadp vs1, vs3, vs1 +; CHECK-P9-DAG: stxv vs1, -16(r4) +; CHECK-P9-DAG: xvmaddadp vs2, vs4, vs0 +; CHECK-P9: stxv vs2, 0(r4) ; CHECK-P9: bdnz .LBB0_2 ; +; FIXME: use pair load/store instructions lxvp/stxvp ; CHECK-P10-LABEL: test: ; CHECK-P10: .LBB0_2: # %_loop_2_do_ -; CHECK-P10: lxvx -; CHECK-P10: lxvx -; CHECK-P10-DAG: lxvx -; CHECK-P10-DAG: lxvx -; CHECK-P10-DAG: xvmaddadp -; CHECK-P10-DAG: xvmaddadp -; CHECK-P10-DAG: stxvx -; CHECK-P10: stxvx +; CHECK-P10: lxv vs1, -16(r4) +; CHECK-P10: lxv vs2, 0(r4) +; CHECK-P10-DAG: lxv vs3, -16(r3) +; CHECK-P10-DAG: lxv vs4, 0(r3) +; CHECK-P10-DAG: xvmaddadp vs1, vs3, vs1 +; CHECK-P10-DAG: xvmaddadp vs2, vs4, vs0 +; CHECK-P10-DAG: stxv vs1, -16(r4) +; CHECK-P10: stxv vs2, 0(r4) ; CHECK-P10: bdnz .LBB0_2 test_entry: %_conv5 = ptrtoint [0 x %_elem_type_of_a]* %.a to i64 From 6bc7ea2d8d8638f5bb753715e9023df514e814e9 Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Tue, 4 Aug 2020 11:39:21 +0800 Subject: [PATCH 264/600] [X86][AVX512] Fix build fail after D81548 Test function mask_cmp_128 failed during ISEL LLVM ERROR: Cannot select: t37: v8i1 = X86ISD::KSHIFTL t48, TargetConstant:i8<4> due to v8i1 only available under AVX512DQ. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D84922 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++ llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 58 ++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e9bb50aacec0e..b80a23f5a608e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38351,6 +38351,10 @@ static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, case ISD::SHL: { // If we find a suitable source, a SHL becomes a KSHIFTL. 
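    // Background for the subtarget guard added just below (a hedged gloss,
    // not lines from the commit itself): a KSHIFTL of a v8i1 mask selects
    // the byte-granular kshiftlb, which exists only with AVX512DQ, while
    // v32i1/v64i1 shifts need the dword/qword kshift forms from AVX512BW,
    // so the combine must bail out when the required feature is absent.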
SDValue Src0 = V.getOperand(0); + if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) || + ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI())) + break; + if (auto *Amt = dyn_cast(V.getOperand(1))) if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget)) return DAG.getNode( diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index 8c746a3425f51..37ada5e66bbac 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -977,5 +977,63 @@ define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x f tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4 ret void } + +define void @mask_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) { +; AVX512F-LABEL: mask_cmp_128: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 +; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 +; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k0 +; AVX512F-NEXT: shlb $4, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: korw %k1, %k0, %k0 +; AVX512F-NEXT: kshiftlw $8, %k0, %k0 +; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: mask_cmp_128: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vcmpltps %xmm1, %xmm0, %k0 +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: vcmpltps %xmm3, %xmm2, %k0 +; AVX512VL-NEXT: shlb $4, %al +; AVX512VL-NEXT: kmovw %eax, %k1 +; AVX512VL-NEXT: korw %k1, %k0, %k1 +; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vmovaps %ymm0, (%rdi) {%k1} +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; VL_BW_DQ-LABEL: mask_cmp_128: +; VL_BW_DQ: # %bb.0: # %entry +; VL_BW_DQ-NEXT: vcmpltps %xmm1, %xmm0, %k0 +; VL_BW_DQ-NEXT: vcmpltps %xmm3, %xmm2, %k1 +; VL_BW_DQ-NEXT: kshiftlb $4, %k0, %k0 +; VL_BW_DQ-NEXT: korb %k0, %k1, %k1 +; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; VL_BW_DQ-NEXT: vmovaps %ymm0, (%rdi) {%k1} +; VL_BW_DQ-NEXT: vzeroupper +; VL_BW_DQ-NEXT: retq +entry: + %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1, i8 -1) + %1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1, i8 -1) + %shl = shl nuw i8 %0, 4 + %or = or i8 %1, %shl + %2 = bitcast float* %p to <8 x float>* + %3 = bitcast i8 %or to <8 x i1> + tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3) + ret void +} +declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8) declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8) +declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>) declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) From b959906cb9e79f844ea4e34aa701f21bad007253 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 3 Aug 2020 21:32:44 -0700 Subject: [PATCH 265/600] [PGO] Use multiple comdat groups for COFF D84723 caused multiple definition issues (related to 
comdat) on Windows: http://lab.llvm.org:8011/builders/sanitizer-windows/builds/67465 --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 3 ++- llvm/test/Instrumentation/InstrProfiling/PR23499.ll | 2 +- llvm/test/Instrumentation/InstrProfiling/comdat.ll | 4 ++-- llvm/test/Instrumentation/InstrProfiling/linkage.ll | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 623f463293cd9..42939c8dbc017 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -885,7 +885,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix()); auto MaybeSetComdat = [=](GlobalVariable *GV) { if (NeedComdat) - GV->setComdat(M->getOrInsertComdat(DataVarName)); + GV->setComdat(M->getOrInsertComdat(TT.isOSBinFormatCOFF() ? GV->getName() + : DataVarName)); }; uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); diff --git a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll index 098153f39e1db..88e8426ef6cb5 100644 --- a/llvm/test/Instrumentation/InstrProfiling/PR23499.ll +++ b/llvm/test/Instrumentation/InstrProfiling/PR23499.ll @@ -20,7 +20,7 @@ $_Z3barIvEvv = comdat any ; COFF-NOT: __profn__Z3barIvEvv -; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat($__profd__Z3barIvEvv), align 8 +; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}prfc$M", comdat, align 8 ; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i64, i64, i64*, i8*, i8*, i32, [2 x i16] } { i64 4947693190065689389, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8*{{.*}}, i8* null, i32 1, [2 x i16] zeroinitializer }, section "{{.*}}prfd{{.*}}", comdat, align 8 diff --git a/llvm/test/Instrumentation/InstrProfiling/comdat.ll b/llvm/test/Instrumentation/InstrProfiling/comdat.ll index dfcd71fd06291..81c6db7f730d0 100644 --- a/llvm/test/Instrumentation/InstrProfiling/comdat.ll +++ b/llvm/test/Instrumentation/InstrProfiling/comdat.ll @@ -17,7 +17,7 @@ $foo_inline = comdat any ; ELF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_inline), align 8 ; ELF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_inline), align 8 +; COFF: @__profc_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat, align 8 ; COFF: @__profd_foo_inline = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define weak_odr void @foo_inline() comdat { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0) @@ -30,7 +30,7 @@ $foo_extern = comdat any ; ELF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8 ; ELF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section "__llvm_prf_data", comdat, align 8 -; COFF: @__profc_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat($__profd_foo_extern), align 8 +; COFF: @__profc_foo_extern = 
linkonce_odr hidden global{{.*}}, section ".lprfc$M", comdat, align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global{{.*}}, section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll index 5e55ef716820e..6320060afe42d 100644 --- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll @@ -59,7 +59,7 @@ define linkonce_odr void @foo_inline() { ; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat, align 8 ; MACHO: @__profc_foo_extern = linkonce_odr hidden global ; MACHO: @__profd_foo_extern = linkonce_odr hidden global -; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat($__profd_foo_extern), align 8 +; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat, align 8 ; COFF: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfd$M", comdat, align 8 define available_externally void @foo_extern() { call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_extern, i32 0, i32 0), i64 0, i32 1, i32 0) From 25abd1994ed209c1bf4139946a42e36a42143a85 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 4 Aug 2020 12:42:09 +0800 Subject: [PATCH 266/600] [YAMLParser] Fix a typo: iff -> if. NFC. --- llvm/include/llvm/Support/YAMLParser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h index 53009d7ff4aad..44daf7850904a 100644 --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -222,7 +222,7 @@ class ScalarNode final : public Node { /// Gets the value of this node as a StringRef. /// - /// \param Storage is used to store the content of the returned StringRef iff + /// \param Storage is used to store the content of the returned StringRef if /// it requires any modification from how it appeared in the source. /// This happens with escaped characters and multi-line literals. StringRef getValue(SmallVectorImpl &Storage) const; From 4ede3968498174f35f8456cd4bf95d14811d40d1 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Mon, 3 Aug 2020 03:21:01 -0400 Subject: [PATCH 267/600] [clang] Include trailing-requires-clause in FunctionDecl's source range Fixes https://github.com/clangd/clangd/issues/476 Differential Revision: https://reviews.llvm.org/D85108 --- .../clangd/unittests/FindTargetTests.cpp | 19 ++++++++++++++++--- clang/include/clang/Sema/DeclSpec.h | 2 ++ clang/test/AST/ast-dump-concepts.cpp | 9 +++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index 8b872d6314d45..3421b9cec2d30 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -405,6 +405,11 @@ TEST_F(TargetDeclTest, ClassTemplate) { } TEST_F(TargetDeclTest, Concept) { + Flags.push_back("-std=c++20"); + + // FIXME: Should we truncate the pretty-printed form of a concept decl + // somewhere? 
+ Code = R"cpp( template concept Fooable = requires (T t) { t.foo(); }; @@ -414,12 +419,20 @@ TEST_F(TargetDeclTest, Concept) { t.foo(); } )cpp"; - Flags.push_back("-std=c++20"); EXPECT_DECLS( "ConceptSpecializationExpr", - // FIXME: Should we truncate the pretty-printed form of a concept decl - // somewhere? {"template concept Fooable = requires (T t) { t.foo(); };"}); + + // trailing requires clause + Code = R"cpp( + template + concept Fooable = true; + + template + void foo() requires [[Fooable]]; + )cpp"; + EXPECT_DECLS("ConceptSpecializationExpr", + {"template concept Fooable = true;"}); } TEST_F(TargetDeclTest, FunctionTemplate) { diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 0a22b5af7c644..93a9126096557 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -2448,6 +2448,8 @@ class Declarator { /// \brief Sets a trailing requires clause for this declarator. void setTrailingRequiresClause(Expr *TRC) { TrailingRequiresClause = TRC; + + SetRangeEnd(TRC->getEndLoc()); } /// \brief Sets a trailing requires clause for this declarator. diff --git a/clang/test/AST/ast-dump-concepts.cpp b/clang/test/AST/ast-dump-concepts.cpp index 3429fa6b46be5..7050ee0fb4492 100644 --- a/clang/test/AST/ast-dump-concepts.cpp +++ b/clang/test/AST/ast-dump-concepts.cpp @@ -24,4 +24,13 @@ struct Foo { // CHECK-NEXT: `-ConceptSpecializationExpr {{.*}} 'bool' template Foo(R); + + // CHECK: FunctionTemplateDecl {{.*}} {{.*}} Foo + template + Foo(R, int) requires unary_concept; + + // CHECK: FunctionTemplateDecl {{.*}} {{.*}} Foo + template + Foo(R, char) requires unary_concept { + } }; From 689096965d9af1b00cd90f117c57fd9e7d07a479 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Mon, 3 Aug 2020 07:57:06 +0000 Subject: [PATCH 268/600] [mlir][Linalg] Conv ops lowering to std calls added. Lowering of newly defined Conv ops in TC syntax to standard dialect is not supported and therefore this commit adds support for it. Differential Revision: https://reviews.llvm.org/D84840 --- .../Conversion/LinalgToStandard/LinalgToStandard.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp index 55ffa3f8b6e61..d56dffdd0dc17 100644 --- a/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp +++ b/mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp @@ -244,7 +244,16 @@ void mlir::populateLinalgToStandardConversionPatterns( LinalgOpConversion, LinalgOpConversion, LinalgOpConversion, - LinalgOpConversion>(ctx); + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion, + LinalgOpConversion>(ctx); // clang-format on } From 6f97103b561cb14e26aafa3b90ecec97f1d08944 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 30 Jul 2020 23:46:26 +0900 Subject: [PATCH 269/600] [JumpThreading] Don't limit the type of an operand Compared to the optimized code with branch conditions never frozen, limiting the type of freeze's operand causes generation of suboptimal code in some cases. I would like to suggest removing the constraint, as this patch does. If the number of freeze instructions becomes significant, this can be revisited. 
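
The relaxed propagation can be sketched apart from the pass itself. The helper below is a hypothetical distillation, not the real ComputeValueKnownInPredecessorsImpl signature; only llvm::FreezeInst and its getOperand accessor come from the actual LLVM API.

  #include "llvm/IR/Instructions.h"

  // Hypothetical recursive query standing in for the pass's
  // known-predecessor-values machinery.
  bool computeKnownInPreds(llvm::Value *V);

  // After this change, freeze is looked through for any operand kind; the
  // old isa<CastInst>/isa<CmpInst>/isa<BinaryOperator> filter is gone, at
  // the cost of possibly materializing more freeze instructions.
  bool handleFreeze(llvm::FreezeInst *FI) {
    return computeKnownInPreds(FI->getOperand(0));
  }
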
Differential Revision: https://reviews.llvm.org/D84949 --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 4 ---- llvm/test/Transforms/JumpThreading/freeze.ll | 16 +++++----------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 2a5bbfff1e1e7..f42d4841f7939 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -693,12 +693,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( return true; } - // Handle Freeze instructions, in a manner similar to Cast. if (FreezeInst *FI = dyn_cast(I)) { Value *Source = FI->getOperand(0); - if (!isa(Source) && !isa(Source) && - !isa(Source)) - return false; ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, RecursionSet, CxtI); diff --git a/llvm/test/Transforms/JumpThreading/freeze.ll b/llvm/test/Transforms/JumpThreading/freeze.ll index 99df46f38a8ec..99fa058ade814 100644 --- a/llvm/test/Transforms/JumpThreading/freeze.ll +++ b/llvm/test/Transforms/JumpThreading/freeze.ll @@ -85,20 +85,14 @@ F2: define i32 @test1_cast2(i1 %cond) { ; CHECK-LABEL: @test1_cast2( -; CHECK-NEXT: br i1 [[COND:%.*]], label [[MERGE_THREAD:%.*]], label [[MERGE:%.*]] -; CHECK: Merge.thread: -; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() -; CHECK-NEXT: br label [[T2:%.*]] -; CHECK: Merge: -; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() -; CHECK-NEXT: [[A0_FR:%.*]] = freeze i32 0 -; CHECK-NEXT: [[A_FR:%.*]] = trunc i32 [[A0_FR]] to i1 -; CHECK-NEXT: br i1 [[A_FR]], label [[T2]], label [[F2:%.*]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T2:%.*]], label [[F2:%.*]] ; CHECK: T2: -; CHECK-NEXT: [[B5:%.*]] = phi i32 [ [[V1]], [[MERGE_THREAD]] ], [ [[V2]], [[MERGE]] ] +; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() ; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 [[B5]] +; CHECK-NEXT: ret i32 [[V1]] ; CHECK: F2: +; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() +; CHECK-NEXT: [[A0_FR:%.*]] = freeze i32 0 ; CHECK-NEXT: ret i32 [[V2]] ; br i1 %cond, label %T1, label %F1 From 6a78a8dd378b77be002f0243d80322bbdd6513c5 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 4 Aug 2020 15:48:16 +0800 Subject: [PATCH 270/600] [NFC] [PowerPC] Refactor fp/int conversion lowering For FP_TO_INT and INT_TO_FP lowering, we have direct-move and non-direct-move methods. But they share some conversion logic, so we can reduce redundant code by introducing new methods. 
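
The shape of the refactor can be shown in miniature. This is a hedged sketch rather than the actual SelectionDAG code: the enum mirrors the PPCISD opcode names used in the diff that follows, and pickIntToFPOpcode is an illustrative stand-in for the new convertIntToFP helper.

  // Both the load-reuse and direct-move lowering paths used to repeat this
  // opcode selection inline; hoisting it into one helper is the point of
  // the new convertFPToInt/convertIntToFP functions.
  enum ConvOpc { FCFID, FCFIDU, FCFIDS, FCFIDUS };

  ConvOpc pickIntToFPOpcode(bool IsSigned, bool SingleWithFPCVT) {
    if (SingleWithFPCVT)                  // f32 result and FPCVT available
      return IsSigned ? FCFIDS : FCFIDUS; // convert directly to f32
    return IsSigned ? FCFID : FCFIDU;     // convert to f64, round afterward
  }
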
Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D81818 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 169 ++++++++------------ 1 file changed, 66 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index dfe0c2dd5c9cc..ac7f4f9c34f92 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7915,36 +7915,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { return Op; } -void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, - SelectionDAG &DAG, - const SDLoc &dl) const { - assert(Op.getOperand(0).getValueType().isFloatingPoint()); +static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + SDLoc dl(Op); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; SDValue Src = Op.getOperand(0); + assert(Src.getValueType().isFloatingPoint()); if (Src.getValueType() == MVT::f32) Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); - - SDValue Tmp; + SDValue Conv; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: - Tmp = DAG.getNode( - Op.getOpcode() == ISD::FP_TO_SINT - ? PPCISD::FCTIWZ - : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + Conv = DAG.getNode( + IsSigned ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + assert((IsSigned || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); - Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : - PPCISD::FCTIDUZ, - dl, MVT::f64, Src); - break; + Conv = DAG.getNode(IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, dl, + MVT::f64, Src); } + return Conv; +} + +void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + SelectionDAG &DAG, + const SDLoc &dl) const { + SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && - (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); + (IsSigned || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); MachinePointerInfo MPI = @@ -7985,51 +7990,25 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); - SDValue Src = Op.getOperand(0); - - if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); - - SDValue Tmp; - switch (Op.getSimpleValueType().SimpleTy) { - default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); - case MVT::i32: - Tmp = DAG.getNode( - Op.getOpcode() == ISD::FP_TO_SINT - ? PPCISD::FCTIWZ - : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), - dl, MVT::f64, Src); - Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); - break; - case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && - "i64 FP_TO_UINT is supported only with FPCVT"); - Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? 
PPCISD::FCTIDZ : - PPCISD::FCTIDUZ, - dl, MVT::f64, Src); - Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); - break; - } - return Tmp; + return DAG.getNode(PPCISD::MFVSR, dl, Op.getSimpleValueType().SimpleTy, + convertFPToInt(Op, DAG, Subtarget)); } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - + SDValue Src = Op.getOperand(0); // FP to INT conversions are legal for f128. - if (Op->getOperand(0).getValueType() == MVT::f128) + if (Src.getValueType() == MVT::f128) return Op; // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). - if (Op.getOperand(0).getValueType() == MVT::ppcf128) { + if (Src.getValueType() == MVT::ppcf128) { if (Op.getValueType() == MVT::i32) { if (Op.getOpcode() == ISD::FP_TO_SINT) { - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - MVT::f64, Op.getOperand(0), + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - MVT::f64, Op.getOperand(0), + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode. @@ -8045,15 +8024,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. // TODO: Are there fast-math-flags to propagate to this FSUB? - SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, - Op.getOperand(0), Tmp); + SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp); True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, DAG.getConstant(0x80000000, dl, MVT::i32)); - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - Op.getOperand(0)); - return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False, - ISD::SETGE); + SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); + return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE); } } @@ -8172,6 +8148,19 @@ bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { return false; } +static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP; + SDLoc dl(Op); + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-precision and then round. + bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT(); + unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS) + : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU); + EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64; + return DAG.getNode(ConvOpc, dl, ConvTy, Src); +} + /// Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. @@ -8183,25 +8172,12 @@ SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); - SDValue FP; SDValue Src = Op.getOperand(0); - bool SinglePrec = Op.getValueType() == MVT::f32; bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; - unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : - (SinglePrec ? 
PPCISD::FCFIDUS : PPCISD::FCFIDU); - - if (WordInt) { - FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, - dl, MVT::f64, Src); - FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); - } - else { - FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); - FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); - } - - return FP; + unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA; + SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src); + return convertIntToFP(Op, Mov, DAG, Subtarget); } static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { @@ -8277,8 +8253,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP; - EVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Src.getValueType(); EVT OutVT = Op.getValueType(); if (OutVT.isVector() && OutVT.isFloatingPoint() && isOperationCustom(Op.getOpcode(), InVT)) @@ -8292,8 +8270,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - if (Op.getOperand(0).getValueType() == MVT::i1) - return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), + if (Src.getValueType() == MVT::i1) + return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, DAG.getConstantFP(1.0, dl, Op.getValueType()), DAG.getConstantFP(0.0, dl, Op.getValueType())); @@ -8303,22 +8281,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); - assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && + assert((IsSigned || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); - // If we have FCFIDS, then use it when converting to single-precision. - // Otherwise, convert to double-precision and then round. - unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) - ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS - : PPCISD::FCFIDS) - : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU - : PPCISD::FCFID); - MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) - ? MVT::f32 - : MVT::f64; - - if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue SINT = Op.getOperand(0); + if (Src.getValueType() == MVT::i64) { + SDValue SINT = Src; // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have @@ -8431,7 +8398,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, @@ -8439,7 +8406,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, return FP; } - assert(Op.getOperand(0).getValueType() == MVT::i32 && + assert(Src.getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. 
In particular, sign extend the input value into the @@ -8453,15 +8420,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; - if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, - DAG))) { + if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) { int FrameIdx = MFI.CreateStackObject(4, Align(4), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FrameIdx)); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Src, FIdx, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FrameIdx)); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); @@ -8477,10 +8442,9 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; - Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? - PPCISD::LFIWZX : PPCISD::LFIWAX, - dl, DAG.getVTList(MVT::f64, MVT::Other), - Ops, MVT::i32, MMO); + Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl, + DAG.getVTList(MVT::f64, MVT::Other), Ops, + MVT::i32, MMO); if (ReusingLoad) spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { @@ -8490,8 +8454,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = MFI.CreateStackObject(8, Align(8), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, - Op.getOperand(0)); + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( @@ -8505,7 +8468,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, } // FCFID it and return it. 
- SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); From 1ea84653378132091b5b6d31d4f6bf3ec7da7b56 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 17:07:28 +0900 Subject: [PATCH 271/600] [JumpThreading] Merge/rename thread-two-bbsN.ll tests; NFC --- ...ead-two-bbs5.ll => thread-two-bbs-cuda.ll} | 0 ...ead-two-bbs3.ll => thread-two-bbs-msvc.ll} | 0 .../JumpThreading/thread-two-bbs.ll | 184 ++++++++++++++++++ .../JumpThreading/thread-two-bbs1.ll | 59 ------ .../JumpThreading/thread-two-bbs2.ll | 56 ------ .../JumpThreading/thread-two-bbs4.ll | 43 ---- .../JumpThreading/thread-two-bbs6.ll | 42 ---- 7 files changed, 184 insertions(+), 200 deletions(-) rename llvm/test/Transforms/JumpThreading/{thread-two-bbs5.ll => thread-two-bbs-cuda.ll} (100%) rename llvm/test/Transforms/JumpThreading/{thread-two-bbs3.ll => thread-two-bbs-msvc.ll} (100%) create mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs.ll delete mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll delete mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll delete mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs4.ll delete mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs6.ll diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs5.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs-cuda.ll similarity index 100% rename from llvm/test/Transforms/JumpThreading/thread-two-bbs5.ll rename to llvm/test/Transforms/JumpThreading/thread-two-bbs-cuda.ll diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs-msvc.ll similarity index 100% rename from llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll rename to llvm/test/Transforms/JumpThreading/thread-two-bbs-msvc.ll diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs.ll new file mode 100644 index 0000000000000..94832b6502ee9 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs.ll @@ -0,0 +1,184 @@ +; RUN: opt < %s -jump-threading -S -verify | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 + +define void @foo(i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @foo +; CHECK-LABEL: entry +entry: + %tobool = icmp eq i32 %cond1, 0 + br i1 %tobool, label %bb.cond2, label %bb.f1 + +bb.f1: + call void @f1() + br label %bb.cond2 +; Verify that we branch on cond2 without checking ptr. +; CHECK: call void @f1() +; CHECK-NEXT: icmp eq i32 %cond2, 0 +; CHECK-NEXT: label %bb.f4, label %bb.f2 + +bb.cond2: + %ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ] + %tobool1 = icmp eq i32 %cond2, 0 + br i1 %tobool1, label %bb.file, label %bb.f2 +; Verify that we branch on cond2 without checking ptr. +; CHECK: icmp eq i32 %cond2, 0 +; CHECK-NEXT: label %bb.f3, label %bb.f2 + +bb.f2: + call void @f2() + br label %exit + +; Verify that we eliminate this basic block. 
+; CHECK-NOT: bb.file: +bb.file: + %cmp = icmp eq i32* %ptr, null + br i1 %cmp, label %bb.f4, label %bb.f3 + +bb.f3: + call void @f3() + br label %exit + +bb.f4: + call void @f4() + br label %exit + +exit: + ret void +} + +declare void @f1() + +declare void @f2() + +declare void @f3() + +declare void @f4() + + +define void @foo2(i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @foo2 +; CHECK-LABEL: entry +entry: + %tobool = icmp ne i32 %cond1, 0 + br i1 %tobool, label %bb.f1, label %bb.f2 + +bb.f1: + call void @f1() + br label %bb.cond2 +; Verify that we branch on cond2 without checking tobool again. +; CHECK: call void @f1() +; CHECK-NEXT: icmp eq i32 %cond2, 0 +; CHECK-NEXT: label %exit, label %bb.f3 + +bb.f2: + call void @f2() + br label %bb.cond2 +; Verify that we branch on cond2 without checking tobool again. +; CHECK: call void @f2() +; CHECK-NEXT: icmp eq i32 %cond2, 0 +; CHECK-NEXT: label %exit, label %bb.f4 + +bb.cond2: + %tobool1 = icmp eq i32 %cond2, 0 + br i1 %tobool1, label %exit, label %bb.cond1again + +; Verify that we eliminate this basic block. +; CHECK-NOT: bb.cond1again: +bb.cond1again: + br i1 %tobool, label %bb.f3, label %bb.f4 + +bb.f3: + call void @f3() + br label %exit + +bb.f4: + call void @f4() + br label %exit + +exit: + ret void +} + + +; Verify that we do *not* thread any edge. We used to evaluate +; constant expressions like: +; +; icmp ugt i8* null, inttoptr (i64 4 to i8*) +; +; as "true", causing jump threading to a wrong destination. +define void @foo3(i8* %arg1, i8* %arg2) { +; CHECK-LABEL: @foo +; CHECK-NOT: bb_{{[^ ]*}}.thread: +entry: + %cmp1 = icmp eq i8* %arg1, null + br i1 %cmp1, label %bb_bar1, label %bb_end + +bb_bar1: + call void @bar(i32 1) + br label %bb_end + +bb_end: + %cmp2 = icmp ne i8* %arg2, null + br i1 %cmp2, label %bb_cont, label %bb_bar2 + +bb_bar2: + call void @bar(i32 2) + br label %bb_exit + +bb_cont: + %cmp3 = icmp ule i8* %arg1, inttoptr (i64 4 to i8*) + br i1 %cmp3, label %bb_exit, label %bb_bar3 + +bb_bar3: + call void @bar(i32 3) + br label %bb_exit + +bb_exit: + ret void +} + +declare void @bar(i32) + + +;; Test that we skip unconditional PredBB when threading jumps through two +;; successive basic blocks. 
+ +define i32 @foo4(i32* %0) { +; CHECK-LABEL: @f +; CHECK: br i1 %good, label %pred.bb, label %pred.pred.bb +entry: + %size = call i64 @get_size(i32* %0) + %good = icmp ugt i64 %size, 3 + br i1 %good, label %pred.bb, label %pred.pred.bb + +; CHECK: pred.pred.bb: +; CHECK: br label %pred.bb +; CHECK: pred.bb: +; CHECK: br label %bb +; CHECK: bb: +pred.pred.bb: ; preds = %entry + call void @effect() + br label %pred.bb +pred.bb: ; preds = %pred.pred.bb, %entry + %v = load i32, i32* %0 + br label %bb + +bb: ; preds = %pred.bb + call void @effect1(i8* blockaddress(@foo4, %bb)) + br i1 %good, label %cont2, label %cont1 + +cont1: ; preds = %bb + br i1 %good, label %exit, label %cont2 +cont2: ; preds = %bb + br label %exit +exit: ; preds = %cont1, %cont2 + ret i32 %v +} + +declare i64 @get_size(i32*) +declare void @effect() +declare void @effect1(i8*) diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll deleted file mode 100644 index 1b5f5cb14aeeb..0000000000000 --- a/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll +++ /dev/null @@ -1,59 +0,0 @@ -; RUN: opt < %s -jump-threading -S -verify | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@a = global i32 0, align 4 - -define void @foo(i32 %cond1, i32 %cond2) { -; CHECK-LABEL: @foo -; CHECK-LABEL: entry -entry: - %tobool = icmp eq i32 %cond1, 0 - br i1 %tobool, label %bb.cond2, label %bb.f1 - -bb.f1: - call void @f1() - br label %bb.cond2 -; Verify that we branch on cond2 without checking ptr. -; CHECK: call void @f1() -; CHECK-NEXT: icmp eq i32 %cond2, 0 -; CHECK-NEXT: label %bb.f4, label %bb.f2 - -bb.cond2: - %ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ] - %tobool1 = icmp eq i32 %cond2, 0 - br i1 %tobool1, label %bb.file, label %bb.f2 -; Verify that we branch on cond2 without checking ptr. -; CHECK: icmp eq i32 %cond2, 0 -; CHECK-NEXT: label %bb.f3, label %bb.f2 - -bb.f2: - call void @f2() - br label %exit - -; Verify that we eliminate this basic block. -; CHECK-NOT: bb.file: -bb.file: - %cmp = icmp eq i32* %ptr, null - br i1 %cmp, label %bb.f4, label %bb.f3 - -bb.f3: - call void @f3() - br label %exit - -bb.f4: - call void @f4() - br label %exit - -exit: - ret void -} - -declare void @f1() - -declare void @f2() - -declare void @f3() - -declare void @f4() diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll deleted file mode 100644 index ebb7ce013eb07..0000000000000 --- a/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt < %s -jump-threading -S -verify | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @foo(i32 %cond1, i32 %cond2) { -; CHECK-LABEL: @foo -; CHECK-LABEL: entry -entry: - %tobool = icmp ne i32 %cond1, 0 - br i1 %tobool, label %bb.f1, label %bb.f2 - -bb.f1: - call void @f1() - br label %bb.cond2 -; Verify that we branch on cond2 without checking tobool again. -; CHECK: call void @f1() -; CHECK-NEXT: icmp eq i32 %cond2, 0 -; CHECK-NEXT: label %exit, label %bb.f3 - -bb.f2: - call void @f2() - br label %bb.cond2 -; Verify that we branch on cond2 without checking tobool again. 
-; CHECK: call void @f2() -; CHECK-NEXT: icmp eq i32 %cond2, 0 -; CHECK-NEXT: label %exit, label %bb.f4 - -bb.cond2: - %tobool1 = icmp eq i32 %cond2, 0 - br i1 %tobool1, label %exit, label %bb.cond1again - -; Verify that we eliminate this basic block. -; CHECK-NOT: bb.cond1again: -bb.cond1again: - br i1 %tobool, label %bb.f3, label %bb.f4 - -bb.f3: - call void @f3() - br label %exit - -bb.f4: - call void @f4() - br label %exit - -exit: - ret void -} - -declare void @f1() local_unnamed_addr - -declare void @f2() local_unnamed_addr - -declare void @f3() local_unnamed_addr - -declare void @f4() local_unnamed_addr diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs4.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs4.ll deleted file mode 100644 index 6ab757fc191de..0000000000000 --- a/llvm/test/Transforms/JumpThreading/thread-two-bbs4.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: opt < %s -jump-threading -S -verify | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Verify that we do *not* thread any edge. We used to evaluate -; constant expressions like: -; -; icmp ugt i8* null, inttoptr (i64 4 to i8*) -; -; as "true", causing jump threading to a wrong destination. -define void @foo(i8* %arg1, i8* %arg2) { -; CHECK-LABEL: @foo -; CHECK-NOT: bb_{{[^ ]*}}.thread: -entry: - %cmp1 = icmp eq i8* %arg1, null - br i1 %cmp1, label %bb_bar1, label %bb_end - -bb_bar1: - call void @bar(i32 1) - br label %bb_end - -bb_end: - %cmp2 = icmp ne i8* %arg2, null - br i1 %cmp2, label %bb_cont, label %bb_bar2 - -bb_bar2: - call void @bar(i32 2) - br label %bb_exit - -bb_cont: - %cmp3 = icmp ule i8* %arg1, inttoptr (i64 4 to i8*) - br i1 %cmp3, label %bb_exit, label %bb_bar3 - -bb_bar3: - call void @bar(i32 3) - br label %bb_exit - -bb_exit: - ret void -} - -declare void @bar(i32) diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs6.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs6.ll deleted file mode 100644 index 0d8d2f058b895..0000000000000 --- a/llvm/test/Transforms/JumpThreading/thread-two-bbs6.ll +++ /dev/null @@ -1,42 +0,0 @@ -;; Test that we skip unconditional PredBB when threading jumps through two -;; successive basic blocks. -; RUN: opt -S -passes='function(jump-threading)' < %s | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @f(i32* %0) { -; CHECK-LABEL: @f -; CHECK: br i1 %good, label %pred.bb, label %pred.pred.bb -entry: - %size = call i64 @get_size(i32* %0) - %good = icmp ugt i64 %size, 3 - br i1 %good, label %pred.bb, label %pred.pred.bb - -; CHECK: pred.pred.bb: -; CHECK: br label %pred.bb -; CHECK: pred.bb: -; CHECK: br label %bb -; CHECK: bb: -pred.pred.bb: ; preds = %entry - call void @effect() - br label %pred.bb -pred.bb: ; preds = %pred.pred.bb, %entry - %v = load i32, i32* %0 - br label %bb - -bb: ; preds = %pred.bb - call void @effect1(i8* blockaddress(@f, %bb)) - br i1 %good, label %cont2, label %cont1 - -cont1: ; preds = %bb - br i1 %good, label %exit, label %cont2 -cont2: ; preds = %bb - br label %exit -exit: ; preds = %cont1, %cont2 - ret i32 %v -} - -declare i64 @get_size(i32*) -declare void @effect() -declare void @effect1(i8*) From 8f576a75661668594ff0ac795e31cdb8df90780b Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 3 Aug 2020 17:21:58 +0300 Subject: [PATCH 272/600] [llvm-readobj] - Simplify findSectionByName(). NFCI. 
It turns out that findSectionByName can return const Elf_Shdr * instead of Expected<>, because its code never returns an error currently (it reports warnings instead). Differential revision: https://reviews.llvm.org/D85135 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 44 ++++++++------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2edca5aaa0b9d..046ade9abf86d 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -353,7 +353,7 @@ template class ELFDumper : public ObjDumper { void printSymbolsHelper(bool IsDynamic) const; std::string getDynamicEntry(uint64_t Type, uint64_t Value) const; - Expected findSectionByName(StringRef Name) const; + const Elf_Shdr *findSectionByName(StringRef Name) const; const Elf_Shdr *getDotSymtabSec() const { return DotSymtabSec; } const Elf_Shdr *getDotCGProfileSec() const { return DotCGProfileSec; } @@ -2454,7 +2454,7 @@ void printFlags(T Value, ArrayRef> Flags, raw_ostream &OS) { } template -Expected +const typename ELFT::Shdr * ELFDumper::findSectionByName(StringRef Name) const { const ELFFile *Obj = ObjF->getELFFile(); for (const Elf_Shdr &Shdr : cantFail(Obj->sections())) { @@ -3009,12 +3009,7 @@ Error MipsGOTParser::findGOT(Elf_Dyn_Range DynTable, // Find static GOT secton. if (IsStatic) { - Expected GotOrErr = Dumper.findSectionByName(".got"); - if (!GotOrErr) - return GotOrErr.takeError(); - else - GotSec = *GotOrErr; - + GotSec = Dumper.findSectionByName(".got"); if (!GotSec) return Error::success(); @@ -3333,19 +3328,14 @@ static void printMipsReginfoData(ScopedPrinter &W, template void ELFDumper::printMipsReginfo() { const ELFFile *Obj = ObjF->getELFFile(); - Expected RegInfoOrErr = findSectionByName(".reginfo"); - if (!RegInfoOrErr) { - reportUniqueWarning(RegInfoOrErr.takeError()); - return; - } - - if ((*RegInfoOrErr) == nullptr) { + const Elf_Shdr *RegInfo = findSectionByName(".reginfo"); + if (!RegInfo) { W.startLine() << "There is no .reginfo section in the file.\n"; return; } ArrayRef Sec = unwrapOrError(ObjF->getFileName(), - Obj->getSectionContents(*RegInfoOrErr)); + Obj->getSectionContents(RegInfo)); if (Sec.size() != sizeof(Elf_Mips_RegInfo)) { W.startLine() << "The .reginfo section has a wrong size.\n"; return; @@ -3358,21 +3348,16 @@ template void ELFDumper::printMipsReginfo() { template void ELFDumper::printMipsOptions() { const ELFFile *Obj = ObjF->getELFFile(); - Expected MipsOptOrErr = findSectionByName(".MIPS.options"); - if (!MipsOptOrErr) { - reportUniqueWarning(MipsOptOrErr.takeError()); - return; - } - - if ((*MipsOptOrErr) == nullptr) { + const Elf_Shdr *MipsOpts = findSectionByName(".MIPS.options"); + if (!MipsOpts) { W.startLine() << "There is no .MIPS.options section in the file.\n"; return; } DictScope GS(W, "MIPS Options"); - ArrayRef Sec = unwrapOrError(ObjF->getFileName(), - Obj->getSectionContents(*MipsOptOrErr)); + ArrayRef Sec = + unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(MipsOpts)); while (!Sec.empty()) { if (Sec.size() < sizeof(Elf_Mips_Options)) { W.startLine() << "The .MIPS.options section has a wrong size.\n"; @@ -5982,16 +5967,13 @@ template Expected *> getMipsAbiFlagsSection(const ELFObjectFile *ObjF, const ELFDumper &Dumper) { - Expected SecOrErr = - Dumper.findSectionByName(".MIPS.abiflags"); - if (!SecOrErr) - return SecOrErr.takeError(); - if (*SecOrErr == nullptr) + const typename ELFT::Shdr *Sec = 
Dumper.findSectionByName(".MIPS.abiflags"); + if (Sec == nullptr) return nullptr; const ELFFile *Obj = ObjF->getELFFile(); constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: "; - Expected> DataOrErr = Obj->getSectionContents(*SecOrErr); + Expected> DataOrErr = Obj->getSectionContents(Sec); if (!DataOrErr) return createError(ErrPrefix + toString(DataOrErr.takeError())); From 7c4782ce91d66a8447a851362b99bb86a42b7c08 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 4 Aug 2020 09:27:01 +0100 Subject: [PATCH 273/600] [clang-tidy] Fix regression in RenamerClangTidy See bug https://bugs.llvm.org/show_bug.cgi\?id\=46976 --- clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp | 5 +++-- .../clang-tidy/checkers/readability-identifier-naming.cpp | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index 040378d980f1a..2d67ca4a16180 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -157,6 +157,9 @@ void RenamerClangTidyCheck::addUsage( RenamerClangTidyCheck::NamingCheckFailure &Failure = NamingCheckFailures[Decl]; + if (!Failure.RawUsageLocs.insert(FixLocation.getRawEncoding()).second) + return; + if (!Failure.ShouldFix()) return; @@ -165,8 +168,6 @@ void RenamerClangTidyCheck::addUsage( if (!utils::rangeCanBeFixed(Range, SourceMgr)) Failure.FixStatus = RenamerClangTidyCheck::ShouldFixStatus::InsideMacro; - - Failure.RawUsageLocs.insert(FixLocation.getRawEncoding()); } void RenamerClangTidyCheck::addUsage(const NamedDecl *Decl, SourceRange Range, diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp index 24c1c4270dec8..fed362bbecdec 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability-identifier-naming.cpp @@ -578,3 +578,8 @@ void Foo() { #undef M1 #undef DUP } // namespace scratchspace + +template +auto GetRes(type_t& Param) -> decltype(Param.res()); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: invalid case style for parameter 'Param' +// CHECK-FIXES: auto GetRes(type_t& a_param) -> decltype(a_param.res()); From 4bfbf74e57ad02c1b6dabe099148e079613cac54 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 4 Aug 2020 04:28:11 -0400 Subject: [PATCH 274/600] [MLIR] Add an integration test for 2 D vector.transfer_read Added a "clone" of the 1D vector's test_transfer_read and added a second dimensionality. The test is not as generic as I would like it to be, because more generic versions appear to break the compiler or the runtime at this stage. As bug are fixed, I will be happy to add another more complete test. 
Differential Revision: https://reviews.llvm.org/D83096 --- .../Vector/CPU/test-transfer-read-2d.mlir | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir new file mode 100644 index 0000000000000..b6146137f4e1f --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -0,0 +1,59 @@ +// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @transfer_read_2d(%A : memref, %base1: index, %base2: index) { + %fm42 = constant -42.0: f32 + %f = vector.transfer_read %A[%base1, %base2], %fm42 + {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : + memref, vector<4x9xf32> + vector.print %f: vector<4x9xf32> + return +} + +func @transfer_write_2d(%A : memref, %base1: index, %base2: index) { + %fn1 = constant -1.0 : f32 + %vf0 = splat %fn1 : vector<1x4xf32> + vector.transfer_write %vf0, %A[%base1, %base2] + {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : + vector<1x4xf32>, memref + return +} + +func @entry() { + %c0 = constant 0: index + %c1 = constant 1: index + %c2 = constant 2: index + %c3 = constant 3: index + %c4 = constant 4: index + %c5 = constant 5: index + %c8 = constant 5: index + %f10 = constant 10.0: f32 + // work with dims of 4, not of 3 + %first = constant 3: index + %second = constant 4: index + %A = alloc(%first, %second) : memref + scf.for %i = %c0 to %first step %c1 { + %i32 = index_cast %i : index to i32 + %fi = sitofp %i32 : i32 to f32 + %fi10 = mulf %fi, %f10 : f32 + scf.for %j = %c0 to %second step %c1 { + %j32 = index_cast %j : index to i32 + %fj = sitofp %j32 : i32 to f32 + %fres = addf %fi10, %fj : f32 + store %fres, %A[%i, %j] : memref + } + } + // On input, memory contains [[ 0, 1, 2, ...], [10, 11, 12, ...], ...] + // Read shifted by 2 and pad with -42: + call @transfer_read_2d(%A, %c1, %c2) : (memref, index, index) -> () + // Write into memory shifted by 3 + call @transfer_write_2d(%A, %c3, %c1) : (memref, index, index) -> () + // Read shifted by 0 and pad with -42: + call @transfer_read_2d(%A, %c0, %c0) : (memref, index, index) -> () + return +} + +// CHECK: ( ( 12, 13, -42, -42, -42, -42, -42, -42, -42 ), ( 22, 23, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) +// CHECK: ( ( 0, 1, 2, 3, -42, -42, -42, -42, -42 ), ( 10, 11, 12, 13, -42, -42, -42, -42, -42 ), ( 20, 21, 22, 23, -42, -42, -42, -42, -42 ), ( -42, -42, -42, -42, -42, -42, -42, -42, -42 ) ) From 5d597e20dfdb8eea4c11375f1ae5940e151246c4 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 4 Aug 2020 16:37:20 +0800 Subject: [PATCH 275/600] [DebugInfo][unittest] Use YAML to generate the .debug_loclists section. DWARFYAML supports generating the .debug_loclists section. We can use it to simplify tests. 
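
The workflow this change moves the unit test to can be outlined on its own. This is a hedged usage sketch assembled from the calls visible in the diff that follows; the YAML payload here is an abbreviated stand-in, not the test's real input.

  // Emit in-memory section buffers straight from YAML and hand them to
  // DWARFContext; no hand-encoded byte vector and no manual length fixup
  // (the old "Loclists[0] = Loclists.size() - 4" step) is needed.
  llvm::StringRef YamlData = "debug_str:\n  - ''\n"; // abbreviated payload
  llvm::Expected<llvm::StringMap<std::unique_ptr<llvm::MemoryBuffer>>>
      Sections =
          llvm::DWARFYAML::emitDebugSections(YamlData, /*IsLittleEndian=*/true);
  if (Sections) {
    std::unique_ptr<llvm::DWARFContext> Ctx = llvm::DWARFContext::create(
        *Sections, /*AddrSize=*/4, /*isLittleEndian=*/true);
  }
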
Reviewed By: jhenderson, grimar Differential Revision: https://reviews.llvm.org/D85179 --- .../DebugInfo/DWARF/DWARFDieTest.cpp | 47 +++++++------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp index 9fb6a48157cfb..1d468a956e2b7 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp @@ -46,42 +46,27 @@ TEST(DWARFDie, getLocations) { BlockData: [ 0x47 ] - Value: 20 - Value: 25 + debug_loclists: + - AddressSize: 4 + OffsetEntryCount: 0 + Lists: + - Entries: + - Operator: DW_LLE_start_length + Values: [ 0x01, 0x02 ] + - Operator: DW_LLE_end_of_list + - Entries: + - Operator: DW_LLE_startx_length + Values: [ 0x01, 0x02 ] + - Operator: DW_LLE_end_of_list + - Entries: + - Operator: DW_LLE_start_length + Values: [ 0x01, 0x02 ] + ## end_of_list intentionally missing. )"; Expected<StringMap<std::unique_ptr<MemoryBuffer>>> Sections = DWARFYAML::emitDebugSections(StringRef(yamldata), /*IsLittleEndian=*/true); ASSERT_THAT_EXPECTED(Sections, Succeeded()); - std::vector<uint8_t> Loclists{ - // Header - 0, 0, 0, 0, // Length - 5, 0, // Version - 4, // Address size - 0, // Segment selector size - 0, 0, 0, 0, // Offset entry count - // First location list. - DW_LLE_start_length, // First entry - 1, 0, 0, 0, // Start offset - 2, // Length - 0, // Expression length - DW_LLE_end_of_list, - // Second location list. - DW_LLE_startx_length, // First entry - 1, // Start index - 2, // Length - 0, // Expression length - DW_LLE_end_of_list, - // Third location list. - DW_LLE_start_length, // First entry - 1, 0, 0, 0, // Start offset - 2, // Length - 0, // Expression length - // end_of_list intentionally missing - }; - Loclists[0] = Loclists.size() - 4; - Sections->try_emplace( - "debug_loclists", - MemoryBuffer::getMemBuffer(toStringRef(Loclists), "debug_loclists", - /*RequiresNullTerminator=*/false)); std::unique_ptr<DWARFContext> Ctx = DWARFContext::create(*Sections, 4, /*isLittleEndian=*/true); DWARFCompileUnit *CU = Ctx->getCompileUnitForOffset(0); From 94862018375f2330b070cbf36085919b13a0c7a4 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 29 Jul 2020 17:06:44 +0300 Subject: [PATCH 276/600] [llvm-readobj/readelf] - Refine the implementation of printMipsOptions(). `printMipsOptions()` and the related test currently have the following issues: 1) It does not check the value of the Elf_Mips_Options::size field. 2) For ODK_REGINFO options it is possible to read past the end of the buffer, because there is no check against `sizeof(Elf_Mips_RegInfo)`. 3) The error about the broken size is just printed to the standard output. 4) The binary input is used for the test. 5) There is no testing for multiple options in the .MIPS.options section, though the code supports it. 6) Only llvm-readobj is tested, but not llvm-readelf. 7) The "Unsupported MIPS options tag" message does not reveal the tag ID/name. This patch fixes all of these points.
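The hardening boils down to one discipline: validate every size field against the bytes that actually remain before trusting it. A self-contained sketch of the same loop shape (RecordHeader is a hypothetical stand-in, not the real Elf_Mips_Options layout):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

struct RecordHeader {
  uint8_t Kind;
  uint8_t Size; // Total record size in bytes, including this header.
};

// Walks a buffer of variable-size records, refusing to trust any Size field
// until it has been checked against the remaining bytes.
std::optional<std::vector<RecordHeader>> parseRecords(const uint8_t *Buf,
                                                      size_t Len) {
  std::vector<RecordHeader> Out;
  size_t Off = 0;
  while (Off < Len) {
    if (Len - Off < sizeof(RecordHeader))
      return std::nullopt; // Truncated header.
    RecordHeader H;
    std::memcpy(&H, Buf + Off, sizeof(H));
    if (H.Size < sizeof(RecordHeader) || H.Size > Len - Off)
      return std::nullopt; // Descriptor overruns the section.
    Out.push_back(H);
    Off += H.Size; // Size >= sizeof(RecordHeader), so we always advance.
  }
  return Out;
}

The patched readMipsOptions() below follows the same shape, but returns a descriptive Error instead of std::nullopt so the caller can warn and keep what was already decoded.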
Differential revision: https://reviews.llvm.org/D84854 --- .../ELF/Inputs/options.obj.elf-mipsel | Bin 1720 -> 0 bytes .../llvm-readobj/ELF/mips-options-sec.test | 96 +++++++++++++++++- llvm/tools/llvm-readobj/ELFDumper.cpp | 72 ++++++++++--- 3 files changed, 149 insertions(+), 19 deletions(-) delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/options.obj.elf-mipsel diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/options.obj.elf-mipsel b/llvm/test/tools/llvm-readobj/ELF/Inputs/options.obj.elf-mipsel deleted file mode 100644 index 6309d87c592bd1db51a22b1ba5d863cd7ece3bbf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1720 zcmbtT-D(q25S~ri8vlbR1`)eLLQ|o~w3Rdgg`j8=e<+9`^d_5RvxaVV!|t}!8+`%a zz?-~*FJQHJ<%N$RB$r;QSG|ifn;Ca|h&MWL&NuVTH)m((?7q4G_(8@nK+J#~B$2QH zN13UA0-)K5-3FXT<#=-76yVb&4<9Ev_%JE`7-j*C+bg*`s;+kc>fZp04q<6*!pS2O z^JI&~Ir#j(lpkkH-w(4`JHj(Yc-F{-11AS#)=kyv-NfJ;u0>q;EY~%cy1u`33ZU&; zo@4IWLD=&>b6wR{#Vkj`uD4^ZRo6Ev)n&8nx_kYbDXzARrzB>Sr~YMtUkN+QYZ#eK zGOAfKOINipn1i2_G)MjhE(36?Y-boZ5ibA_9sMTr+lYDg({Pc4cxdy5KL=h-^9u=Y zGS5$taws0>FL4Mw7JP@;P75!{mz6ZE0^7AzWba3y+J3)pd+4;n5Y&^0ThEj~i13ai zvRb{aYdIklRTu>@tsQg%z>5Xt`H`(QpFUp+BdfgwgP`X{T}r7okUp|nh_CuFcC5$( z)$iD?T}R=$)mwO+Pb^7S)v9#-fA|v1XMLbAcmnUV;Ro$Adk&FW%5hv^e2u90M5!TdGfdasMUBo;*^o9gGv1R;@AVF*bdJ;wo}ctd z{m=EYA-=Vp;Jv~)it)xV+q zU+VSyryrAM@~`z*u=Xs#0LLw(MFzh!ettQQl1)U?9_4RL>k|=rH5SPINmA;CA7Fy7 SS|l-UaXrn4j6!-xz5X}SI(fAK diff --git a/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test b/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test index ef5ece472821e..19cae2627e750 100644 --- a/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test +++ b/llvm/test/tools/llvm-readobj/ELF/mips-options-sec.test @@ -1,11 +1,22 @@ ## Check that we are able to dump the SHT_MIPS_OPTIONS section using -A properly. -# RUN: llvm-readobj -A %p/Inputs/options.obj.elf-mipsel | FileCheck %s +## Check we are able to dump multiple MIPS options properly. +# RUN: yaml2obj %s -o %t1 +# RUN: llvm-readobj -A %t1 | FileCheck %s +# RUN: llvm-readelf -A %t1 | FileCheck %s # CHECK: MIPS Options { # CHECK-NEXT: ODK_REGINFO { +# CHECK-NEXT: GP: 0x807060504030201 +# CHECK-NEXT: General Mask: 0xD0C0B0A +# CHECK-NEXT: Co-Proc Mask0: 0x88776655 +# CHECK-NEXT: Co-Proc Mask1: 0xCCBBAA99 +# CHECK-NEXT: Co-Proc Mask2: 0x1EFFEEDD +# CHECK-NEXT: Co-Proc Mask3: 0x5E4E3E2E +# CHECK-NEXT: } +# CHECK-NEXT: ODK_REGINFO { # CHECK-NEXT: GP: 0x0 -# CHECK-NEXT: General Mask: 0xF2000017 +# CHECK-NEXT: General Mask: 0x0 # CHECK-NEXT: Co-Proc Mask0: 0x0 # CHECK-NEXT: Co-Proc Mask1: 0x0 # CHECK-NEXT: Co-Proc Mask2: 0x0 @@ -14,7 +25,7 @@ # CHECK-NEXT: } ## Check that we try to dump the .MIPS.options section when we are able to locate it by name. 
-# RUN: yaml2obj --docnum=1 -DNAME=0xffff %s -o %t.err1 +# RUN: yaml2obj -DNAME=0xffff %s -o %t.err1 # RUN: llvm-readelf -A %t.err1 2>&1 | \ # RUN: FileCheck %s -DFILE=%t.err1 --check-prefix=NAME-ERR-FOUND --implicit-check-not=warning: # RUN: llvm-readobj -A %t.err1 2>&1 | \ @@ -23,6 +34,22 @@ # NAME-ERR-FOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 1: a section [index 1] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table # NAME-ERR-FOUND-NEXT: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 3: a section [index 3] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table # NAME-ERR-FOUND: MIPS Options { +# NAME-ERR-FOUND-NEXT: ODK_REGINFO { +# NAME-ERR-FOUND-NEXT: GP: 0x807060504030201 +# NAME-ERR-FOUND-NEXT: General Mask: 0xD0C0B0A +# NAME-ERR-FOUND-NEXT: Co-Proc Mask0: 0x88776655 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask1: 0xCCBBAA99 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask2: 0x1EFFEEDD +# NAME-ERR-FOUND-NEXT: Co-Proc Mask3: 0x5E4E3E2E +# NAME-ERR-FOUND-NEXT: } +# NAME-ERR-FOUND-NEXT: ODK_REGINFO { +# NAME-ERR-FOUND-NEXT: GP: 0x0 +# NAME-ERR-FOUND-NEXT: General Mask: 0x0 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask0: 0x0 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask1: 0x0 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask2: 0x0 +# NAME-ERR-FOUND-NEXT: Co-Proc Mask3: 0x0 +# NAME-ERR-FOUND-NEXT: } # NAME-ERR-FOUND-NEXT: } --- !ELF @@ -37,14 +64,75 @@ Sections: - Name: .MIPS.options Type: SHT_MIPS_OPTIONS ShName: [[OPTNAME=]] + ShSize: [[SECSIZE=]] + ContentArray: [ [[KIND=0x1]], ## Kind. ODK_REGINFO == 1. + [[DESCSIZE=0x28]], ## Byte size of descriptor, including this header. + 0x0, 0x0, ## Section header index of section affected or 0 for global options. + 0x0, 0x0, 0x0, 0x0, ## Kind-specific information. + 0xA, 0xB, 0xC, 0xD, ## ODK_REGINFO: bit-mask of used general registers. + 0x11, 0x22, 0x33, 0x44, ## ODK_REGINFO: unused padding field. + 0x55, 0x66, 0x77, 0x88, ## ODK_REGINFO: bit-mask of used co-processor registers (0). + 0x99, 0xAA, 0xBB, 0xCC, ## ODK_REGINFO: bit-mask of used co-processor registers (1). + 0xDD, 0xEE, 0xFF, 0x1E, ## ODK_REGINFO: bit-mask of used co-processor registers (2). + 0x2E, 0x3E, 0x4E, 0x5E, ## ODK_REGINFO: bit-mask of used co-processor registers (3). + 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, ## ODK_REGINFO: gp register value. +## A descriptor for one more arbitrary supported option. + 0x1, 0x28, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ] - Type: SHT_PROGBITS ShName: [[NAME=]] ## Check we report a warning when we are unable to find the .MIPS.options section due to an error.
-# RUN: yaml2obj --docnum=1 -DOPTNAME=0xffff %s -o %t.err2 +# RUN: yaml2obj -DOPTNAME=0xffff %s -o %t.err2 # RUN: llvm-readelf -A %t.err2 2>&1 | \ # RUN: FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS Options" # RUN: llvm-readobj -A %t.err2 2>&1 | \ # RUN: FileCheck %s -DFILE=%t.err2 --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning: --implicit-check-not="MIPS Options" # NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_MIPS_OPTIONS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table + +## Check we report a warning when the .MIPS.options section has a size that is less than the +## size of the .MIPS.options description header. + +# RUN: yaml2obj %s -DSECSIZE=0x1 -o %t2 +# RUN: llvm-readelf -A %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=SEC-SIZE +# RUN: llvm-readobj -A %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=SEC-SIZE + +# SEC-SIZE: MIPS Options { +# SEC-SIZE-NEXT: warning: '[[FILE]]': the .MIPS.options section has an invalid size (0x1) +# SEC-SIZE-NEXT: } + +## Check we report a warning when the .MIPS.options description header has a size +## that goes past the end of the section. + +# RUN: yaml2obj %s -DDESCSIZE=0x51 -o %t3 +# RUN: llvm-readelf -A %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=DESC-SIZE +# RUN: llvm-readobj -A %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=DESC-SIZE + +# DESC-SIZE: MIPS Options { +# DESC-SIZE-NEXT: warning: '[[FILE]]': a descriptor of size 0x51 at offset 0x0 goes past the end of the .MIPS.options section of size 0x50 +# DESC-SIZE-NEXT: } + +## Check we are able to skip unsupported options and continue dumping.
+ +# RUN: yaml2obj %s -DKIND=0x2 -o %t4 +# RUN: llvm-readelf -A %t4 2>&1 | FileCheck %s -DFILE=%t4 -DTAG="ODK_EXCEPTIONS (2)" --check-prefix=KIND +# RUN: llvm-readobj -A %t4 2>&1 | FileCheck %s -DFILE=%t4 -DTAG="ODK_EXCEPTIONS (2)" --check-prefix=KIND + +# RUN: yaml2obj %s -DKIND=0xFF -o %t5 +# RUN: llvm-readelf -A %t5 2>&1 | FileCheck %s -DFILE=%t5 -DTAG="Unknown (255)" --check-prefix=KIND +# RUN: llvm-readobj -A %t5 2>&1 | FileCheck %s -DFILE=%t5 -DTAG="Unknown (255)" --check-prefix=KIND + +# KIND: MIPS Options { +# KIND-NEXT: Unsupported MIPS options tag: [[TAG]] +# KIND-NEXT: ODK_REGINFO { +# KIND-NEXT: GP: 0x0 +# KIND-NEXT: General Mask: 0x0 +# KIND-NEXT: Co-Proc Mask0: 0x0 +# KIND-NEXT: Co-Proc Mask1: 0x0 +# KIND-NEXT: Co-Proc Mask2: 0x0 +# KIND-NEXT: Co-Proc Mask3: 0x0 +# KIND-NEXT: } +# KIND-NEXT: } diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 046ade9abf86d..2b7a103df9752 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3346,6 +3346,41 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() { printMipsReginfoData(W, *Reginfo); } +template <class ELFT> +static Expected<const Elf_Mips_Options<ELFT> *> +readMipsOptions(const uint8_t *SecBegin, ArrayRef<uint8_t> &SecData, + bool &IsSupported) { + if (SecData.size() < sizeof(Elf_Mips_Options<ELFT>)) + return createError("the .MIPS.options section has an invalid size (0x" + + Twine::utohexstr(SecData.size()) + ")"); + + auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(SecData.data()); + if (O->size > SecData.size()) { + const uint64_t Offset = SecData.data() - SecBegin; + const uint64_t SecSize = Offset + SecData.size(); + return createError("a descriptor of size 0x" + Twine::utohexstr(O->size) + + " at offset 0x" + Twine::utohexstr(Offset) + + " goes past the end of the .MIPS.options " + "section of size 0x" + + Twine::utohexstr(SecSize)); + } + + IsSupported = O->kind == ODK_REGINFO; + size_t ExpectedSize = + sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); + + if (IsSupported) + if (O->size < ExpectedSize) + return createError( + "a .MIPS.options entry of kind " + + Twine(getElfMipsOptionsOdkType(O->kind)) + + " has an invalid size (0x" + Twine::utohexstr(O->size) + + "), the expected size is 0x" + Twine::utohexstr(ExpectedSize)); + + SecData = SecData.drop_front(O->size); + return O; +} + template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() { const ELFFile<ELFT> *Obj = ObjF->getELFFile(); const Elf_Shdr *MipsOpts = findSectionByName(".MIPS.options"); @@ -3356,24 +3391,31 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() { DictScope GS(W, "MIPS Options"); - ArrayRef<uint8_t> Sec = + ArrayRef<uint8_t> Data = unwrapOrError(ObjF->getFileName(), Obj->getSectionContents(MipsOpts)); - while (!Sec.empty()) { - if (Sec.size() < sizeof(Elf_Mips_Options<ELFT>)) { - W.startLine() << "The .MIPS.options section has a wrong size.\n"; - return; - } - auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(Sec.data()); - DictScope GS(W, getElfMipsOptionsOdkType(O->kind)); - switch (O->kind) { - case ODK_REGINFO: - printMipsReginfoData(W, O->getRegInfo()); - break; - default: - W.startLine() << "Unsupported MIPS options tag.\n"; + const uint8_t *const SecBegin = Data.begin(); + while (!Data.empty()) { + bool IsSupported; + Expected<const Elf_Mips_Options<ELFT> *> OptsOrErr = + readMipsOptions(SecBegin, Data, IsSupported); + if (!OptsOrErr) { + reportUniqueWarning(OptsOrErr.takeError()); break; } - Sec = Sec.slice(O->size); + + unsigned Kind = (*OptsOrErr)->kind; + const char *Type = getElfMipsOptionsOdkType(Kind); + if (!IsSupported) { + W.startLine() << "Unsupported MIPS options tag: " << Type << " (" << Kind + << ")\n"; + 
continue; + } + + DictScope GS(W, Type); + if (Kind == ODK_REGINFO) + printMipsReginfoData(W, (*OptsOrErr)->getRegInfo()); + else + llvm_unreachable("unexpected .MIPS.options section descriptor kind"); } } From d9d22105792e08267583961a66ab6a53992fff89 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 4 Aug 2020 11:56:58 +0300 Subject: [PATCH 277/600] [llvm-readobj] - An attempt to fix BB. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://lab.llvm.org:8011/builders/clang-cmake-x86_64-avx2-linux/builds/15710/steps/build%20stage%201/logs/stdio fails with: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In function ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&)’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:19: error: the value of ‘ExpectedSize’ is not usable in a constant expression if (O->size < ExpectedSize) ^ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3369:10: note: ‘size_t ExpectedSize’ is not const size_t ExpectedSize = ^ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:12: error: parse error in template argument list if (O->size < ExpectedSize) ^ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In instantiation of ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&) [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>; uint8_t = unsigned char]’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3400:30: required from ‘void {anonymous}::ELFDumper::printMipsOptions() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:2878:21: required from ‘void {anonymous}::ELFDumper::printArchSpecificInfo() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:6999:1: required from here /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:5: error: ‘size’ is not a member template function Fix: add 2 `const` words to variables. 
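As an aside, and an editor's interpretation rather than part of the commit: "parse error in template argument list" is the signature of a compiler reading `O->size < ExpectedSize` as the opening of `O->size<...>`. Standard C++ only forces that reading for genuine member templates, where the `template` disambiguator is required; a minimal, self-contained example of that legitimate case (hypothetical types, unrelated to the ELF code):

#include <cstddef>

struct Options {
  unsigned size;
  template <typename U> U get() const { return static_cast<U>(size); }
};

template <typename T> bool sizeBelow(const T *O, std::size_t Limit) {
  // Without 'template', `O->get < unsigned > (...)` would parse as a pair of
  // comparisons; the keyword forces the member-template reading.
  return O->template get<unsigned>() < Limit;
}

bool demo(const Options &Opts) { return sizeBelow(&Opts, 64); }

Here `size` is a plain data member, so the buildbot's GCC appears to have misapplied that rule; the `const` additions merely steer it onto a path it parses correctly.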
--- llvm/tools/llvm-readobj/ELFDumper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2b7a103df9752..3aee021748cb6 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3354,7 +3354,7 @@ readMipsOptions(const uint8_t *SecBegin, ArrayRef<uint8_t> &SecData, return createError("the .MIPS.options section has an invalid size (0x" + Twine::utohexstr(SecData.size()) + ")"); - auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(SecData.data()); + const auto *O = reinterpret_cast<const Elf_Mips_Options<ELFT> *>(SecData.data()); if (O->size > SecData.size()) { const uint64_t Offset = SecData.data() - SecBegin; const uint64_t SecSize = Offset + SecData.size(); @@ -3366,7 +3366,7 @@ readMipsOptions(const uint8_t *SecBegin, ArrayRef<uint8_t> &SecData, } IsSupported = O->kind == ODK_REGINFO; - size_t ExpectedSize = + const size_t ExpectedSize = sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>); if (IsSupported) From 141cb8a1eecc0c843cdd4e788a28d2b6715e4dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Endre=20F=C3=BCl=C3=B6p?= Date: Tue, 21 Jul 2020 09:31:42 +0200 Subject: [PATCH 278/600] [analyzer] Model iterator random incrementation symmetrically Summary: In case a pointer iterator is incremented in a binary plus expression (operator+), where the iterator is on the RHS, IteratorModeling should now detect and track the resulting value. Reviewers: Szelethus, baloghadamsoftware Reviewed By: baloghadamsoftware Subscribers: rnkovacs, whisperity, xazax.hun, baloghadamsoftware, szepet, a.sidorin, mikhail.ramalho, Szelethus, donat.nagy, dkrupp, Charusso, steakhal, martong, ASDenysPetrov, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D83190 --- .../Checkers/IteratorModeling.cpp | 73 ++++++++++++------- clang/test/Analysis/iterator-modeling.cpp | 42 ++++++++++- 2 files changed, 87 insertions(+), 28 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp index 632de9e5dc832..ab5e6a1c9991f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IteratorModeling.cpp @@ -109,7 +109,7 @@ class IteratorModeling bool Postfix) const; void handleRandomIncrOrDecr(CheckerContext &C, const Expr *CE, OverloadedOperatorKind Op, const SVal &RetVal, - const SVal &LHS, const SVal &RHS) const; + const SVal &Iterator, const SVal &Amount) const; void handlePtrIncrOrDecr(CheckerContext &C, const Expr *Iterator, OverloadedOperatorKind OK, SVal Offset) const; void handleAdvance(CheckerContext &C, const Expr *CE, SVal RetVal, SVal Iter, @@ -262,20 +262,30 @@ void IteratorModeling::checkPostStmt(const UnaryOperator *UO, void IteratorModeling::checkPostStmt(const BinaryOperator *BO, CheckerContext &C) const { - ProgramStateRef State = C.getState(); - BinaryOperatorKind OK = BO->getOpcode(); - SVal RVal = State->getSVal(BO->getRHS(), C.getLocationContext()); + const ProgramStateRef State = C.getState(); + const BinaryOperatorKind OK = BO->getOpcode(); + const Expr *const LHS = BO->getLHS(); + const Expr *const RHS = BO->getRHS(); + const SVal LVal = State->getSVal(LHS, C.getLocationContext()); + const SVal RVal = State->getSVal(RHS, C.getLocationContext()); if (isSimpleComparisonOperator(BO->getOpcode())) { - SVal LVal = State->getSVal(BO->getLHS(), C.getLocationContext()); SVal Result = State->getSVal(BO, C.getLocationContext()); handleComparison(C, BO, Result, LVal, RVal, 
BinaryOperator::getOverloadedOperator(OK)); } else if (isRandomIncrOrDecrOperator(OK)) { - if (!BO->getRHS()->getType()->isIntegralOrEnumerationType()) + // In case of operator+ the iterator can be either on the LHS (eg.: it + 1), + // or on the RHS (eg.: 1 + it). Both cases are modeled. + const bool IsIterOnLHS = BO->getLHS()->getType()->isPointerType(); + const Expr *const &IterExpr = IsIterOnLHS ? LHS : RHS; + const Expr *const &AmountExpr = IsIterOnLHS ? RHS : LHS; + + // The non-iterator side must have an integral or enumeration type. + if (!AmountExpr->getType()->isIntegralOrEnumerationType()) return; - handlePtrIncrOrDecr(C, BO->getLHS(), - BinaryOperator::getOverloadedOperator(OK), RVal); + const SVal &AmountVal = IsIterOnLHS ? RVal : LVal; + handlePtrIncrOrDecr(C, IterExpr, BinaryOperator::getOverloadedOperator(OK), + AmountVal); } } @@ -368,11 +378,24 @@ IteratorModeling::handleOverloadedOperator(CheckerContext &C, InstCall->getCXXThisVal(), Call.getArgSVal(0)); return; } - } else { - if (Call.getNumArgs() >= 2 && - Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) { + } else if (Call.getNumArgs() >= 2) { + const Expr *FirstArg = Call.getArgExpr(0); + const Expr *SecondArg = Call.getArgExpr(1); + const QualType FirstType = FirstArg->getType(); + const QualType SecondType = SecondArg->getType(); + + if (FirstType->isIntegralOrEnumerationType() || + SecondType->isIntegralOrEnumerationType()) { + // In case of operator+ the iterator can be either on the LHS (eg.: + // it + 1), or on the RHS (eg.: 1 + it). Both cases are modeled. + const bool IsIterFirst = FirstType->isStructureOrClassType(); + const SVal FirstArg = Call.getArgSVal(0); + const SVal SecondArg = Call.getArgSVal(1); + const SVal &Iterator = IsIterFirst ? FirstArg : SecondArg; + const SVal &Amount = IsIterFirst ? SecondArg : FirstArg; + handleRandomIncrOrDecr(C, OrigExpr, Op, Call.getReturnValue(), - Call.getArgSVal(0), Call.getArgSVal(1)); + Iterator, Amount); return; } } @@ -564,35 +587,35 @@ void IteratorModeling::handleDecrement(CheckerContext &C, const SVal &RetVal, C.addTransition(State); } -void IteratorModeling::handleRandomIncrOrDecr(CheckerContext &C, - const Expr *CE, +void IteratorModeling::handleRandomIncrOrDecr(CheckerContext &C, const Expr *CE, OverloadedOperatorKind Op, const SVal &RetVal, - const SVal &LHS, - const SVal &RHS) const { + const SVal &Iterator, + const SVal &Amount) const { // Increment or decrement the symbolic expressions which represents the // position of the iterator auto State = C.getState(); - const auto *Pos = getIteratorPosition(State, LHS); + const auto *Pos = getIteratorPosition(State, Iterator); if (!Pos) return; - const auto *value = &RHS; - SVal val; - if (auto loc = RHS.getAs()) { - val = State->getRawSVal(*loc); - value = &val; + const auto *Value = &Amount; + SVal Val; + if (auto LocAmount = Amount.getAs()) { + Val = State->getRawSVal(*LocAmount); + Value = &Val; } - auto &TgtVal = (Op == OO_PlusEqual || Op == OO_MinusEqual) ? LHS : RetVal; + const auto &TgtVal = + (Op == OO_PlusEqual || Op == OO_MinusEqual) ? Iterator : RetVal; // `AdvancedState` is a state where the position of `LHS` is advanced. We // only need this state to retrieve the new position, but we do not want // to change the position of `LHS` (in every case). 
- auto AdvancedState = advancePosition(State, LHS, Op, *value); + auto AdvancedState = advancePosition(State, Iterator, Op, *Value); if (AdvancedState) { - const auto *NewPos = getIteratorPosition(AdvancedState, LHS); + const auto *NewPos = getIteratorPosition(AdvancedState, Iterator); assert(NewPos && "Iterator should have position after successful advancement"); diff --git a/clang/test/Analysis/iterator-modeling.cpp b/clang/test/Analysis/iterator-modeling.cpp index 0b76b0bfa7232..f1538839d06c8 100644 --- a/clang/test/Analysis/iterator-modeling.cpp +++ b/clang/test/Analysis/iterator-modeling.cpp @@ -149,7 +149,7 @@ void copy(const std::vector &v) { clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning-re {{$v.end(){{$}}}} } -void plus(const std::vector &v) { +void plus_lhs(const std::vector &v) { auto i1 = v.begin(); clang_analyzer_denote(clang_analyzer_container_begin(v), "$v.begin()"); @@ -161,7 +161,19 @@ void plus(const std::vector &v) { clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning-re{{$v.begin() + 2{{$}}}} } -void plus_negative(const std::vector &v) { +void plus_rhs(const std::vector &v) { + auto i1 = v.begin(); + + clang_analyzer_denote(clang_analyzer_container_begin(v), "$v.begin()"); + + auto i2 = 2 + i1; + + clang_analyzer_eval(clang_analyzer_iterator_container(i2) == &v); // expected-warning{{TRUE}} + clang_analyzer_express(clang_analyzer_iterator_position(i1)); // expected-warning-re{{$v.begin(){{$}}}} + clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning-re{{$v.begin() + 2{{$}}}} +} + +void plus_lhs_negative(const std::vector &v) { auto i1 = v.end(); clang_analyzer_denote(clang_analyzer_container_end(v), "$v.end()"); @@ -173,6 +185,18 @@ void plus_negative(const std::vector &v) { clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning-re {{$v.end() - 2{{$}}}} } +void plus_rhs_negative(const std::vector &v) { + auto i1 = v.end(); + + clang_analyzer_denote(clang_analyzer_container_end(v), "$v.end()"); + + auto i2 = (-2) + i1; + + clang_analyzer_eval(clang_analyzer_iterator_container(i2) == &v); // expected-warning{{TRUE}} + clang_analyzer_express(clang_analyzer_iterator_position(i1)); // expected-warning-re {{$v.end(){{$}}}} + clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning-re {{$v.end() - 2{{$}}}} +} + void minus(const std::vector &v) { auto i1 = v.end(); @@ -1955,7 +1979,7 @@ void minus_equal_ptr_iterator_variable(const cont_with_ptr_iterator &c, i -= n; // no-crash } -void plus_ptr_iterator(const cont_with_ptr_iterator &c) { +void plus_lhs_ptr_iterator(const cont_with_ptr_iterator &c) { auto i1 = c.begin(); clang_analyzer_denote(clang_analyzer_container_begin(c), "$c.begin()"); @@ -1967,6 +1991,18 @@ void plus_ptr_iterator(const cont_with_ptr_iterator &c) { clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning{{$c.begin() + 2}} } +void plus_rhs_ptr_iterator(const cont_with_ptr_iterator &c) { + auto i1 = c.begin(); + + clang_analyzer_denote(clang_analyzer_container_begin(c), "$c.begin()"); + + auto i2 = 2 + i1; + + clang_analyzer_eval(clang_analyzer_iterator_container(i2) == &c); // expected-warning{{TRUE}} + clang_analyzer_express(clang_analyzer_iterator_position(i1)); // expected-warning{{$c.begin()}} + clang_analyzer_express(clang_analyzer_iterator_position(i2)); // expected-warning{{$c.begin() + 2}} +} + void minus_ptr_iterator(const cont_with_ptr_iterator &c) { auto i1 = c.end(); From 
bb303a17260108db42b6a567febffb1e281ffe70 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 4 Aug 2020 12:13:43 +0300 Subject: [PATCH 279/600] [llvm-readobj] - A second attempt to fix BB. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The failure is: http://lab.llvm.org:8011/builders/clang-cmake-x86_64-avx2-linux/builds/15711/steps/build%20stage%201/logs/stdio FAILED: /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Itools/llvm-readobj -I/home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj -Iinclude -I/home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/include -march=broadwell -fPIC -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -fno-exceptions -fno-rtti -UNDEBUG -std=c++14 -MD -MT tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o -MF tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o.d -o tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o -c /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In function ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&)’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:12: error: parse error in template argument list if (O->size < ExpectedSize) ^ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In instantiation of ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&) [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>; uint8_t = unsigned char]’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3400:30: required from ‘void {anonymous}::ELFDumper::printMipsOptions() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:2878:21: required from ‘void {anonymous}::ELFDumper::printArchSpecificInfo() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:6999:1: required from here /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:5: error: ‘size’ is not a member template function if (O->size < ExpectedSize) ^ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In instantiation of ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&) [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, true>; uint8_t = unsigned char]’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3400:30: required from ‘void 
{anonymous}::ELFDumper::printMipsOptions() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:2878:21: required from ‘void {anonymous}::ELFDumper::printArchSpecificInfo() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, true>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:6999:1: required from here /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:5: error: ‘size’ is not a member template function /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In instantiation of ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&) [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, false>; uint8_t = unsigned char]’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3400:30: required from ‘void {anonymous}::ELFDumper::printMipsOptions() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, false>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:2878:21: required from ‘void {anonymous}::ELFDumper::printArchSpecificInfo() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)0u, false>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:6999:1: required from here /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:5: error: ‘size’ is not a member template function /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In instantiation of ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&) [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, false>; uint8_t = unsigned char]’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3400:30: required from ‘void {anonymous}::ELFDumper::printMipsOptions() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, false>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:2878:21: required from ‘void {anonymous}::ELFDumper::printArchSpecificInfo() [with ELFT = llvm::object::ELFType<(llvm::support::endianness)1u, false>]’ /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:6999:1: required from here /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3373:5: error: ‘size’ is not a member template function ninja: build stopped: subcommand failed. Fix: stop using `auto` for `O` variable. 
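Schematically, the workaround trades deduction for an explicit spelling of the pointee type, so the affected GCC never has to deduce a dependent pointer type at that point. A self-contained sketch (MipsOptions is a hypothetical stand-in for the real Elf_Mips_Options):

#include <cstdint>

template <class ELFT> struct MipsOptions { uint8_t kind, size; };

template <class ELFT> uint8_t firstKind(const uint8_t *Data) {
  // Explicit type instead of `const auto *O = ...`: equivalent code, but it
  // sidesteps the deduction the affected GCC releases failed to parse.
  const MipsOptions<ELFT> *O =
      reinterpret_cast<const MipsOptions<ELFT> *>(Data);
  return O->kind;
}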
--- llvm/tools/llvm-readobj/ELFDumper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 3aee021748cb6..19aae413acec4 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3354,7 +3354,8 @@ readMipsOptions(const uint8_t *SecBegin, ArrayRef &SecData, return createError("the .MIPS.options section has an invalid size (0x" + Twine::utohexstr(SecData.size()) + ")"); - const auto *O = reinterpret_cast *>(SecData.data()); + const Elf_Mips_Options *O = + reinterpret_cast *>(SecData.data()); if (O->size > SecData.size()) { const uint64_t Offset = SecData.data() - SecBegin; const uint64_t SecSize = Offset + SecData.size(); From 0acef82480df05486b52618692ebcab1c9b1f77c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 10:16:20 +0100 Subject: [PATCH 280/600] [X86][AVX] Add v8f32 'reverse' HADD(SHUFFLE,SHUFFLE) test coverage Shows missed opportunity for HADD on AVX1 targets with a relatively simple lane crossing post shuffle --- llvm/test/CodeGen/X86/haddsub-4.ll | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/llvm/test/CodeGen/X86/haddsub-4.ll b/llvm/test/CodeGen/X86/haddsub-4.ll index 31b6617b45a96..720b63431a24c 100644 --- a/llvm/test/CodeGen/X86/haddsub-4.ll +++ b/llvm/test/CodeGen/X86/haddsub-4.ll @@ -50,6 +50,78 @@ define <8 x i16> @hadd_reverse2_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ret <8 x i16> %add } +define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x float> %a1) { +; SSE-LABEL: hadd_reverse_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm4 +; SSE-NEXT: haddps %xmm3, %xmm1 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0,3,2] +; SSE-NEXT: haddps %xmm2, %xmm4 +; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,0,3,2] +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: hadd_reverse_v8f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm0[3,1],ymm1[3,1],ymm0[7,5],ymm1[7,5] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[2,3,0,1] +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0],ymm1[2,0],ymm0[6,4],ymm1[6,4] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: hadd_reverse_v8f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-NEXT: retq + %lhs = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> + %rhs = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> + %add = fadd <8 x float> %lhs, %rhs + ret <8 x float> %add +} + +define <8 x float> @hadd_reverse2_v8f32(<8 x float> %a0, <8 x float> %a1) { +; SSE-LABEL: hadd_reverse2_v8f32: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm4 +; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,2],xmm0[1,0] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,2,1,0] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,2,1,0] +; SSE-NEXT: haddps %xmm2, %xmm4 +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,2,1,0] +; SSE-NEXT: haddps %xmm3, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: hadd_reverse2_v8f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = 
ymm1[2,3,0,1] +; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: hadd_reverse2_v8f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4] +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq + %shuf0 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> + %shuf1 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> + %lhs = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> + %rhs = shufflevector <8 x float> %shuf0, <8 x float> %shuf1, <8 x i32> + %add = fadd <8 x float> %lhs, %rhs + ret <8 x float> %add +} + define <16 x i16> @hadd_reverse_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE-LABEL: hadd_reverse_v16i16: ; SSE: # %bb.0: From 4f5ccc72f6a6eba61117806f049ca869233a6864 Mon Sep 17 00:00:00 2001 From: Tatyana Krasnukha Date: Mon, 3 Aug 2020 14:30:12 +0300 Subject: [PATCH 281/600] Partially revert "[cmake] Make MSVC generate appropriate __cplusplus macro definition" The /Zc:__cplusplus option fixes GTEST_LANG_CXX11 value but not GTEST_HAS_TR1_TUPLE, so we still need to force the latter off. Still pass the option since it is required by https://reviews.llvm.org/D78186 too. Differential Revision: https://reviews.llvm.org/D84023 --- llvm/cmake/modules/AddLLVM.cmake | 4 ++++ llvm/utils/unittest/CMakeLists.txt | 3 +++ 2 files changed, 7 insertions(+) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 5b5be953767b5..2ff5c29c6b1a5 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1405,6 +1405,10 @@ function(add_unittest test_suite test_name) set(EXCLUDE_FROM_ALL ON) endif() + # Our current version of gtest uses tr1/tuple which is deprecated on MSVC. + # Since LLVM itself requires C++14, we can safely force it off. + add_definitions(-DGTEST_HAS_TR1_TUPLE=0) + include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) if (NOT LLVM_ENABLE_THREADS) diff --git a/llvm/utils/unittest/CMakeLists.txt b/llvm/utils/unittest/CMakeLists.txt index 9127ebce7b4c3..e7caf37727fce 100644 --- a/llvm/utils/unittest/CMakeLists.txt +++ b/llvm/utils/unittest/CMakeLists.txt @@ -19,6 +19,9 @@ include_directories( googlemock ) +# Gtest 1.8.0 uses tr1/tuple which is deprecated on MSVC, so we force it off. +add_definitions(-DGTEST_HAS_TR1_TUPLE=0) + if(WIN32) add_definitions(-DGTEST_OS_WINDOWS=1) endif() From b7856f9d8ded99045fcc35da8db71bf1e41285f6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Aug 2020 09:55:38 +0100 Subject: [PATCH 282/600] [SCEV] Consolidate some smin/smax folding tests into single test file. This patch moves a few spread out smin/smax tests to smin-smax-folds.ll and adds additional test cases that expose further potential for folds. 
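One of the new cases (smin_to_smax) carries a FIXME that the backedge-taken count ((-1 * (0 smin %n)) + %n) is really just (0 smax %n). The underlying identity, n - min(0, n) == max(0, n), splits cleanly on the sign of n; a throwaway C++ check of it (an editor's aside, not part of the patch):

#include <algorithm>
#include <cassert>

int main() {
  // n >= 0: n - 0 == n == max(0, n); n < 0: n - n == 0 == max(0, n).
  for (int n = -1000; n <= 1000; ++n)
    assert(n - std::min(0, n) == std::max(0, n));
  return 0;
}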
--- .../pr46939-trip-count-count-down.ll | 38 ------ llvm/test/Analysis/ScalarEvolution/smax.ll | 15 --- .../ScalarEvolution/smin-smax-folds.ll | 125 ++++++++++++++++++ 3 files changed, 125 insertions(+), 53 deletions(-) delete mode 100644 llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll delete mode 100644 llvm/test/Analysis/ScalarEvolution/smax.ll create mode 100644 llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll diff --git a/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll b/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll deleted file mode 100644 index 803652e47cf62..0000000000000 --- a/llvm/test/Analysis/ScalarEvolution/pr46939-trip-count-count-down.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt -analyze -scalar-evolution %s 2>&1 | FileCheck %s - -declare void @iteration() - -define void @reverse_loop(i32 %n) { -; CHECK-LABEL: 'reverse_loop' -; CHECK-NEXT: Classifying expressions for: @reverse_loop -; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] -; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Computable } -; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 -; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: -1 LoopDispositions: { %for.body: Computable } -; CHECK-NEXT: Determining loop execution counts for: @reverse_loop -; CHECK-NEXT: Loop %for.body: backedge-taken count is %n -; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 -; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is %n -; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 -; -entry: - %cmp10 = icmp sgt i32 %n, -1 - br i1 %cmp10, label %for.body.lr.ph, label %for.cond.cleanup - -for.body.lr.ph: - br label %for.body - -for.body: - %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] - call void @iteration() - %dec = add nsw i32 %i.011, -1 - %cmp = icmp sgt i32 %i.011, 0 - br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit - -for.cond.cleanup.loopexit: - br label %for.cond.cleanup - -for.cond.cleanup: - ret void -} diff --git a/llvm/test/Analysis/ScalarEvolution/smax.ll b/llvm/test/Analysis/ScalarEvolution/smax.ll deleted file mode 100644 index 2b2c81c8e90df..0000000000000 --- a/llvm/test/Analysis/ScalarEvolution/smax.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: opt < %s -analyze -enable-new-pm=0 -scalar-evolution | FileCheck %s -; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s -; PR1614 - -; CHECK: --> (%a smax %b) -; CHECK: --> (%a smax %b smax %c) -; CHECK-NOT: smax - -define i32 @x(i32 %a, i32 %b, i32 %c) { - %A = icmp sgt i32 %a, %b - %B = select i1 %A, i32 %a, i32 %b - %C = icmp sle i32 %c, %B - %D = select i1 %C, i32 %B, i32 %c - ret i32 %D -} diff --git a/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll b/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll new file mode 100644 index 0000000000000..15ec72317d9ab --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/smin-smax-folds.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -analyze -enable-new-pm=0 -scalar-evolution < %s | FileCheck %s +; RUN: opt -disable-output "-passes=print" < %s 2>&1 | FileCheck %s + +; Tests for smin & smax folds. + +; Test case from PR1614. 
+define i32 @test_PR1614(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: 'test_PR1614' +; CHECK: --> (%a smax %b) +; CHECK: --> (%a smax %b smax %c) +; CHECK-NOT: smax + + %A = icmp sgt i32 %a, %b + %B = select i1 %A, i32 %a, i32 %b + %C = icmp sle i32 %c, %B + %D = select i1 %C, i32 %B, i32 %c + ret i32 %D +} + +declare void @iteration() + +; Test case from PR46939. +; The information from the loop guard can be used to simplify the trip count expression. +define void @smin_simplify_with_guard(i32 %n) { +; CHECK-LABEL: 'smin_simplify_with_guard' +; CHECK-NEXT: Classifying expressions for: @smin_simplify_with_guard +; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] +; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 +; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: -1 LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: Determining loop execution counts for: @smin_simplify_with_guard +; CHECK-NEXT: Loop %for.body: backedge-taken count is %n +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is %n +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %cmp10 = icmp sgt i32 %n, -1 + br i1 %cmp10, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: + br label %for.body + +for.body: + %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] + call void @iteration() + %dec = add nsw i32 %i.011, -1 + %cmp = icmp sgt i32 %i.011, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +define void @smin_to_smax(i32 %n) { +; FIXME: ((-1 * (0 smin %n)) + %n) is actually just (0 smax %n) + +; CHECK-LABEL: 'smin_to_smax' +; CHECK-NEXT: Classifying expressions for: @smin_to_smax +; CHECK-NEXT: %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] +; CHECK-NEXT: --> {%n,+,-1}<%for.body> U: full-set S: full-set Exits: (0 smin %n) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %dec = add nsw i32 %i.011, -1 +; CHECK-NEXT: --> {(-1 + %n),+,-1}<%for.body> U: full-set S: full-set Exits: (-1 + (0 smin %n)) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: Determining loop execution counts for: @smin_to_smax +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 2147483647 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((-1 * (0 smin %n)) + %n) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + br label %for.body.lr.ph + +for.body.lr.ph: + br label %for.body + +for.body: + %i.011 = phi i32 [ %n, %for.body.lr.ph ], [ %dec, %for.body ] + call void @iteration() + %dec = add nsw i32 %i.011, -1 + %cmp = icmp sgt i32 %i.011, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; The information from the loop guard can be used to simplify the trip count expression. 
+define void @smax_simplify_with_guard(i32 %start, i32 %n) { +; CHECK-LABEL: 'smax_simplify_with_guard' +; CHECK-NEXT: Classifying expressions for: @smax_simplify_with_guard +; CHECK-NEXT: %k.0.i26 = phi i32 [ %start, %loop.ph ], [ %inc.i, %loop ] +; CHECK-NEXT: --> {%start,+,1}<%loop> U: full-set S: full-set Exits: (%start smax %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: %inc.i = add nsw i32 %k.0.i26, 1 +; CHECK-NEXT: --> {(1 + %start),+,1}<%loop> U: full-set S: full-set Exits: (1 + (%start smax %n)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: Determining loop execution counts for: @smax_simplify_with_guard +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) + (%start smax %n)) +; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-1 * %start) + (%start smax %n)) +; CHECK-NEXT: Predicates: +; CHECK: Loop %loop: Trip multiple is 1 +entry: + %guard = icmp sge i32 %n, %start + br i1 %guard, label %loop.ph, label %exit + +loop.ph: + br label %loop + +loop: + %k.0.i26 = phi i32 [ %start, %loop.ph ], [ %inc.i, %loop ] + %inc.i = add nsw i32 %k.0.i26, 1 + %cmp26.not.i.not = icmp slt i32 %k.0.i26, %n + br i1 %cmp26.not.i.not, label %loop, label %exit + +exit: + ret void +} From f7658241cb27491b4160a1f7060ef883bc535d09 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 4 Aug 2020 10:25:16 +0100 Subject: [PATCH 283/600] [AArch64] Consider instruction-level contract FMFs in combiner patterns. Currently, instruction level fast math flags are not considered when generating patterns for the machine combiner. This currently leads to some missed opportunities to generate FMAs in combination with `#pragma clang fp contract (fast)`. For example, when building the example below with -O3 for AArch64, no FMADD is generated. If built with -O2 and the DAGCombiner is used instead of the MachineCombiner for FMAs, an FMADD is generated. With this patch, the same code is generated in both cases. float madd_contract(float a, float b, float c) { #pragma clang fp contract (fast) return (a * b) + c; } Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D84930 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 10 ++++-- .../AArch64/machine-combiner-instr-fmf.mir | 32 ++++++++----------- llvm/test/CodeGen/AArch64/neon-fma-FMF.ll | 1 + 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 323ac76e903fd..b6fda6b367bf2 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3861,7 +3861,7 @@ static bool isCombineInstrCandidate64(unsigned Opc) { return false; } -// FP Opcodes that can be combined with a FMUL +// FP Opcodes that can be combined with a FMUL. static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -3883,8 +3883,12 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - return (Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast); + // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by + // the target options or if FADD/FSUB has the contract fast-math flag. 
+ return Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast || + Inst.getFlag(MachineInstr::FmContract); + return true; } return false; } diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir index 23ed96a42b513..992e636011b6b 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,8 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_fast alignment: 4 @@ -46,8 +45,7 @@ body: | # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract @@ -81,7 +79,7 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMADD, because we don't have the contract flag on the FADD. # CHECK-LABEL: name: scalar_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 @@ -121,14 +119,13 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMADD, because we have the contract flag on the FADD. # # CHECK-LABEL: name: scalar_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract_op1 @@ -203,14 +200,13 @@ body: | ... -# Can create FMADD, because both the fmul and fadd have all fast-math flags. +# Can create FMLA, because both the fmul and fadd have all fast-math flags. # # CHECK-LABEL: name: vector_fmadd_fast # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_fast alignment: 4 @@ -243,14 +239,13 @@ body: | ... -# Can create FMADD, because both the fmul and fadd have the contract fast-math flag. +# Can create FMLA, because both the fmul and fadd have the contract fast-math flag. # # CHECK-LABEL: name: vector_fmadd_contract # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract alignment: 4 @@ -283,7 +278,7 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMLA, because we don't have the contract flag on the FADD. 
# # CHECK-LABEL: name: vector_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 @@ -323,14 +318,13 @@ body: | ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMLA, because we have the contract flag on the FADD. # # CHECK-LABEL: name: vector_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract_op1 @@ -364,7 +358,7 @@ body: | ... -# Do not create FMADD, as nsz flag does not allow it. +# Do not create FMLA, as nsz flag does not allow it. # # CHECK-LABEL: name: vector_fmadd_nsz # CHECK: [[C:%.*]]:fpr128 = COPY $q2 diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll index 893d153801a71..0eb173396ce97 100644 --- a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll +++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -O3 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; CHECK-LABEL: fma_1: From 47cea9e82dda941e49a2d9cc11f0414436336d46 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 10:32:27 +0100 Subject: [PATCH 284/600] Revert rG66e7dce714fab "Revert "[X86][SSE] Shuffle combine blends to OR(X,Y) if the relevant elements are known zero."" [X86][SSE] Shuffle combine blends to OR(X,Y) if the relevant elements are known zero (REAPPLIED) This allows us to remove the (depth violating) code in getFauxShuffleMask where we were combining the OR(SHUFFLE,SHUFFLE) shuffle inputs as well, and not just the OR(). This is a minor step toward being able to shuffle combine from/to SELECT/BLENDV as a faux shuffle. Reapplied with fixed signed/unsigned comparisons. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 65 ++++++++++++++----- llvm/test/CodeGen/X86/insertelement-ones.ll | 12 ++-- llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll | 8 +-- .../test/CodeGen/X86/vector-shuffle-128-v8.ll | 5 +- .../CodeGen/X86/vector-shuffle-256-v32.ll | 6 +- 5 files changed, 61 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b80a23f5a608e..79047c90ff999 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other // is a valid shuffle index. - SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0)); - SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1)); + SDValue N0 = peekThroughBitcasts(N.getOperand(0)); + SDValue N1 = peekThroughBitcasts(N.getOperand(1)); if (!N0.getValueType().isVector() || !N1.getValueType().isVector()) return false; SmallVector SrcMask0, SrcMask1; @@ -7413,34 +7413,24 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, true)) return false; - // Shuffle inputs must be the same size as the result. 
- if (llvm::any_of(SrcInputs0, [VT](SDValue Op) { - return VT.getSizeInBits() != Op.getValueSizeInBits(); - })) - return false; - if (llvm::any_of(SrcInputs1, [VT](SDValue Op) { - return VT.getSizeInBits() != Op.getValueSizeInBits(); - })) - return false; - size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); SmallVector Mask0, Mask1; narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0); narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1); - for (size_t i = 0; i != MaskSize; ++i) { + for (int i = 0; i != (int)MaskSize; ++i) { if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef) Mask.push_back(SM_SentinelUndef); else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero) Mask.push_back(SM_SentinelZero); else if (Mask1[i] == SM_SentinelZero) - Mask.push_back(Mask0[i]); + Mask.push_back(i); else if (Mask0[i] == SM_SentinelZero) - Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size())); + Mask.push_back(i + MaskSize); else return false; } - Ops.append(SrcInputs0.begin(), SrcInputs0.end()); - Ops.append(SrcInputs1.begin(), SrcInputs1.end()); + Ops.push_back(N0); + Ops.push_back(N1); return true; } case ISD::INSERT_SUBVECTOR: { @@ -34219,6 +34209,7 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, bool IsUnary) { + unsigned NumMaskElts = Mask.size(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { @@ -34276,6 +34267,46 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef Mask, } } + // Attempt to match against a OR if we're performing a blend shuffle and the + // non-blended source element is zero in each case. + if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && + (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { + bool IsBlend = true; + unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); + unsigned NumV2Elts = V2.getValueType().getVectorNumElements(); + unsigned Scale1 = NumV1Elts / NumMaskElts; + unsigned Scale2 = NumV2Elts / NumMaskElts; + APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts); + APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts); + for (unsigned i = 0; i != NumMaskElts; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + if (M == SM_SentinelZero) { + DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); + DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); + continue; + } + if (M == (int)i) { + DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); + continue; + } + if (M == (int)(i + NumMaskElts)) { + DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); + continue; + } + IsBlend = false; + break; + } + if (IsBlend && + DAG.computeKnownBits(V1, DemandedZeroV1).isZero() && + DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) { + Shuffle = ISD::OR; + SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT(); + return true; + } + } + return false; } diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll index 3d8e42b9c07d0..6a9a401264c56 100644 --- a/llvm/test/CodeGen/X86/insertelement-ones.ll +++ b/llvm/test/CodeGen/X86/insertelement-ones.ll @@ -389,11 +389,9 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm5 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE2-NEXT: pand %xmm5, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE2-NEXT: pandn %xmm3, %xmm5 -; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: por %xmm4, %xmm1 ; SSE2-NEXT: retq @@ -411,11 +409,9 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) { ; SSE3-NEXT: movdqa %xmm3, %xmm4 ; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0] ; SSE3-NEXT: por %xmm4, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE3-NEXT: pand %xmm5, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1] -; SSE3-NEXT: pandn %xmm3, %xmm5 -; SSE3-NEXT: por %xmm5, %xmm1 +; SSE3-NEXT: por %xmm3, %xmm1 ; SSE3-NEXT: pand %xmm2, %xmm1 ; SSE3-NEXT: por %xmm4, %xmm1 ; SSE3-NEXT: retq diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll index 6b49f22f21f1f..9256a43f8e339 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll @@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind { define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind { ; AVX1-LABEL: negative: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14] -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index f448f41cf522e..86423ce76065b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -1713,9 +1713,8 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) { ; ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u] -; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u] -; SSSE3-NEXT: por %xmm1, %xmm0 +; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u] +; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_v8i16_XX4X8acX: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 82df05e5ae068..e5285aebda69e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_ ; 
AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3] ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u] -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u] -; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u] +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255] ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 From 20283ff491a44b8d4526a0915afbe7643705b457 Mon Sep 17 00:00:00 2001 From: Meera Nakrani Date: Tue, 4 Aug 2020 09:38:17 +0000 Subject: [PATCH 285/600] [ARM] Generated SSAT and USAT instructions with shift Added patterns so that both SSAT and USAT instructions are generated with shifts. Added corresponding regression tests. Differential Revision: https://reviews.llvm.org/D85120 --- llvm/lib/Target/ARM/ARMInstrInfo.td | 16 ++++++++++++++ llvm/lib/Target/ARM/ARMInstrThumb2.td | 9 ++++++++ llvm/test/CodeGen/ARM/ssat-with-shift.ll | 27 ++++++++++++++++++++++++ llvm/test/CodeGen/ARM/usat-with-shift.ll | 27 ++++++++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/ssat-with-shift.ll create mode 100644 llvm/test/CodeGen/ARM/usat-with-shift.ll diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index d0c1e14989d32..675cf07328e0d 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -380,6 +380,11 @@ def imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), SDLoc(N), MVT::i32); }]>; +// asr_imm_XFORM - Returns a shift immediate with bit {5} set to 1 +def asr_imm_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(0x20 | N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. def imm16_31 : ImmLeaf<i32, [{ return Imm >= 16 && (int32_t)Imm < 32; }]>; @@ -446,6 +451,8 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>; def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>; +def asr_imm : ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }], asr_imm_XFORM>; + //===----------------------------------------------------------------------===// // NEON/MVE pattern fragments // @@ -4083,6 +4090,15 @@ def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), (SSAT16 imm1_16:$pos, GPRnopc:$a)>; def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), (USAT16 imm0_15:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), + (SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : ARMV6Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), + (SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : ARMV6Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : ARMV6Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), + (USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; + //===----------------------------------------------------------------------===// // Bitwise Instructions. 
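The key trick in the ARMInstrInfo.td hunk above is `asr_imm_XFORM`, which folds the shift kind into the instruction's 6-bit shift-operand field: bit 5 selects ASR (hence `0x20 | amount`, with asr #32 leaving the low five bits zero), while LSL uses the raw 0-31 amount. A small model of that convention, offered as an illustration of the patch's transform rather than the authoritative ARM encoder:

```
#include <cassert>
#include <cstdint>

// Model of the SSAT/USAT shift-operand field: bit 5 picks ASR over
// LSL, bits 4..0 hold the amount (asr #32 wraps its low bits to 0).
enum ShiftKind { LSL, ASR };

static uint32_t encodeSatShift(ShiftKind K, uint32_t Amount) {
  if (K == LSL) {
    assert(Amount <= 31 && "lsl amount is 0..31");
    return Amount;
  }
  assert(Amount >= 1 && Amount <= 32 && "asr amount is 1..32");
  return 0x20 | (Amount & 0x1f); // same result as asr_imm_XFORM's 0x20 | imm
}
```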
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index d5143adaac179..fc92230ac81df 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -2636,6 +2636,15 @@ def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), (t2SSAT16 imm1_16:$pos, GPR:$a)>; def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), (t2USAT16 imm0_15:$pos, GPR:$a)>; +def : T2Pat<(int_arm_ssat (shl GPRnopc:$a, imm0_31:$shft), imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(int_arm_ssat (sra GPRnopc:$a, asr_imm:$shft), imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPRnopc:$a, asr_imm:$shft)>; +def : T2Pat<(int_arm_usat (shl GPRnopc:$a, imm0_31:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$a, imm0_31:$shft)>; +def : T2Pat<(int_arm_usat (sra GPRnopc:$a, asr_imm:$shft), imm0_31:$pos), + (t2USAT imm0_31:$pos, GPRnopc:$a, asr_imm:$shft)>; + //===----------------------------------------------------------------------===// // Shift and rotate Instructions. diff --git a/llvm/test/CodeGen/ARM/ssat-with-shift.ll b/llvm/test/CodeGen/ARM/ssat-with-shift.ll new file mode 100644 index 0000000000000..03670eb7e0075 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ssat-with-shift.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s + +define arm_aapcs_vfpcc i32 @ssat_lsl(i32 %num){ +; CHECK-LABEL: ssat_lsl +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ssat r0, #8, r0, lsl #7 +; CHECK-NEXT: bx lr +entry: + %shl = shl i32 %num, 7 + %0 = tail call i32 @llvm.arm.ssat(i32 %shl, i32 8) + ret i32 %0 +} + +define arm_aapcs_vfpcc i32 @ssat_asr(i32 %num){ +; CHECK-LABEL: ssat_asr +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: ssat r0, #8, r0, asr #7 +; CHECK-NEXT: bx lr +entry: + %shr = ashr i32 %num, 7 + %0 = tail call i32 @llvm.arm.ssat(i32 %shr, i32 8) + ret i32 %0 +} + +declare i32 @llvm.arm.ssat(i32, i32) diff --git a/llvm/test/CodeGen/ARM/usat-with-shift.ll b/llvm/test/CodeGen/ARM/usat-with-shift.ll new file mode 100644 index 0000000000000..56444394157a8 --- /dev/null +++ b/llvm/test/CodeGen/ARM/usat-with-shift.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+dsp %s -o - | FileCheck %s + +define arm_aapcs_vfpcc i32 @usat_lsl(i32 %num){ +; CHECK-LABEL: usat_lsl +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: usat r0, #7, r0, lsl #2 +; CHECK-NEXT: bx lr +entry: + %shl = shl i32 %num, 2 + %0 = tail call i32 @llvm.arm.usat(i32 %shl, i32 7) + ret i32 %0 +} + +define arm_aapcs_vfpcc i32 @usat_asr(i32 %num){ +; CHECK-LABEL: usat_asr +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: usat r0, #7, r0, asr #2 +; CHECK-NEXT: bx lr +entry: + %shr = ashr i32 %num, 2 + %0 = tail call i32 @llvm.arm.usat(i32 %shr, i32 7) + ret i32 %0 +} + +declare i32 @llvm.arm.usat(i32, i32) From 3c7e7d40a996f998dfe3c36abb70accdf4f20d79 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 4 Aug 2020 10:43:42 +0100 Subject: [PATCH 286/600] [BasicAA] Enable -basic-aa-recphi by default This option was added a while back, to help improve AA around pointer phi loops. It looks for phi(gep(phi, const), x) loops, checking if x can then prove more precise aliasing info. 
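A typical source pattern that lowers to the phi(gep(phi, const), x) shape is an ordinary pointer-increment loop; the C++ below is purely illustrative (the analysis itself runs on the resulting IR, sketched in the comments):

```
// After optimization 'p' becomes a recursive IR phi:
//   %p = phi i32* [ %base, %entry ], [ %p.next, %loop ]
//   %p.next = getelementptr inbounds i32, i32* %p, i64 1
// With -basic-aa-recphi, BasicAA reasons about %p through its
// non-recursive input %base, so for a suitable 'q' it can prove the
// load of *p and the store to *q do not alias.
int sumAndFlush(const int *base, int *q, int n) {
  int sum = 0;
  for (const int *p = base; p != base + n; ++p) {
    sum += *p; // load through the recursive phi
    *q = sum;  // store BasicAA can now disambiguate against *p
  }
  return sum;
}
```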
Differential Revision: https://reviews.llvm.org/D82998 --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 2 +- llvm/test/Analysis/BasicAA/phi-loop.ll | 2 +- llvm/test/Analysis/BasicAA/recphi.ll | 2 +- llvm/test/Transforms/LoopIdiom/reuse-cast.ll | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index b1578f4b88e07..2ede4baaf6832 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -66,7 +66,7 @@ using namespace llvm; /// Enable analysis of recursive PHI nodes. static cl::opt EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden, - cl::init(false)); + cl::init(true)); /// By default, even on 32-bit architectures we use 64-bit integers for /// calculations. This will allow us to more-aggressively decompose indexing diff --git a/llvm/test/Analysis/BasicAA/phi-loop.ll b/llvm/test/Analysis/BasicAA/phi-loop.ll index db3023c6560d0..e54752a9223f7 100644 --- a/llvm/test/Analysis/BasicAA/phi-loop.ll +++ b/llvm/test/Analysis/BasicAA/phi-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -basic-aa-recphi=1 -gvn -S | FileCheck %s +; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s ; ; Check that section->word_ofs doesn't get reloaded in every iteration of the ; for loop. diff --git a/llvm/test/Analysis/BasicAA/recphi.ll b/llvm/test/Analysis/BasicAA/recphi.ll index dfc88937bf699..a6157f22a4f97 100644 --- a/llvm/test/Analysis/BasicAA/recphi.ll +++ b/llvm/test/Analysis/BasicAA/recphi.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -basic-aa-recphi -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; CHECK-LABEL: Function: simple: 5 pointers, 0 call sites ; CHECK: NoAlias: float* %src1, float* %src2 diff --git a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll index 0b1bfeaa27302..0dda351515647 100644 --- a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll +++ b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll @@ -83,21 +83,22 @@ declare void @use.i1(i1) define void @reuse_cast_2(i32 %x, i32* %ptr.1.start) { ; CHECK-LABEL: @reuse_cast_2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_1_START1:%.*]] = bitcast i32* [[PTR_1_START:%.*]] to i8* ; CHECK-NEXT: [[STACK:%.*]] = alloca [2 x i32], align 4 ; CHECK-NEXT: [[CAST_TO_REUSE:%.*]] = bitcast [2 x i32]* [[STACK]] to i8* ; CHECK-NEXT: [[C_0:%.*]] = icmp sgt i32 [[X:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[STACK]] to i8* ; CHECK-NEXT: [[PTR_2_START:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[STACK]], i64 0, i64 0 ; CHECK-NEXT: call void @use.i8(i8* [[CAST_TO_REUSE]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[CAST_TO_REUSE]], i8* align 4 [[PTR_1_START1]], i64 8, i1 false) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_1:%.*]] = phi i32* [ [[PTR_1_START:%.*]], [[ENTRY]] ], [ [[PTR_1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_1:%.*]] = phi i32* [ [[PTR_1_START]], [[ENTRY]] ], [ [[PTR_1_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[PTR_2:%.*]] = phi i32* [ [[PTR_2_START]], [[ENTRY]] ], [ [[PTR_2_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[PTR_1_NEXT]] = getelementptr inbounds i32, i32* [[PTR_1]], i64 1 ; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_1]], align 4 ; CHECK-NEXT: [[PTR_2_NEXT]] = getelementptr inbounds i32, 
i32* [[PTR_2]], i64 1 -; CHECK-NEXT: store i32 [[LV]], i32* [[PTR_2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[IV]], 0 ; CHECK-NEXT: br i1 [[C_1]], label [[LOOP]], label [[EXIT:%.*]] From 4e4243848ece311d982d3fe5550b555e26709f9a Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 4 Aug 2020 12:40:10 +0300 Subject: [PATCH 287/600] [llvm-readobj] - A third attempt to fix BB. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit http://lab.llvm.org:8011/builders/clang-cmake-x86_64-avx2-linux/builds/15718/steps/build%20stage%201/logs/stdio: FAILED: /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Itools/llvm-readobj -I/home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj -Iinclude -I/home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/include -march=broadwell -fPIC -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor -Wno-comment -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -fno-exceptions -fno-rtti -UNDEBUG -std=c++14 -MD -MT tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o -MF tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o.d -o tools/llvm-readobj/CMakeFiles/llvm-readobj.dir/ELFDumper.cpp.o -c /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp: In function ‘llvm::Expected*> readMipsOptions(const uint8_t*, llvm::ArrayRef&, bool&)’: /home/ssglocal/clang-cmake-x86_64-avx2-linux/clang-cmake-x86_64-avx2-linux/llvm/llvm/tools/llvm-readobj/ELFDumper.cpp:3374:12: error: parse error in template argument list if (O->size < ExpectedSize) Note: I played with godbolt.org and was able to catch a similar "error in template argument list" error when using gcc 4.9.0 with this code. Fix: try to introduce a variable to store `O->size`; it helped me in godbolt. 
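The shape of the workaround, reduced to a sketch with hypothetical names (not the real ELFDumper types): read the member once into a named local and compare only the local, so the affected compiler never has to parse `O->size <` inside a template context.

```
// Reduced model of the workaround: affected GCC 4.9 releases could
// misparse 'O->size < Expected' in a template context, taking '<' as
// the start of a template argument list.
template <class ELFT> struct OptionsHeader { unsigned char size; };

template <class ELFT>
bool headerFits(const OptionsHeader<ELFT> *O, unsigned Avail) {
  const unsigned char Size = O->size; // hoist the member into a local
  return Size <= Avail;               // then compare the local instead
}
```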
--- llvm/tools/llvm-readobj/ELFDumper.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 19aae413acec4..53ebfd5663c94 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3356,10 +3356,11 @@ readMipsOptions(const uint8_t *SecBegin, ArrayRef &SecData, const Elf_Mips_Options *O = reinterpret_cast *>(SecData.data()); - if (O->size > SecData.size()) { + const uint8_t Size = O->size; + if (Size > SecData.size()) { const uint64_t Offset = SecData.data() - SecBegin; const uint64_t SecSize = Offset + SecData.size(); - return createError("a descriptor of size 0x" + Twine::utohexstr(O->size) + + return createError("a descriptor of size 0x" + Twine::utohexstr(Size) + " at offset 0x" + Twine::utohexstr(Offset) + " goes past the end of the .MIPS.options " "section of size 0x" + @@ -3371,14 +3372,14 @@ readMipsOptions(const uint8_t *SecBegin, ArrayRef &SecData, sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo); if (IsSupported) - if (O->size < ExpectedSize) + if (Size < ExpectedSize) return createError( "a .MIPS.options entry of kind " + Twine(getElfMipsOptionsOdkType(O->kind)) + - " has an invalid size (0x" + Twine::utohexstr(O->size) + + " has an invalid size (0x" + Twine::utohexstr(Size) + "), the expected size is 0x" + Twine::utohexstr(ExpectedSize)); - SecData = SecData.drop_front(O->size); + SecData = SecData.drop_front(Size); return O; } From daa1c6d9d126ea1666463c44930f00df83f3acd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Tue, 4 Aug 2020 11:58:37 +0200 Subject: [PATCH 288/600] [lldb] fix typo --- lldb/source/Core/IOHandlerCursesGUI.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 262a19dc04b4c..144b2112183c2 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -916,9 +916,9 @@ void Menu::DrawMenuTitle(Window &window, bool highlight) { } else { const int shortcut_key = m_key_value; bool underlined_shortcut = false; - const attr_t hilgight_attr = A_REVERSE; + const attr_t highlight_attr = A_REVERSE; if (highlight) - window.AttributeOn(hilgight_attr); + window.AttributeOn(highlight_attr); if (llvm::isPrint(shortcut_key)) { size_t lower_pos = m_name.find(tolower(shortcut_key)); size_t upper_pos = m_name.find(toupper(shortcut_key)); @@ -945,7 +945,7 @@ void Menu::DrawMenuTitle(Window &window, bool highlight) { } if (highlight) - window.AttributeOff(hilgight_attr); + window.AttributeOff(highlight_attr); if (m_key_name.empty()) { if (!underlined_shortcut && llvm::isPrint(m_key_value)) { From e218da7ff39df3aeb7c923fbd75aaa8ace581e37 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 18:15:00 +0900 Subject: [PATCH 289/600] [JumpThreading] Add a test for simplification of cast of any op; NFC --- .../JumpThreading/threadable-edge-cast.ll | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll diff --git a/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll b/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll new file mode 100644 index 0000000000000..71c34dea3a069 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: 
opt -jump-threading -S < %s | FileCheck %s + +declare i32 @f1() +declare i32 @f2() +declare void @f3() + +; trunc(and) should be simplified +define i32 @test(i1 %cond0) { +; CHECK-LABEL: @test( +; CHECK-NEXT: br i1 [[COND0:%.*]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: T1: +; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: F1: +; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: Merge: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, [[T1]] ], [ 0, [[F1]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A]], 1 +; CHECK-NEXT: [[COND:%.*]] = trunc i32 [[AND]] to i1 +; CHECK-NEXT: br i1 [[COND]], label [[T2:%.*]], label [[F2:%.*]] +; CHECK: T2: +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: ret i32 [[B]] +; CHECK: F2: +; CHECK-NEXT: ret i32 [[B]] +; + br i1 %cond0, label %T1, label %F1 +T1: + %v1 = call i32 @f1() + br label %Merge + +F1: + %v2 = call i32 @f2() + br label %Merge + +Merge: + %A = phi i32 [10, %T1], [0, %F1] + %B = phi i32 [%v1, %T1], [%v2, %F1] + %And = and i32 %A, 1 + %cond = trunc i32 %And to i1 + br i1 %cond, label %T2, label %F2 + +T2: + call void @f3() + ret i32 %B + +F2: + ret i32 %B +} + +; trunc(select) should be simplified +define i32 @test2(i1 %cond0) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: br i1 [[COND0:%.*]], label [[T1:%.*]], label [[F1:%.*]] +; CHECK: T1: +; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: F1: +; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: Merge: +; CHECK-NEXT: [[A:%.*]] = phi i1 [ true, [[T1]] ], [ false, [[F1]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ] +; CHECK-NEXT: [[A2:%.*]] = xor i1 [[A]], true +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[A2]], i32 10, i32 0 +; CHECK-NEXT: [[COND:%.*]] = trunc i32 [[SEL]] to i1 +; CHECK-NEXT: br i1 [[COND]], label [[T2:%.*]], label [[F2:%.*]] +; CHECK: T2: +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: ret i32 [[B]] +; CHECK: F2: +; CHECK-NEXT: ret i32 [[B]] +; + br i1 %cond0, label %T1, label %F1 +T1: + %v1 = call i32 @f1() + br label %Merge + +F1: + %v2 = call i32 @f2() + br label %Merge + +Merge: + %A = phi i1 [1, %T1], [0, %F1] + %B = phi i32 [%v1, %T1], [%v2, %F1] + %A2 = xor i1 %A, 1 + %sel = select i1 %A2, i32 10, i32 0 + %cond = trunc i32 %sel to i1 + br i1 %cond, label %T2, label %F2 + +T2: + call void @f3() + ret i32 %B + +F2: + ret i32 %B +} + From e734e8286b4b521d829aaddb6d1cbbd264953625 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 18:26:56 +0900 Subject: [PATCH 290/600] [JumpThreading] Remove cast's constraint As discussed in D84949, this removes the constraint to cast since it does not cause compile time degradation. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D85188 --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 6 +---- .../JumpThreading/threadable-edge-cast.ll | 24 +++++-------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index f42d4841f7939..e6d261fa9aff5 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -674,13 +674,9 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessorsImpl( return !Result.empty(); } - // Handle Cast instructions. 
Only see through Cast when the source operand is - // PHI, Cmp, or Freeze to save the compilation time. + // Handle Cast instructions. if (CastInst *CI = dyn_cast(I)) { Value *Source = CI->getOperand(0); - if (!isa(Source) && !isa(Source) && - !isa(Source)) - return false; ComputeValueKnownInPredecessorsImpl(Source, BB, Result, Preference, RecursionSet, CxtI); if (Result.empty()) diff --git a/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll b/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll index 71c34dea3a069..ef1e8f28d14f1 100644 --- a/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll +++ b/llvm/test/Transforms/JumpThreading/threadable-edge-cast.ll @@ -11,20 +11,14 @@ define i32 @test(i1 %cond0) { ; CHECK-NEXT: br i1 [[COND0:%.*]], label [[T1:%.*]], label [[F1:%.*]] ; CHECK: T1: ; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() -; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK-NEXT: br label [[F2:%.*]] ; CHECK: F1: ; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() -; CHECK-NEXT: br label [[MERGE]] -; CHECK: Merge: +; CHECK-NEXT: br label [[F2]] +; CHECK: F2: ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, [[T1]] ], [ 0, [[F1]] ] ; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[A]], 1 -; CHECK-NEXT: [[COND:%.*]] = trunc i32 [[AND]] to i1 -; CHECK-NEXT: br i1 [[COND]], label [[T2:%.*]], label [[F2:%.*]] -; CHECK: T2: -; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 [[B]] -; CHECK: F2: ; CHECK-NEXT: ret i32 [[B]] ; br i1 %cond0, label %T1, label %F1 @@ -57,21 +51,15 @@ define i32 @test2(i1 %cond0) { ; CHECK-NEXT: br i1 [[COND0:%.*]], label [[T1:%.*]], label [[F1:%.*]] ; CHECK: T1: ; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() -; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK-NEXT: br label [[F2:%.*]] ; CHECK: F1: ; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() -; CHECK-NEXT: br label [[MERGE]] -; CHECK: Merge: +; CHECK-NEXT: br label [[F2]] +; CHECK: F2: ; CHECK-NEXT: [[A:%.*]] = phi i1 [ true, [[T1]] ], [ false, [[F1]] ] ; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[V1]], [[T1]] ], [ [[V2]], [[F1]] ] ; CHECK-NEXT: [[A2:%.*]] = xor i1 [[A]], true ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[A2]], i32 10, i32 0 -; CHECK-NEXT: [[COND:%.*]] = trunc i32 [[SEL]] to i1 -; CHECK-NEXT: br i1 [[COND]], label [[T2:%.*]], label [[F2:%.*]] -; CHECK: T2: -; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 [[B]] -; CHECK: F2: ; CHECK-NEXT: ret i32 [[B]] ; br i1 %cond0, label %T1, label %F1 From 4be13b15d69d9d70506277eb1900eadccd75f608 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 4 Aug 2020 11:19:17 +0100 Subject: [PATCH 291/600] [SVE] Replace remaining _MERGE_OP1 nodes with _PRED variants. This is the final bit of work to relax the register allocation requirements when code generating normal LLVM IR, which rarely care about the result of inactive lanes. By using _PRED nodes we can make better use of SVE's reversed instructions. Also removes a redundant parameter from the min/max tests. 
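As a reference model for the rename: a _MERGE_OP1 node pins inactive lanes to its first operand, while a _PRED node leaves them undefined; that freedom is what lets the register allocator use reversed forms such as ASRR and LSLR. A scalar sketch of the two semantics (plain loops, not SVE intrinsics):

```
#include <cstddef>

// Reference semantics, element by element. MERGE_OP1 pins inactive
// lanes to operand 1; PRED leaves them unspecified, so any convenient
// value (here: whatever was in Out) is legal, enabling ASRR/LSLR-style
// reversed instructions and better register reuse.
void smaxMergeOp1(const bool *Pg, const int *Op1, const int *Op2,
                  int *Out, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    Out[I] = Pg[I] ? (Op1[I] > Op2[I] ? Op1[I] : Op2[I]) : Op1[I];
}

void smaxPred(const bool *Pg, const int *Op1, const int *Op2,
              int *Out, std::size_t N) {
  for (std::size_t I = 0; I != N; ++I)
    if (Pg[I]) // inactive lanes: value unspecified (left as-is here)
      Out[I] = Op1[I] > Op2[I] ? Op1[I] : Op2[I];
}
```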
Differential Revision: https://reviews.llvm.org/D85142 --- .../Target/AArch64/AArch64ISelLowering.cpp | 68 ++++--- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 16 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 63 +++--- llvm/lib/Target/AArch64/SVEInstrFormats.td | 18 +- .../CodeGen/AArch64/llvm-ir-to-intrinsic.ll | 190 +++++++++++++++--- 5 files changed, 257 insertions(+), 98 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8b533e97b928d..402d7656ca215 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1397,14 +1397,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) MAKE_CASE(AArch64ISD::ADD_PRED) MAKE_CASE(AArch64ISD::SDIV_PRED) + MAKE_CASE(AArch64ISD::SHL_PRED) + MAKE_CASE(AArch64ISD::SMAX_PRED) + MAKE_CASE(AArch64ISD::SMIN_PRED) + MAKE_CASE(AArch64ISD::SRA_PRED) + MAKE_CASE(AArch64ISD::SRL_PRED) MAKE_CASE(AArch64ISD::UDIV_PRED) - MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1) - MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1) - MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1) - MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1) - MAKE_CASE(AArch64ISD::SHL_MERGE_OP1) - MAKE_CASE(AArch64ISD::SRL_MERGE_OP1) - MAKE_CASE(AArch64ISD::SRA_MERGE_OP1) + MAKE_CASE(AArch64ISD::UMAX_PRED) + MAKE_CASE(AArch64ISD::UMIN_PRED) MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) MAKE_CASE(AArch64ISD::ADC) MAKE_CASE(AArch64ISD::SBC) @@ -3540,13 +3540,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::UDIV: return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); case ISD::SMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED); case ISD::UMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED); case ISD::SMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -8914,7 +8914,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SHL: if (VT.isScalableVector()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), @@ -8926,8 +8926,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SRA: case ISD::SRL: if (VT.isScalableVector()) { - unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1 - : AArch64ISD::SRL_MERGE_OP1; + unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED + : AArch64ISD::SRL_PRED; return LowerToPredicatedOp(Op, DAG, Opc); } @@ -11940,6 +11940,25 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, Zero); } +// If a merged operation has no inactive lanes we can relax it to a predicated +// or unpredicated operation, which potentially allows better isel (perhaps +// using immediate forms) or relaxing register reuse requirements. 
+static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc, + SelectionDAG &DAG) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); + assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!"); + SDValue Pg = N->getOperand(1); + + // ISD way to specify an all active predicate. + if ((Pg.getOpcode() == AArch64ISD::PTRUE) && + (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all)) + return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg, + N->getOperand(2), N->getOperand(3)); + + // FUTURE: SplatVector(true) + return SDValue(); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -12018,26 +12037,19 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_sve_ext: return LowerSVEIntrinsicEXT(N, DAG); case Intrinsic::aarch64_sve_smin: - return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG); case Intrinsic::aarch64_sve_umin: - return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG); case Intrinsic::aarch64_sve_smax: - return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG); case Intrinsic::aarch64_sve_umax: - return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG); case Intrinsic::aarch64_sve_lsl: - return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG); case Intrinsic::aarch64_sve_lsr: - return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG); case Intrinsic::aarch64_sve_asr: - return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); + return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index a793fb6bb4625..c346debb823aa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -72,7 +72,7 @@ enum NodeType : unsigned { ADC, SBC, // adc, sbc instructions - // Arithmetic instructions + // Predicated instructions where inactive lanes produce undefined results. 
ADD_PRED, FADD_PRED, FDIV_PRED, @@ -80,14 +80,14 @@ enum NodeType : unsigned { FMUL_PRED, FSUB_PRED, SDIV_PRED, + SHL_PRED, + SMAX_PRED, + SMIN_PRED, + SRA_PRED, + SRL_PRED, UDIV_PRED, - SMIN_MERGE_OP1, - UMIN_MERGE_OP1, - SMAX_MERGE_OP1, - UMAX_MERGE_OP1, - SHL_MERGE_OP1, - SRL_MERGE_OP1, - SRA_MERGE_OP1, + UMAX_PRED, + UMIN_PRED, SETCC_MERGE_ZERO, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 7c39268a4441f..cf34e5f6470da 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -174,22 +174,20 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [ // Predicated operations with the result of inactive lanes being unspecified. def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; +def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>; def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>; def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>; def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>; +def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; +def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; +def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; +def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; - -// Merging op1 into the inactive lanes. -def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>; -def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>; -def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>; -def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>; -def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>; -def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>; -def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>; +def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; +def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; @@ -287,10 +285,10 @@ let Predicates = [HasSVE] in { defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; - defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>; - defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>; - defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>; - defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>; + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>; + defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>; + defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>; defm MUL_ZI : sve_int_arith_imm2<"mul", mul>; defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>; @@ -343,12 +341,17 @@ let Predicates = [HasSVE] in { defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>; defm 
FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>; - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>; - defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>; - defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>; + defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>; + defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>; + + defm SMAX_ZPZZ : sve_int_bin_pred_bhsd; + defm UMAX_ZPZZ : sve_int_bin_pred_bhsd; + defm SMIN_ZPZZ : sve_int_bin_pred_bhsd; + defm UMIN_ZPZZ : sve_int_bin_pred_bhsd; defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; @@ -1313,9 +1316,9 @@ multiclass sve_prefetch; // Unpredicated shifts - defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>; - defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>; - defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>; + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>; defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; @@ -1328,19 +1331,23 @@ multiclass sve_prefetch; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { - defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd; } - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ">; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ">; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ">; + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">; defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>; defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>; defm LSLR_ZPmZ : 
sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>; + defm ASR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSR_ZPZZ : sve_int_bin_pred_bhsd; + defm LSL_ZPZZ : sve_int_bin_pred_bhsd; + defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>; defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1f067908de6b4..c5dd327d2aef4 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2382,11 +2382,19 @@ multiclass sve_int_bin_pred_arit_0 opc, string asm, string Ps, def : SVE_3_Op_Pat(NAME # _D)>; } -multiclass sve_int_bin_pred_arit_1 opc, string asm, SDPatternOperator op> { - def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>; - def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>; - def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>; - def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>; +multiclass sve_int_bin_pred_arit_1 opc, string asm, string Ps, + SDPatternOperator op, + DestructiveInstTypeEnum flags> { + let DestructiveInstType = flags in { + def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>, + SVEPseudo2Instr; + def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>, + SVEPseudo2Instr; + def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>, + SVEPseudo2Instr; + def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>, + SVEPseudo2Instr; + } def : SVE_3_Op_Pat(NAME # _B)>; def : SVE_3_Op_Pat(NAME # _H)>; diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll index 816465f9eaa17..9f3a77c8fe92b 100644 --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -182,7 +182,7 @@ define @urem_i64( %a, %b ; SMIN ; -define @smin_i8( %a, %b, %c) { +define @smin_i8( %a, %b) { ; CHECK-LABEL: smin_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -193,7 +193,7 @@ define @smin_i8( %a, %b, ret %min } -define @smin_i16( %a, %b, %c) { +define @smin_i16( %a, %b) { ; CHECK-LABEL: smin_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -204,7 +204,7 @@ define @smin_i16( %a, %b ret %min } -define @smin_i32( %a, %b, %c) { +define @smin_i32( %a, %b) { ; CHECK-LABEL: smin_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -215,7 +215,7 @@ define @smin_i32( %a, %b ret %min } -define @smin_i64( %a, %b, %c) { +define @smin_i64( %a, %b) { ; CHECK-LABEL: smin_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -226,7 +226,7 @@ define @smin_i64( %a, %b ret %min } -define @smin_split_i8( %a, %b, %c) { +define @smin_split_i8( %a, %b) { ; CHECK-LABEL: smin_split_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -238,7 +238,7 @@ define @smin_split_i8( %a, %min } -define @smin_split_i16( %a, %b, %c) { +define @smin_split_i16( %a, %b) { ; CHECK-LABEL: smin_split_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -252,7 +252,7 @@ define @smin_split_i16( %a, %min } -define @smin_split_i32( %a, %b, %c) { +define @smin_split_i32( %a, %b) { ; CHECK-LABEL: smin_split_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -264,7 +264,7 @@ define @smin_split_i32( %a, %min } -define @smin_split_i64( %a, %b, %c) { +define @smin_split_i64( %a, %b) { ; CHECK-LABEL: smin_split_i64: ; 
CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -276,7 +276,7 @@ define @smin_split_i64( %a, %min } -define @smin_promote_i8( %a, %b, %c) { +define @smin_promote_i8( %a, %b) { ; CHECK-LABEL: smin_promote_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -289,7 +289,7 @@ define @smin_promote_i8( %a, %min } -define @smin_promote_i16( %a, %b, %c) { +define @smin_promote_i16( %a, %b) { ; CHECK-LABEL: smin_promote_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -302,7 +302,7 @@ define @smin_promote_i16( %a, %min } -define @smin_promote_i32( %a, %b, %c) { +define @smin_promote_i32( %a, %b) { ; CHECK-LABEL: smin_promote_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -319,7 +319,7 @@ define @smin_promote_i32( %a, @umin_i8( %a, %b, %c) { +define @umin_i8( %a, %b) { ; CHECK-LABEL: umin_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -330,7 +330,7 @@ define @umin_i8( %a, %b, ret %min } -define @umin_i16( %a, %b, %c) { +define @umin_i16( %a, %b) { ; CHECK-LABEL: umin_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -341,7 +341,7 @@ define @umin_i16( %a, %b ret %min } -define @umin_i32( %a, %b, %c) { +define @umin_i32( %a, %b) { ; CHECK-LABEL: umin_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -352,7 +352,7 @@ define @umin_i32( %a, %b ret %min } -define @umin_i64( %a, %b, %c) { +define @umin_i64( %a, %b) { ; CHECK-LABEL: umin_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -363,7 +363,7 @@ define @umin_i64( %a, %b ret %min } -define @umin_split_i64( %a, %b, %c) { +define @umin_split_i64( %a, %b) { ; CHECK-LABEL: umin_split_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -375,7 +375,7 @@ define @umin_split_i64( %a, %min } -define @umin_promote_i8( %a, %b, %c) { +define @umin_promote_i8( %a, %b) { ; CHECK-LABEL: umin_promote_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -392,7 +392,7 @@ define @umin_promote_i8( %a, @smax_i8( %a, %b, %c) { +define @smax_i8( %a, %b) { ; CHECK-LABEL: smax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -403,7 +403,7 @@ define @smax_i8( %a, %b, ret %max } -define @smax_i16( %a, %b, %c) { +define @smax_i16( %a, %b) { ; CHECK-LABEL: smax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -414,7 +414,7 @@ define @smax_i16( %a, %b ret %max } -define @smax_i32( %a, %b, %c) { +define @smax_i32( %a, %b) { ; CHECK-LABEL: smax_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -425,7 +425,7 @@ define @smax_i32( %a, %b ret %max } -define @smax_i64( %a, %b, %c) { +define @smax_i64( %a, %b) { ; CHECK-LABEL: smax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -436,7 +436,7 @@ define @smax_i64( %a, %b ret %max } -define @smax_split_i32( %a, %b, %c) { +define @smax_split_i32( %a, %b) { ; CHECK-LABEL: smax_split_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -448,7 +448,7 @@ define @smax_split_i32( %a, %max } -define @smax_promote_i16( %a, %b, %c) { +define @smax_promote_i16( %a, %b) { ; CHECK-LABEL: smax_promote_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -465,7 +465,7 @@ define @smax_promote_i16( %a, @umax_i8( %a, %b, %c) { +define @umax_i8( %a, %b) { ; CHECK-LABEL: umax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -476,7 +476,7 @@ define @umax_i8( %a, %b, ret %max } -define @umax_i16( %a, %b, %c) { +define @umax_i16( %a, %b) { ; CHECK-LABEL: umax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -487,7 +487,7 @@ define @umax_i16( %a, %b ret %max } -define @umax_i32( %a, %b, %c) { +define @umax_i32( %a, %b) { ; CHECK-LABEL: umax_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -498,7 +498,7 @@ define @umax_i32( %a, %b ret 
%max } -define @umax_i64( %a, %b, %c) { +define @umax_i64( %a, %b) { ; CHECK-LABEL: umax_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -509,7 +509,7 @@ define @umax_i64( %a, %b ret %max } -define @umax_split_i16( %a, %b, %c) { +define @umax_split_i16( %a, %b) { ; CHECK-LABEL: umax_split_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -521,7 +521,7 @@ define @umax_split_i16( %a, %max } -define @umax_promote_i32( %a, %b, %c) { +define @umax_promote_i32( %a, %b) { ; CHECK-LABEL: umax_promote_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -601,6 +601,50 @@ define @asr_promote_i32( %a, %shr } +; +; ASRR +; + +define @asrr_i8( %a, %b){ +; CHECK-LABEL: asrr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = ashr %b, %a + ret %shr +} + +define @asrr_i16( %a, %b){ +; CHECK-LABEL: asrr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = ashr %b, %a + ret %shr +} + +define @asrr_i32( %a, %b){ +; CHECK-LABEL: asrr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = ashr %b, %a + ret %shr +} + +define @asrr_i64( %a, %b){ +; CHECK-LABEL: asrr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = ashr %b, %a + ret %shr +} + ; ; LSL ; @@ -667,6 +711,50 @@ define @lsl_promote_i16( %a, %shl } +; +; LSLR +; + +define @lslr_i8( %a, %b){ +; CHECK-LABEL: lslr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shl = shl %b, %a + ret %shl +} + +define @lslr_i16( %a, %b){ +; CHECK-LABEL: lslr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shl = shl %b, %a + ret %shl +} + +define @lslr_i32( %a, %b){ +; CHECK-LABEL: lslr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shl = shl %b, %a + ret %shl +} + +define @lslr_i64( %a, %b){ +; CHECK-LABEL: lslr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shl = shl %b, %a + ret %shl +} + ; ; LSR ; @@ -734,6 +822,50 @@ define @lsr_split_i32( %a, %shr } +; +; LSRR +; + +define @lsrr_i8( %a, %b){ +; CHECK-LABEL: lsrr_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %shr = lshr %b, %a + ret %shr +} + +define @lsrr_i16( %a, %b){ +; CHECK-LABEL: lsrr_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %shr = lshr %b, %a + ret %shr +} + +define @lsrr_i32( %a, %b){ +; CHECK-LABEL: lsrr_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %shr = lshr %b, %a + ret %shr +} + +define @lsrr_i64( %a, %b){ +; CHECK-LABEL: lsrr_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %shr = lshr %b, %a + ret %shr +} + ; ; CMP ; From 79b44a4d470041acf202027054ba86e935d86aa1 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 4 Aug 2020 16:47:38 +0800 Subject: [PATCH 292/600] [YAMLTraits] Fix mapping value that followed by comments. When mapping an optional value, if the value is and followed by comments, there will be a parsing error. This patch helps fix this issue. 
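Reduced to its essence, the failure and the rtrim-based check look like this; the stand-in below mimics llvm::StringRef::rtrim so the snippet is self-contained:

```
#include <cassert>
#include <string>

// Stand-in for llvm::StringRef::rtrim(' '), kept local so the
// snippet compiles on its own.
static std::string rtrimSpaces(std::string S) {
  while (!S.empty() && S.back() == ' ')
    S.pop_back();
  return S;
}

int main() {
  // An empty optional value normally yields a raw value of "", but a
  // trailing comment on the same line leaves the scalar's raw value
  // as " ". Testing for "no value" must therefore trim spaces first.
  assert(rtrimSpaces("").empty());  // plain empty value
  assert(rtrimSpaces(" ").empty()); // empty value followed by comment
  return 0;
}
```

The YAML reproducer below shows the same situation end to end.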
e.g., When mapping the following YAML, ``` Sections: - Name: blah Type: SHT_foo Flags: [[FLAGS=]] ## some comments. ``` the raw value of `ScalarNode` is " " rather than "". We need to remove the spaces. Differential Revision: https://reviews.llvm.org/D85180 --- llvm/include/llvm/Support/YAMLTraits.h | 4 +++- llvm/test/tools/yaml2obj/ELF/none-value.yaml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index e52bf7892d711..acb1d61cf569d 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -1629,7 +1629,9 @@ void IO::processKeyWithDefault(const char *Key, Optional &Val, bool IsNone = false; if (!outputting()) if (auto *Node = dyn_cast(((Input *)this)->getCurrentNode())) - IsNone = Node->getRawValue() == ""; + // We use rtrim to ignore possible white spaces that might exist when a + // comment is present on the same line. + IsNone = Node->getRawValue().rtrim(' ') == ""; if (IsNone) Val = DefaultValue; diff --git a/llvm/test/tools/yaml2obj/ELF/none-value.yaml b/llvm/test/tools/yaml2obj/ELF/none-value.yaml index 786a9b53aba78..7993e54c53cfc 100644 --- a/llvm/test/tools/yaml2obj/ELF/none-value.yaml +++ b/llvm/test/tools/yaml2obj/ELF/none-value.yaml @@ -21,6 +21,7 @@ FileHeader: Sections: - Name: .bar Type: SHT_PROGBITS + Flags: [[TEST=]] ## Comment Offset: [[TEST=]] Address: [[TEST=]] Content: [[TEST=]] From fd6584a22043b254a323635c142b28ce80ae5b5b Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 4 Aug 2020 10:58:47 +0100 Subject: [PATCH 293/600] [AArch64][SVE] Fix CFA calculation in presence of SVE objects. The CFA is calculated as (SP/FP + offset), but when there are SVE objects on the stack the SP offset is partly scalable and should instead be expressed as the DWARF expression: SP + offset + scalable_offset * VG where VG is the Vector Granule register, containing the number of 64bits 'granules' in a scalable vector. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D84043 --- llvm/include/llvm/MC/MCDwarf.h | 15 +- .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 1 + .../Target/AArch64/AArch64FrameLowering.cpp | 74 ++++++- .../lib/Target/AArch64/AArch64FrameLowering.h | 7 + .../lib/Target/AArch64/AArch64RegisterInfo.td | 3 + llvm/lib/Target/AArch64/AArch64StackOffset.h | 12 ++ llvm/test/CodeGen/AArch64/framelayout-sve.mir | 186 +++++++++++++++--- llvm/test/CodeGen/AArch64/sve-trunc.ll | 2 +- 8 files changed, 267 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index e3cea0ae64cf3..70da5f76e7665 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -467,10 +467,12 @@ class MCCFIInstruction { unsigned Register2; }; std::vector Values; + std::string Comment; - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) + MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V, + StringRef Comment = "") : Operation(Op), Label(L), Register(R), Offset(O), - Values(V.begin(), V.end()) { + Values(V.begin(), V.end()), Comment(Comment) { assert(Op != OpRegister); } @@ -570,8 +572,9 @@ class MCCFIInstruction { /// .cfi_escape Allows the user to add arbitrary bytes to the unwind /// info. 
- static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) { - return MCCFIInstruction(OpEscape, L, 0, 0, Vals); + static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, + StringRef Comment = "") { + return MCCFIInstruction(OpEscape, L, 0, 0, Vals, Comment); } /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE @@ -606,6 +609,10 @@ class MCCFIInstruction { assert(Operation == OpEscape); return StringRef(&Values[0], Values.size()); } + + StringRef getComment() const { + return Comment; + } }; struct MCDwarfFrameInfo { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index d81a9be26d39b..b6a9a95683603 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -241,6 +241,7 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { OutStreamer->emitCFIGnuArgsSize(Inst.getOffset()); break; case MCCFIInstruction::OpEscape: + OutStreamer->AddComment(Inst.getComment()); OutStreamer->emitCFIEscape(Inst.getValues()); break; case MCCFIInstruction::OpRestore: diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 4789a9f02937a..177d5e24fdb3f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -148,6 +148,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -399,6 +400,64 @@ static bool ShouldSignReturnAddress(MachineFunction &MF) { return false; } +// Convenience function to create a DWARF expression for +// Expr + NumBytes + NumVGScaledBytes * AArch64::VG +static void appendVGScaledOffsetExpr(SmallVectorImpl &Expr, + int NumBytes, int NumVGScaledBytes, unsigned VG, + llvm::raw_string_ostream &Comment) { + uint8_t buffer[16]; + + if (NumBytes) { + Expr.push_back(dwarf::DW_OP_consts); + Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer)); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); + } + + if (NumVGScaledBytes) { + Expr.push_back((uint8_t)dwarf::DW_OP_consts); + Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer)); + + Expr.push_back((uint8_t)dwarf::DW_OP_bregx); + Expr.append(buffer, buffer + encodeULEB128(VG, buffer)); + Expr.push_back(0); + + Expr.push_back((uint8_t)dwarf::DW_OP_mul); + Expr.push_back((uint8_t)dwarf::DW_OP_plus); + + Comment << (NumVGScaledBytes < 0 ? " - " : " + ") + << std::abs(NumVGScaledBytes) << " * VG"; + } +} + +// Creates an MCCFIInstruction: +// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr } +MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( + const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const { + int64_t NumBytes, NumVGScaledBytes; + OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes); + + std::string CommentBuffer = "sp"; + llvm::raw_string_ostream Comment(CommentBuffer); + + // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG) + SmallString<64> Expr; + Expr.push_back(dwarf::DW_OP_breg0 + /*SP*/ 31); + Expr.push_back(0); + appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, + TRI.getDwarfRegNum(AArch64::VG, true), Comment); + + // Wrap this into DW_CFA_def_cfa. 
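+  // The escape blob is self-describing: a one-byte opcode
+  // (DW_CFA_def_cfa_expression, 0x0f), a ULEB128 length, then the raw
+  // expression bytes. For "sp + 32 + 16 * VG" this becomes
+  //   0x0f 0x0c 0x8f 0x00 0x11 0x20 0x22 0x11 0x10 0x92 0x2e 0x00 0x1e 0x22
+  // as checked by the framelayout-sve.mir tests below.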
+ SmallString<64> DefCfaExpr; + DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); + uint8_t buffer[16]; + DefCfaExpr.append(buffer, + buffer + encodeULEB128(Expr.size(), buffer)); + DefCfaExpr.append(Expr.str()); + return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), + Comment.str()); +} + void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { MachineFunction &MF = *MBB.getParent(); @@ -1383,9 +1442,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { - // Encode the stack size of the leaf function. - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); + unsigned CFIIndex; + if (SVEStackSize) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + StackOffset TotalSize = + SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8); + CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize)); + } else { + // Encode the stack size of the leaf function. + CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize())); + } BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 444740cb50ab9..753593df2b4dc 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -18,6 +18,8 @@ namespace llvm { +class MCCFIInstruction; + class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() @@ -119,6 +121,11 @@ class AArch64FrameLowering : public TargetFrameLowering { int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF, int &MinCSFrameIndex, int &MaxCSFrameIndex) const; + MCCFIInstruction + createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI, + const StackOffset &OffsetFromSP) const; + MCCFIInstruction createCfaOffset(const MCRegisterInfo &MRI, unsigned DwarfReg, + const StackOffset &OffsetFromDefCFA) const; bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, unsigned StackBumpBytes) const; }; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index bd05c56009a1d..54b351fda053b 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -133,6 +133,9 @@ def NZCV : AArch64Reg<0, "nzcv">; // First fault status register def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>; +// Purely virtual Vector Granule (VG) Dwarf register +def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>; + // GPR register classes with the intersections of GPR32/GPR32sp and // GPR64/GPR64sp for use by the coalescer. 
def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { diff --git a/llvm/lib/Target/AArch64/AArch64StackOffset.h b/llvm/lib/Target/AArch64/AArch64StackOffset.h index 6fa1c744f77e2..24751a81797d3 100644 --- a/llvm/lib/Target/AArch64/AArch64StackOffset.h +++ b/llvm/lib/Target/AArch64/AArch64StackOffset.h @@ -123,6 +123,18 @@ class StackOffset { } } + void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const { + assert(isValid() && "Invalid frame offset"); + + // VGSized offsets are divided by '2', because the VG register is the + // the number of 64bit granules as opposed to 128bit vector chunks, + // which is how the 'n' in e.g. MVT::nxv1i8 is modelled. + // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes. + // VG = n * 2 and the dwarf offset must be VG * 8 bytes. + ByteSized = Bytes; + VGSized = ScalableBytes / 2; + } + /// Returns whether the offset is known zero. explicit operator bool() const { return Bytes || ScalableBytes; } diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 668b243dd79e0..9e2077855c11a 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -1,4 +1,8 @@ # RUN: llc -mattr=+sve -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -start-before=prologepilog %s -o - | FileCheck %s --check-prefix=ASM +# RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -start-before=prologepilog %s -filetype=obj -o %t +# RUN: llvm-objdump --dwarf=frames %t | FileCheck %s --check-prefix=UNWINDINFO +# RUN: rm -rf %t # # Test allocation and deallocation of SVE objects on the stack, # as well as using a combination of scalable and non-scalable @@ -23,19 +27,19 @@ # --- | - define void @test_allocate_sve() nounwind { entry: unreachable } - define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable } - define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable } - define void @test_address_sve() nounwind { entry: unreachable } - define void @test_address_sve_fp() nounwind { entry: unreachable } - define void @test_stack_arg_sve() nounwind { entry: unreachable } - define void @test_address_sve_out_of_range() nounwind { entry: unreachable } - define void @test_address_gpr_vla() nounwind { entry: unreachable } - define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable } - define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable } - define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable } - define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable } - define aarch64_sve_vector_pcs void @frame_layout() nounwind { entry: unreachable } + define void @test_allocate_sve() { entry: unreachable } + define void @test_allocate_sve_gpr_callee_saves() { entry: unreachable } + define void @test_allocate_sve_gpr_realigned() { entry: unreachable } + define void @test_address_sve() { entry: unreachable } + define void @test_address_sve_fp() { entry: unreachable } + define void @test_stack_arg_sve() { entry: unreachable } + define void @test_address_sve_out_of_range() { entry: unreachable } + define void @test_address_gpr_vla() { entry: unreachable } + define aarch64_sve_vector_pcs void @save_restore_pregs_sve() { entry: unreachable } + define aarch64_sve_vector_pcs void 
@save_restore_zregs_sve() { entry: unreachable } + define aarch64_sve_vector_pcs void @save_restore_sve() { entry: unreachable } + define aarch64_sve_vector_pcs void @save_restore_sve_realign() { entry: unreachable } + define aarch64_sve_vector_pcs void @frame_layout() { entry: unreachable } ... # +----------+ @@ -54,11 +58,19 @@ # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 # CHECK-NEXT: RET_ReallyLR + +# ASM-LABEL: test_allocate_sve: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_allocate_sve stack: - { id: 0, stack-id: sve-vec, size: 18, alignment: 2 } @@ -85,6 +97,8 @@ body: | # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-COUNT-4: frame-setup CFI_INSTRUCTION +# # CHECK-NEXT: $x20 = IMPLICIT_DEF # CHECK-NEXT: $x21 = IMPLICIT_DEF # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2 @@ -92,6 +106,17 @@ body: | # CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_allocate_sve_gpr_callee_saves: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG +# ASM-NEXT: .cfi_offset w20, -8 +# ASM-NEXT: .cfi_offset w21, -16 +# ASM-NEXT: .cfi_offset w29, -32 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_allocate_sve_gpr_callee_saves stack: - { id: 0, stack-id: sve-vec, size: 18, alignment: 2 } @@ -120,9 +145,20 @@ body: | # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = ANDXri killed $[[TMP]] +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_allocate_sve_gpr_realigned: +# ASM: .cfi_def_cfa w29, 16 +# ASM-NEXT: .cfi_offset w30, -8 +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa: reg29 +16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 + name: test_allocate_sve_gpr_realigned stack: - { id: 0, stack-id: sve-vec, size: 18, alignment: 2 } @@ -149,6 +185,7 @@ body: | # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16 # CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2 @@ -161,6 +198,14 @@ 
body: | # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_address_sve: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 + name: test_address_sve frameInfo: maxAlignment: 16 @@ -199,6 +244,7 @@ body: | # CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # CHECK-NEXT: STR_ZXI $z0, $fp, -1 # CHECK-NEXT: STR_ZXI $z1, $fp, -2 @@ -208,6 +254,15 @@ body: | # CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_address_sve_fp: +# ASM: .cfi_def_cfa w29, 16 +# ASM-NEXT: .cfi_offset w30, -8 +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa: reg29 +16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_address_sve_fp frameInfo: maxAlignment: 16 @@ -244,6 +299,7 @@ body: | # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 +# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION # CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1 # CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4 @@ -252,6 +308,14 @@ body: | # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_stack_arg_sve: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 + name: test_stack_arg_sve fixedStack: - { id: 0, stack-id: default, size: 16, alignment: 16, offset: 0 } @@ -292,6 +356,7 @@ body: | # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 +# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION # CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDVL_XXI $sp, 1 # CHECK-NEXT: STR_ZXI $z0, killed $[[TMP2]], 255 @@ -310,6 +375,13 @@ body: | # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 9 # CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16 # CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: test_address_sve_out_of_range: +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x88, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 2056 * VG +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +2056, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: test_address_sve_out_of_range frameInfo: maxAlignment: 16 @@ -344,6 +416,17 @@ body: | # CHECK: bb.0.entry: # CHECK: STRXui $xzr, $x19, 0 # CHECK: RET_ReallyLR +# +# ASM-LABEL: test_address_gpr_vla: +# ASM: .cfi_def_cfa w29, 32 +# ASM-NEXT: 
.cfi_offset w19, -16 +# ASM-NEXT: .cfi_offset w30, -24 +# ASM-NEXT: .cfi_offset w29, -32 +# +# UNWINDINFO: DW_CFA_def_cfa: reg29 +32 +# UNWINDINFO-NEXT: DW_CFA_offset: reg19 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -24 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: test_address_gpr_vla frameInfo: maxAlignment: 16 @@ -366,6 +449,7 @@ body: | # CHECK: frame-setup STR_PXI killed $p5, $sp, 6 # CHECK: frame-setup STR_PXI killed $p4, $sp, 7 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 +# CHECK-COUNT-5: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $p6 = frame-destroy LDR_PXI $sp, 5 @@ -373,6 +457,15 @@ body: | # CHECK: $p4 = frame-destroy LDR_PXI $sp, 7 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 # CHECK: RET_ReallyLR +# +# ASM-LABEL: save_restore_pregs_sve: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG +# ASM-COUNT-3: .cfi_offset +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-COUNT-3: DW_CFA_offset +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_pregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -387,18 +480,29 @@ body: | --- ... # CHECK-LABEL: name: save_restore_zregs_sve -# CHECK: $sp = frame-setup ADDVL_XXI $sp, -3 -# CHECK: frame-setup STR_ZXI killed $z10, $sp, 0 -# CHECK: frame-setup STR_ZXI killed $z9, $sp, 1 -# CHECK: frame-setup STR_ZXI killed $z8, $sp, 2 -# CHECK: $sp = frame-setup SUBXri $sp, 32, 0 - -# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 -# CHECK: $z10 = frame-destroy LDR_ZXI $sp, 0 -# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 1 -# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 2 -# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 3 -# CHECK: RET_ReallyLR +# CHECK: $sp = frame-setup STRXpre killed $fp, $sp, -16 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3 +# CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0 +# CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1 +# CHECK-NEXT: frame-setup STR_ZXI killed $z8, $sp, 2 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 32, 0 +# CHECK-COUNT-5: frame-setup CFI_INSTRUCTION + +# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 +# CHECK-NEXT: $z10 = frame-destroy LDR_ZXI $sp, 0 +# CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 +# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3 +# CHECK-NEXT: $sp, $fp = frame-destroy LDRXpost $sp, 16 +# CHECK-NEXT: RET_ReallyLR +# +# ASM-LABEL: save_restore_zregs_sve: +# ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG +# ASM-COUNT-3: .cfi_offset +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-COUNT-3: DW_CFA_offset +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_zregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -432,6 +536,7 @@ body: | # CHECK: frame-setup STR_ZXI killed $z8, $sp, 17 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 +# CHECK-COUNT-33: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 @@ -447,6 +552,22 @@ body: | # CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2 # CHECK: $sp, ${{[a-z0-9]+}}, $x21 = 
frame-destroy LDPXpost $sp, 4 # CHECK: RET_ReallyLR +# +# ASM-LABEL: save_restore_sve: +# ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG +# ASM-COUNT-28: .cfi_offset +# ASM-NEXT: .cfi_offset w19, -8 +# ASM-NEXT: .cfi_offset w20, -16 +# ASM-NEXT: .cfi_offset w21, -24 +# ASM-NEXT: .cfi_offset w29, -32 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-COUNT-28: DW_CFA_offset +# UNWINDINFO-NEXT: DW_CFA_offset: reg19 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -24 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 + name: save_restore_sve stack: - { id: 0, stack-id: sve-vec, size: 16, alignment: 16 } @@ -499,6 +620,7 @@ body: | # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = ANDXri killed $[[TMP]] +# CHECK-COUNT-31: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 @@ -512,6 +634,11 @@ body: | # CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR +# +# UNWINDINFO: DW_CFA_def_cfa: reg29 +16 +# UNWINDINFO-COUNT-28: DW_CFA_offset +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_sve_realign stack: - { id: 0, stack-id: sve-vec, size: 16, alignment: 16 } @@ -586,6 +713,15 @@ body: | # CHECK-NEXT: STR_ZXI killed $z23, $sp, 1 # CHECK-NEXT: STR_ZXI killed $z8, $sp, 2 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -7 +# CHECK-COUNT-6: frame-setup CFI_INSTRUCTION +# ASM-LABEL: frame_layout: +# ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 80 * VG +# ASM-COUNT-4: .cfi_offset +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +80, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-COUNT-4: DW_CFA_offset +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: frame_layout stack: - { id: 0, type: default, size: 32, alignment: 16, stack-id: sve-vec } diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll index af50176f6b101..191df22eda506 100644 --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -116,7 +116,7 @@ define @trunc_i64toi1_split3( %in) { ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset p4, -2 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d From bb3344c7d8c2703c910dd481ada43ecaf11536a6 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 4 Aug 2020 11:10:32 +0100 Subject: [PATCH 294/600] [AArch64][SVE] Add missing unwind info for SVE registers. This patch adds a CFI entry for each SVE callee saved register that needs unwind info at an offset from the CFA. The offset is a DWARF expression because the offset is partly scalable. 
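For example, the entry describing d8 saved at `cfa - 16 - 8 * VG` is emitted
as a single .cfi_escape whose bytes (taken from the framelayout-sve.mir
checks in this patch) decode as:

```
0x10            DW_CFA_expression
0x48            ULEB128 register number 72 ($d8)
0x0a            ULEB128 expression length (10 bytes)
0x11 0x70       DW_OP_consts -16
0x22            DW_OP_plus
0x11 0x78       DW_OP_consts -8
0x92 0x2e 0x00  DW_OP_bregx VG (reg 46), offset 0
0x1e            DW_OP_mul
0x22            DW_OP_plus
```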
The CFI entries only cover a subset of the SVE callee-saves and only encodes the lower 64-bits, thus implementing the lowest common denominator ABI. Existing unwinders may support VG but only restore the lower 64-bits. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D84044 --- .../Target/AArch64/AArch64FrameLowering.cpp | 60 +++++++++++-- .../lib/Target/AArch64/AArch64FrameLowering.h | 2 +- .../Target/AArch64/AArch64RegisterInfo.cpp | 23 +++++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 1 + llvm/test/CodeGen/AArch64/framelayout-sve.mir | 89 +++++++++++++------ llvm/test/CodeGen/AArch64/sve-trunc.ll | 1 - 6 files changed, 142 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 177d5e24fdb3f..30666009801c5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -458,12 +458,44 @@ MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( Comment.str()); } +MCCFIInstruction AArch64FrameLowering::createCfaOffset( + const TargetRegisterInfo &TRI, unsigned Reg, + const StackOffset &OffsetFromDefCFA) const { + int64_t NumBytes, NumVGScaledBytes; + OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes); + + unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); + + // Non-scalable offsets can use DW_CFA_offset directly. + if (!NumVGScaledBytes) + return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes); + + std::string CommentBuffer; + llvm::raw_string_ostream Comment(CommentBuffer); + Comment << printReg(Reg, &TRI) << " @ cfa"; + + // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG) + SmallString<64> OffsetExpr; + appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes, + TRI.getDwarfRegNum(AArch64::VG, true), Comment); + + // Wrap this into DW_CFA_expression + SmallString<64> CfaExpr; + CfaExpr.push_back(dwarf::DW_CFA_expression); + uint8_t buffer[16]; + CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); + CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer)); + CfaExpr.append(OffsetExpr.str()); + + return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str()); +} + void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetSubtargetInfo &STI = MF.getSubtarget(); - const MCRegisterInfo *MRI = STI.getRegisterInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -474,11 +506,26 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( for (const auto &Info : CSI) { unsigned Reg = Info.getReg(); - int64_t Offset = - MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea(); - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + + // Not all unwinders may know about SVE registers, so assume the lowest + // common demoninator. 
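+    // Concretely: predicate registers get no entry at all, and for the SVE
+    // callee-saves z8-z15 only the d8-d15 subregisters are described; see
+    // AArch64RegisterInfo::regNeedsCFI.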
+ unsigned NewReg; + if (static_cast(TRI)->regNeedsCFI(Reg, NewReg)) + Reg = NewReg; + else + continue; + + StackOffset Offset; + if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) { + AArch64FunctionInfo *AFI = MF.getInfo(); + Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) - + StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8); + } else { + Offset = {MFI.getObjectOffset(Info.getFrameIdx()) - + getOffsetOfLocalArea(), + MVT::i8}; + } + unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -2074,6 +2121,7 @@ static void computeCalleeSaveRegisterPairs( // available unwind codes. This flag assures that the alignment fixup is done // only once, as intened. bool FixupDone = false; + for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 753593df2b4dc..1ca8c3e9e2bf6 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -124,7 +124,7 @@ class AArch64FrameLowering : public TargetFrameLowering { MCCFIInstruction createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const; - MCCFIInstruction createCfaOffset(const MCRegisterInfo &MRI, unsigned DwarfReg, + MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg, const StackOffset &OffsetFromDefCFA) const; bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB, unsigned StackBumpBytes) const; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 83a488afc7972..62cc865fd1c36 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -40,6 +40,29 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) AArch64_MC::initLLVMToCVRegMapping(this); } +/// Return whether the register needs a CFI entry. Not all unwinders may know +/// about SVE registers, so we assume the lowest common denominator, i.e. the +/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the +/// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is +/// returned in \p RegToUseForCFI. 
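+/// Predicate registers never need an entry, since the base ABI has no
+/// callee-saved predicates (note the `.cfi_offset p4, -2` dropped from
+/// sve-trunc.ll by this patch).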
+bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg, + unsigned &RegToUseForCFI) const { + if (AArch64::PPRRegClass.contains(Reg)) + return false; + + if (AArch64::ZPRRegClass.contains(Reg)) { + RegToUseForCFI = getSubReg(Reg, AArch64::dsub); + for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) { + if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI) + return true; + } + return false; + } + + RegToUseForCFI = Reg; + return true; +} + static bool hasSVEArgsOrReturn(const MachineFunction *MF) { const Function &F = MF->getFunction(); return isa(F.getReturnType()) || diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 22a8ba76c6111..91064787d3dac 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -122,6 +122,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { MachineFunction &MF) const override; unsigned getLocalAddressRegister(const MachineFunction &MF) const; + bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir index 9e2077855c11a..e6eb9e7a3d3ed 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir @@ -449,7 +449,7 @@ body: | # CHECK: frame-setup STR_PXI killed $p5, $sp, 6 # CHECK: frame-setup STR_PXI killed $p4, $sp, 7 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 -# CHECK-COUNT-5: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-2: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $p6 = frame-destroy LDR_PXI $sp, 5 @@ -460,11 +460,9 @@ body: | # # ASM-LABEL: save_restore_pregs_sve: # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG -# ASM-COUNT-3: .cfi_offset # ASM-NEXT: .cfi_offset w29, -16 # # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-COUNT-3: DW_CFA_offset # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_pregs_sve stack: @@ -498,11 +496,16 @@ body: | # # ASM-LABEL: save_restore_zregs_sve: # ASM: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG -# ASM-COUNT-3: .cfi_offset -# -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-COUNT-3: DW_CFA_offset -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG + +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg74 
DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 + name: save_restore_zregs_sve stack: - { id: 0, stack-id: default, size: 32, alignment: 16 } @@ -536,7 +539,7 @@ body: | # CHECK: frame-setup STR_ZXI killed $z8, $sp, 17 # CHECK: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK: $sp = frame-setup SUBXri $sp, 32, 0 -# CHECK-COUNT-33: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-13: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDXri $sp, 32, 0 # CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1 @@ -555,18 +558,32 @@ body: | # # ASM-LABEL: save_restore_sve: # ASM: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG -# ASM-COUNT-28: .cfi_offset +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 32 - 40 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG # ASM-NEXT: .cfi_offset w19, -8 # ASM-NEXT: .cfi_offset w20, -16 # ASM-NEXT: .cfi_offset w21, -24 # ASM-NEXT: .cfi_offset w29, -32 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-COUNT-28: DW_CFA_offset -# UNWINDINFO-NEXT: DW_CFA_offset: reg19 -8 -# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 -# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -24 -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -64, 
DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg19 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg20 -16 +# UNWINDINFO-NEXT: DW_CFA_offset: reg21 -24 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -32 name: save_restore_sve stack: @@ -620,7 +637,7 @@ body: | # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1 # CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0 # CHECK-NEXT: $sp = ANDXri killed $[[TMP]] -# CHECK-COUNT-31: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-11: frame-setup CFI_INSTRUCTION # CHECK: $sp = frame-destroy ADDVL_XXI $fp, -18 # CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4 @@ -635,10 +652,30 @@ body: | # CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 # CHECK-NEXT: RET_ReallyLR # -# UNWINDINFO: DW_CFA_def_cfa: reg29 +16 -# UNWINDINFO-COUNT-28: DW_CFA_offset -# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# ASM-LABEL: save_restore_sve_realign: +# ASM: .cfi_def_cfa w29, 16 +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG +# ASM-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +# ASM-NEXT: .cfi_offset w30, -8 +# ASM-NEXT: .cfi_offset w29, -16 +# +# UNWINDINFO: DW_CFA_def_cfa: reg29 +16 +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg76 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -40, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg77 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -48, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg78 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -56, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg79 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -64, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: save_restore_sve_realign stack: - { id: 0, stack-id: sve-vec, size: 16, alignment: 16 } @@ -713,15 +750,15 @@ body: | # CHECK-NEXT: STR_ZXI killed $z23, $sp, 1 # CHECK-NEXT: STR_ZXI killed $z8, $sp, 2 # CHECK-NEXT: $sp = frame-setup 
ADDVL_XXI $sp, -7 -# CHECK-COUNT-6: frame-setup CFI_INSTRUCTION +# CHECK-COUNT-3: frame-setup CFI_INSTRUCTION # ASM-LABEL: frame_layout: # ASM: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 80 * VG -# ASM-COUNT-4: .cfi_offset +# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG # ASM-NEXT: .cfi_offset w29, -16 # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +80, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus -# UNWINDINFO-COUNT-4: DW_CFA_offset -# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +16, DW_OP_plus, DW_OP_consts +80, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus +# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 name: frame_layout stack: - { id: 0, type: default, size: 32, alignment: 16, stack-id: sve-vec } diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll index 191df22eda506..7c0e9e9f4d9b3 100644 --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -117,7 +117,6 @@ define @trunc_i64toi1_split3( %in) { ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset p4, -2 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z7.d, z7.d, #0x1 From 998c0efee0e6d6909fdee579e6d2694babd99c0f Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 20:27:26 +0900 Subject: [PATCH 295/600] [JumpThreading] Update test freeze.ll; NFC --- llvm/test/Transforms/JumpThreading/freeze.ll | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/JumpThreading/freeze.ll b/llvm/test/Transforms/JumpThreading/freeze.ll index 99fa058ade814..650cbeb2205d5 100644 --- a/llvm/test/Transforms/JumpThreading/freeze.ll +++ b/llvm/test/Transforms/JumpThreading/freeze.ll @@ -42,20 +42,14 @@ F2: define i32 @test1_cast(i1 %cond) { ; CHECK-LABEL: @test1_cast( -; CHECK-NEXT: br i1 [[COND:%.*]], label [[MERGE_THREAD:%.*]], label [[MERGE:%.*]] -; CHECK: Merge.thread: -; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() -; CHECK-NEXT: br label [[T2:%.*]] -; CHECK: Merge: -; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() -; CHECK-NEXT: [[A:%.*]] = trunc i32 0 to i1 -; CHECK-NEXT: [[A_FR:%.*]] = freeze i1 [[A]] -; CHECK-NEXT: br i1 [[A_FR]], label [[T2]], label [[F2:%.*]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[T2:%.*]], label [[F2:%.*]] ; CHECK: T2: -; CHECK-NEXT: [[B5:%.*]] = phi i32 [ [[V1]], [[MERGE_THREAD]] ], [ [[V2]], [[MERGE]] ] +; CHECK-NEXT: [[V1:%.*]] = call i32 @f1() ; CHECK-NEXT: call void @f3() -; CHECK-NEXT: ret i32 [[B5]] +; CHECK-NEXT: ret i32 [[V1]] ; CHECK: F2: +; CHECK-NEXT: [[V2:%.*]] = call i32 @f2() +; CHECK-NEXT: [[A:%.*]] = trunc i32 0 to i1 ; CHECK-NEXT: ret i32 [[V2]] ; br i1 %cond, label %T1, label %F1 From f003b28363f9e09f3f68a5e08ebcca2635a4cdca Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 4 Aug 2020 12:45:18 +0300 Subject: [PATCH 296/600] [MLIR][SPIRVToLLVM] Indentation and style fix in tests This is a first patch that sweeps over tests to fix indentation (tabs to 
spaces). It also adds label checks and removes redundant matching of `%{{.*}} = `. The following tests have been fixed: - arithmetic-ops-to-llvm - bitwise-ops-to-llvm - cast-ops-to-llvm - comparison-ops-to-llvm - logical-ops-to-llvm (renamed to match the rest) Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D85181 --- .../SPIRVToLLVM/arithmetic-ops-to-llvm.mlir | 182 ++++---- .../SPIRVToLLVM/bitwise-ops-to-llvm.mlir | 388 +++++++++--------- .../SPIRVToLLVM/cast-ops-to-llvm.mlir | 164 ++++---- .../SPIRVToLLVM/comparison-ops-to-llvm.mlir | 304 ++++++++------ ...-to-llvm.mlir => logical-ops-to-llvm.mlir} | 52 ++- 5 files changed, 601 insertions(+), 489 deletions(-) rename mlir/test/Conversion/SPIRVToLLVM/{logical-to-llvm.mlir => logical-ops-to-llvm.mlir} (55%) diff --git a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir index 2f5d18a6acf8f..8ae0d98407131 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir @@ -4,206 +4,232 @@ // spv.IAdd //===----------------------------------------------------------------------===// +// CHECK-LABEL: @iadd_scalar func @iadd_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.add %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.IAdd %arg0, %arg1 : i32 - return + // CHECK: llvm.add %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.IAdd %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @iadd_vector func @iadd_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) { - // CHECK: %{{.*}} = llvm.add %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> - %0 = spv.IAdd %arg0, %arg1 : vector<4xi64> - return + // CHECK: llvm.add %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + %0 = spv.IAdd %arg0, %arg1 : vector<4xi64> + return } //===----------------------------------------------------------------------===// // spv.ISub //===----------------------------------------------------------------------===// +// CHECK-LABEL: @isub_scalar func @isub_scalar(%arg0: i8, %arg1: i8) { - // CHECK: %{{.*}} = llvm.sub %{{.*}}, %{{.*}} : !llvm.i8 - %0 = spv.ISub %arg0, %arg1 : i8 - return + // CHECK: llvm.sub %{{.*}}, %{{.*}} : !llvm.i8 + %0 = spv.ISub %arg0, %arg1 : i8 + return } +// CHECK-LABEL: @isub_vector func @isub_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) { - // CHECK: %{{.*}} = llvm.sub %{{.*}}, %{{.*}} : !llvm<"<2 x i16>"> - %0 = spv.ISub %arg0, %arg1 : vector<2xi16> - return + // CHECK: llvm.sub %{{.*}}, %{{.*}} : !llvm<"<2 x i16>"> + %0 = spv.ISub %arg0, %arg1 : vector<2xi16> + return } //===----------------------------------------------------------------------===// // spv.IMul //===----------------------------------------------------------------------===// +// CHECK-LABEL: @imul_scalar func @imul_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.mul %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.IMul %arg0, %arg1 : i32 - return + // CHECK: llvm.mul %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.IMul %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @imul_vector func @imul_vector(%arg0: vector<3xi32>, %arg1: vector<3xi32>) { - // CHECK: %{{.*}} = llvm.mul %{{.*}}, %{{.*}} : !llvm<"<3 x i32>"> - %0 = spv.IMul %arg0, %arg1 : vector<3xi32> - return + // CHECK: llvm.mul %{{.*}}, %{{.*}} : !llvm<"<3 x i32>"> + %0 = spv.IMul %arg0, %arg1 : vector<3xi32> + return } //===----------------------------------------------------------------------===// // spv.FAdd //===----------------------------------------------------------------------===// +// 
CHECK-LABEL: @fadd_scalar func @fadd_scalar(%arg0: f16, %arg1: f16) { - // CHECK: %{{.*}} = llvm.fadd %{{.*}}, %{{.*}} : !llvm.half - %0 = spv.FAdd %arg0, %arg1 : f16 - return + // CHECK: llvm.fadd %{{.*}}, %{{.*}} : !llvm.half + %0 = spv.FAdd %arg0, %arg1 : f16 + return } +// CHECK-LABEL: @fadd_vector func @fadd_vector(%arg0: vector<4xf32>, %arg1: vector<4xf32>) { - // CHECK: %{{.*}} = llvm.fadd %{{.*}}, %{{.*}} : !llvm<"<4 x float>"> - %0 = spv.FAdd %arg0, %arg1 : vector<4xf32> - return + // CHECK: llvm.fadd %{{.*}}, %{{.*}} : !llvm<"<4 x float>"> + %0 = spv.FAdd %arg0, %arg1 : vector<4xf32> + return } //===----------------------------------------------------------------------===// // spv.FSub //===----------------------------------------------------------------------===// +// CHECK-LABEL: @fsub_scalar func @fsub_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fsub %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FSub %arg0, %arg1 : f32 - return + // CHECK: llvm.fsub %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FSub %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @fsub_vector func @fsub_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) { - // CHECK: %{{.*}} = llvm.fsub %{{.*}}, %{{.*}} : !llvm<"<2 x float>"> - %0 = spv.FSub %arg0, %arg1 : vector<2xf32> - return + // CHECK: llvm.fsub %{{.*}}, %{{.*}} : !llvm<"<2 x float>"> + %0 = spv.FSub %arg0, %arg1 : vector<2xf32> + return } //===----------------------------------------------------------------------===// // spv.FDiv //===----------------------------------------------------------------------===// +// CHECK-LABEL: @fdiv_scalar func @fdiv_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fdiv %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FDiv %arg0, %arg1 : f32 - return + // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FDiv %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @fdiv_vector func @fdiv_vector(%arg0: vector<3xf64>, %arg1: vector<3xf64>) { - // CHECK: %{{.*}} = llvm.fdiv %{{.*}}, %{{.*}} : !llvm<"<3 x double>"> - %0 = spv.FDiv %arg0, %arg1 : vector<3xf64> - return + // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : !llvm<"<3 x double>"> + %0 = spv.FDiv %arg0, %arg1 : vector<3xf64> + return } //===----------------------------------------------------------------------===// // spv.FMul //===----------------------------------------------------------------------===// +// CHECK-LABEL: @fmul_scalar func @fmul_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fmul %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FMul %arg0, %arg1 : f32 - return + // CHECK: llvm.fmul %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FMul %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @fmul_vector func @fmul_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) { - // CHECK: %{{.*}} = llvm.fmul %{{.*}}, %{{.*}} : !llvm<"<2 x float>"> - %0 = spv.FMul %arg0, %arg1 : vector<2xf32> - return + // CHECK: llvm.fmul %{{.*}}, %{{.*}} : !llvm<"<2 x float>"> + %0 = spv.FMul %arg0, %arg1 : vector<2xf32> + return } //===----------------------------------------------------------------------===// // spv.FRem //===----------------------------------------------------------------------===// +// CHECK-LABEL: @frem_scalar func @frem_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.frem %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FRem %arg0, %arg1 : f32 - return + // CHECK: llvm.frem %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FRem %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @frem_vector func @frem_vector(%arg0: vector<3xf64>, %arg1: vector<3xf64>) { - // CHECK: 
%{{.*}} = llvm.frem %{{.*}}, %{{.*}} : !llvm<"<3 x double>"> - %0 = spv.FRem %arg0, %arg1 : vector<3xf64> - return + // CHECK: llvm.frem %{{.*}}, %{{.*}} : !llvm<"<3 x double>"> + %0 = spv.FRem %arg0, %arg1 : vector<3xf64> + return } //===----------------------------------------------------------------------===// // spv.FNegate //===----------------------------------------------------------------------===// +// CHECK-LABEL: @fneg_scalar func @fneg_scalar(%arg: f64) { - // CHECK: %{{.*}} = llvm.fneg %{{.*}} : !llvm.double - %0 = spv.FNegate %arg : f64 - return + // CHECK: llvm.fneg %{{.*}} : !llvm.double + %0 = spv.FNegate %arg : f64 + return } +// CHECK-LABEL: @fneg_vector func @fneg_vector(%arg: vector<2xf32>) { - // CHECK: %{{.*}} = llvm.fneg %{{.*}} : !llvm<"<2 x float>"> - %0 = spv.FNegate %arg : vector<2xf32> - return + // CHECK: llvm.fneg %{{.*}} : !llvm<"<2 x float>"> + %0 = spv.FNegate %arg : vector<2xf32> + return } //===----------------------------------------------------------------------===// // spv.UDiv //===----------------------------------------------------------------------===// +// CHECK-LABEL: @udiv_scalar func @udiv_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.udiv %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.UDiv %arg0, %arg1 : i32 - return + // CHECK: llvm.udiv %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.UDiv %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @udiv_vector func @udiv_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) { - // CHECK: %{{.*}} = llvm.udiv %{{.*}}, %{{.*}} : !llvm<"<3 x i64>"> - %0 = spv.UDiv %arg0, %arg1 : vector<3xi64> - return + // CHECK: llvm.udiv %{{.*}}, %{{.*}} : !llvm<"<3 x i64>"> + %0 = spv.UDiv %arg0, %arg1 : vector<3xi64> + return } //===----------------------------------------------------------------------===// // spv.UMod //===----------------------------------------------------------------------===// +// CHECK-LABEL: @umod_scalar func @umod_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.urem %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.UMod %arg0, %arg1 : i32 - return + // CHECK: llvm.urem %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.UMod %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @umod_vector func @umod_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) { - // CHECK: %{{.*}} = llvm.urem %{{.*}}, %{{.*}} : !llvm<"<3 x i64>"> - %0 = spv.UMod %arg0, %arg1 : vector<3xi64> - return + // CHECK: llvm.urem %{{.*}}, %{{.*}} : !llvm<"<3 x i64>"> + %0 = spv.UMod %arg0, %arg1 : vector<3xi64> + return } //===----------------------------------------------------------------------===// // spv.SDiv //===----------------------------------------------------------------------===// +// CHECK-LABEL: @sdiv_scalar func @sdiv_scalar(%arg0: i16, %arg1: i16) { - // CHECK: %{{.*}} = llvm.sdiv %{{.*}}, %{{.*}} : !llvm.i16 - %0 = spv.SDiv %arg0, %arg1 : i16 - return + // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : !llvm.i16 + %0 = spv.SDiv %arg0, %arg1 : i16 + return } +// CHECK-LABEL: @sdiv_vector func @sdiv_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.sdiv %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.SDiv %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.SDiv %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.SRem //===----------------------------------------------------------------------===// +// CHECK-LABEL: @srem_scalar func @srem_scalar(%arg0: i32, %arg1: i32) { - // CHECK: 
%{{.*}} = llvm.srem %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.SRem %arg0, %arg1 : i32 - return + // CHECK: llvm.srem %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.SRem %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @srem_vector func @srem_vector(%arg0: vector<4xi32>, %arg1: vector<4xi32>) { - // CHECK: %{{.*}} = llvm.srem %{{.*}}, %{{.*}} : !llvm<"<4 x i32>"> - %0 = spv.SRem %arg0, %arg1 : vector<4xi32> - return + // CHECK: llvm.srem %{{.*}}, %{{.*}} : !llvm<"<4 x i32>"> + %0 = spv.SRem %arg0, %arg1 : vector<4xi32> + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir index 31ffc6dbf7dc9..aed82d218db21 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir @@ -4,309 +4,321 @@ // spv.BitCount //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitcount_scalar func @bitcount_scalar(%arg0: i16) { - // CHECK: %{{.*}} = "llvm.intr.ctpop"(%{{.*}}) : (!llvm.i16) -> !llvm.i16 - %0 = spv.BitCount %arg0: i16 - return + // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm.i16) -> !llvm.i16 + %0 = spv.BitCount %arg0: i16 + return } +// CHECK-LABEL: @bitcount_vector func @bitcount_vector(%arg0: vector<3xi32>) { - // CHECK: %{{.*}} = "llvm.intr.ctpop"(%{{.*}}) : (!llvm<"<3 x i32>">) -> !llvm<"<3 x i32>"> - %0 = spv.BitCount %arg0: vector<3xi32> - return + // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm<"<3 x i32>">) -> !llvm<"<3 x i32>"> + %0 = spv.BitCount %arg0: vector<3xi32> + return } //===----------------------------------------------------------------------===// // spv.BitReverse //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitreverse_scalar func @bitreverse_scalar(%arg0: i64) { - // CHECK: %{{.*}} = "llvm.intr.bitreverse"(%{{.*}}) : (!llvm.i64) -> !llvm.i64 - %0 = spv.BitReverse %arg0: i64 - return + // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (!llvm.i64) -> !llvm.i64 + %0 = spv.BitReverse %arg0: i64 + return } +// CHECK-LABEL: @bitreverse_vector func @bitreverse_vector(%arg0: vector<4xi32>) { - // CHECK: %{{.*}} = "llvm.intr.bitreverse"(%{{.*}}) : (!llvm<"<4 x i32>">) -> !llvm<"<4 x i32>"> - %0 = spv.BitReverse %arg0: vector<4xi32> - return + // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (!llvm<"<4 x i32>">) -> !llvm<"<4 x i32>"> + %0 = spv.BitReverse %arg0: vector<4xi32> + return } //===----------------------------------------------------------------------===// // spv.BitFieldInsert //===----------------------------------------------------------------------===// -// CHECK-LABEL: func @bitfield_insert_scalar_same_bit_width +// CHECK-LABEL: @bitfield_insert_scalar_same_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i32, %[[INSERT:.*]]: !llvm.i32, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_insert_scalar_same_bit_width(%base: i32, %insert: i32, %offset: i32, %count: i32) { - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i32 - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 - // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET]] : !llvm.i32 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i32 - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i32 - // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET]] : !llvm.i32 - // CHECK: %{{.*}} = llvm.or %[[NEW_BASE]], 
%[[SHIFTED_INSERT]] : !llvm.i32 - %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i32, i32, i32 - return + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i32 + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET]] : !llvm.i32 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i32 + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i32 + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET]] : !llvm.i32 + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.i32 + %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i32, i32, i32 + return } -// CHECK-LABEL: func @bitfield_insert_scalar_smaller_bit_width +// CHECK-LABEL: @bitfield_insert_scalar_smaller_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i64, %[[INSERT:.*]]: !llvm.i64, %[[OFFSET:.*]]: !llvm.i8, %[[COUNT:.*]]: !llvm.i8 func @bitfield_insert_scalar_smaller_bit_width(%base: i64, %insert: i64, %offset: i8, %count: i8) { - // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i8 to !llvm.i64 - // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i64 - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i64) : !llvm.i64 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[EXT_COUNT]] : !llvm.i64 - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i64 - // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[EXT_OFFSET]] : !llvm.i64 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i64 - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i64 - // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[EXT_OFFSET]] : !llvm.i64 - // CHECK: %{{.*}} = llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.i64 - %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i64, i8, i8 - return + // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i8 to !llvm.i64 + // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i64 + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i64) : !llvm.i64 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[EXT_COUNT]] : !llvm.i64 + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i64 + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[EXT_OFFSET]] : !llvm.i64 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i64 + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i64 + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[EXT_OFFSET]] : !llvm.i64 + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.i64 + %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i64, i8, i8 + return } -// CHECK-LABEL: func @bitfield_insert_scalar_greater_bit_width +// CHECK-LABEL: @bitfield_insert_scalar_greater_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i16, %[[INSERT:.*]]: !llvm.i16, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i64 func @bitfield_insert_scalar_greater_bit_width(%base: i16, %insert: i16, %offset: i32, %count: i64) { - // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i32 to !llvm.i16 - // CHECK: %[[TRUNC_COUNT:.*]] = llvm.trunc %[[COUNT]] : !llvm.i64 to !llvm.i16 - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i16) : !llvm.i16 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[TRUNC_COUNT]] : !llvm.i16 - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i16 - // CHECK: %[[T2:.*]] = llvm.shl 
%[[T1]], %[[TRUNC_OFFSET]] : !llvm.i16 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i16 - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i16 - // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[TRUNC_OFFSET]] : !llvm.i16 - // CHECK: %{{.*}} = llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.i16 - %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i16, i32, i64 - return + // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i32 to !llvm.i16 + // CHECK: %[[TRUNC_COUNT:.*]] = llvm.trunc %[[COUNT]] : !llvm.i64 to !llvm.i16 + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i16) : !llvm.i16 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[TRUNC_COUNT]] : !llvm.i16 + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i16 + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[TRUNC_OFFSET]] : !llvm.i16 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.i16 + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.i16 + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[TRUNC_OFFSET]] : !llvm.i16 + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.i16 + %0 = spv.BitFieldInsert %base, %insert, %offset, %count : i16, i32, i64 + return } -// CHECK-LABEL: func @bitfield_insert_vector +// CHECK-LABEL: @bitfield_insert_vector // CHECK-SAME: %[[BASE:.*]]: !llvm<"<2 x i32>">, %[[INSERT:.*]]: !llvm<"<2 x i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_insert_vector(%base: vector<2xi32>, %insert: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %{{.*}} = llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm<"<2 x i32>"> - %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<2xi32>, i32, i32 - return + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] 
= llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm<"<2 x i32>"> + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm<"<2 x i32>"> + %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<2xi32>, i32, i32 + return } //===----------------------------------------------------------------------===// // spv.BitFieldSExtract //===----------------------------------------------------------------------===// -// CHECK-LABEL: func @bitfield_sextract_scalar_same_bit_width +// CHECK-LABEL: @bitfield_sextract_scalar_same_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i64, %[[OFFSET:.*]]: !llvm.i64, %[[COUNT:.*]]: !llvm.i64 func @bitfield_sextract_scalar_same_bit_width(%base: i64, %offset: i64, %count: i64) { - // CHECK: %[[SIZE:.]] = llvm.mlir.constant(64 : i64) : !llvm.i64 - // CHECK: %[[T0:.*]] = llvm.add %[[COUNT]], %[[OFFSET]] : !llvm.i64 - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i64 - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.i64 - // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET]], %[[T1]] : !llvm.i64 - // CHECK: %{{.*}} = llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i64 - %0 = spv.BitFieldSExtract %base, %offset, %count : i64, i64, i64 - return + // CHECK: %[[SIZE:.]] = llvm.mlir.constant(64 : i64) : !llvm.i64 + // CHECK: %[[T0:.*]] = llvm.add %[[COUNT]], %[[OFFSET]] : !llvm.i64 + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i64 + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.i64 + // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET]], %[[T1]] : !llvm.i64 + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i64 + %0 = spv.BitFieldSExtract %base, %offset, %count : i64, i64, i64 + return } -// CHECK-LABEL: func @bitfield_sextract_scalar_smaller_bit_width +// CHECK-LABEL: @bitfield_sextract_scalar_smaller_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i32, %[[OFFSET:.*]]: !llvm.i8, %[[COUNT:.*]]: !llvm.i8 func @bitfield_sextract_scalar_smaller_bit_width(%base: i32, %offset: i8, %count: i8) { - // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i8 to !llvm.i32 - // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i32 - // CHECK: %[[SIZE:.]] = llvm.mlir.constant(32 : i32) : !llvm.i32 - // CHECK: %[[T0:.*]] = llvm.add %[[EXT_COUNT]], %[[EXT_OFFSET]] : !llvm.i32 - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i32 - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl 
%[[BASE]], %[[T1]] : !llvm.i32 - // CHECK: %[[T2:.*]] = llvm.add %[[EXT_OFFSET]], %[[T1]] : !llvm.i32 - // CHECK: %{{.*}} = llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i32 - %0 = spv.BitFieldSExtract %base, %offset, %count : i32, i8, i8 - return + // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i8 to !llvm.i32 + // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i32 + // CHECK: %[[SIZE:.]] = llvm.mlir.constant(32 : i32) : !llvm.i32 + // CHECK: %[[T0:.*]] = llvm.add %[[EXT_COUNT]], %[[EXT_OFFSET]] : !llvm.i32 + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i32 + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.i32 + // CHECK: %[[T2:.*]] = llvm.add %[[EXT_OFFSET]], %[[T1]] : !llvm.i32 + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i32 + %0 = spv.BitFieldSExtract %base, %offset, %count : i32, i8, i8 + return } -// CHECK-LABEL: func @bitfield_sextract_scalar_greater_bit_width +// CHECK-LABEL: @bitfield_sextract_scalar_greater_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i32, %[[OFFSET:.*]]: !llvm.i64, %[[COUNT:.*]]: !llvm.i64 func @bitfield_sextract_scalar_greater_bit_width(%base: i32, %offset: i64, %count: i64) { - // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i64 to !llvm.i32 - // CHECK: %[[TRUNC_COUNT:.*]] = llvm.trunc %[[COUNT]] : !llvm.i64 to !llvm.i32 - // CHECK: %[[SIZE:.]] = llvm.mlir.constant(32 : i32) : !llvm.i32 - // CHECK: %[[T0:.*]] = llvm.add %[[TRUNC_COUNT]], %[[TRUNC_OFFSET]] : !llvm.i32 - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i32 - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.i32 - // CHECK: %[[T2:.*]] = llvm.add %[[TRUNC_OFFSET]], %[[T1]] : !llvm.i32 - // CHECK: %{{.*}} = llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i32 - %0 = spv.BitFieldSExtract %base, %offset, %count : i32, i64, i64 - return + // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i64 to !llvm.i32 + // CHECK: %[[TRUNC_COUNT:.*]] = llvm.trunc %[[COUNT]] : !llvm.i64 to !llvm.i32 + // CHECK: %[[SIZE:.]] = llvm.mlir.constant(32 : i32) : !llvm.i32 + // CHECK: %[[T0:.*]] = llvm.add %[[TRUNC_COUNT]], %[[TRUNC_OFFSET]] : !llvm.i32 + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.i32 + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.i32 + // CHECK: %[[T2:.*]] = llvm.add %[[TRUNC_OFFSET]], %[[T1]] : !llvm.i32 + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.i32 + %0 = spv.BitFieldSExtract %base, %offset, %count : i32, i64, i64 + return } -// CHECK-LABEL: func @bitfield_sextract_vector +// CHECK-LABEL: @bitfield_sextract_vector // CHECK-SAME: %[[BASE:.*]]: !llvm<"<2 x i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_sextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = 
llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm<"<2 x i32>"> - // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : !llvm<"<2 x i32>"> - // CHECK: %{{.*}} = llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm<"<2 x i32>"> - %0 = spv.BitFieldSExtract %base, %offset, %count : vector<2xi32>, i32, i32 - return + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : !llvm<"<2 x i32>"> + // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm<"<2 x i32>"> + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm<"<2 x i32>"> + // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : !llvm<"<2 x i32>"> + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm<"<2 x i32>"> + %0 = spv.BitFieldSExtract %base, %offset, %count : vector<2xi32>, i32, i32 + return } //===----------------------------------------------------------------------===// // spv.BitFieldUExtract //===----------------------------------------------------------------------===// -// CHECK-LABEL: func @bitfield_uextract_scalar_same_bit_width +// CHECK-LABEL: @bitfield_uextract_scalar_same_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i32, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_uextract_scalar_same_bit_width(%base: i32, %offset: i32, %count: i32) { - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i32 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET]] : !llvm.i32 - // CHECK: %{{.*}} = llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i32 - %0 = spv.BitFieldUExtract %base, %offset, %count : i32, i32, i32 - return + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i32 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET]] : !llvm.i32 + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i32 + %0 = spv.BitFieldUExtract %base, %offset, %count : i32, 
i32, i32 + return } -// CHECK-LABEL: func @bitfield_uextract_scalar_smaller_bit_width +// CHECK-LABEL: @bitfield_uextract_scalar_smaller_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i32, %[[OFFSET:.*]]: !llvm.i16, %[[COUNT:.*]]: !llvm.i8 func @bitfield_uextract_scalar_smaller_bit_width(%base: i32, %offset: i16, %count: i8) { - // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i16 to !llvm.i32 - // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i32 - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[EXT_COUNT]] : !llvm.i32 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[EXT_OFFSET]] : !llvm.i32 - // CHECK: %{{.*}} = llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i32 - %0 = spv.BitFieldUExtract %base, %offset, %count : i32, i16, i8 - return + // CHECK: %[[EXT_OFFSET:.*]] = llvm.zext %[[OFFSET]] : !llvm.i16 to !llvm.i32 + // CHECK: %[[EXT_COUNT:.*]] = llvm.zext %[[COUNT]] : !llvm.i8 to !llvm.i32 + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[EXT_COUNT]] : !llvm.i32 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i32 + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[EXT_OFFSET]] : !llvm.i32 + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i32 + %0 = spv.BitFieldUExtract %base, %offset, %count : i32, i16, i8 + return } -// CHECK-LABEL: func @bitfield_uextract_scalar_greater_bit_width +// CHECK-LABEL: @bitfield_uextract_scalar_greater_bit_width // CHECK-SAME: %[[BASE:.*]]: !llvm.i8, %[[OFFSET:.*]]: !llvm.i16, %[[COUNT:.*]]: !llvm.i8 func @bitfield_uextract_scalar_greater_bit_width(%base: i8, %offset: i16, %count: i8) { - // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i16 to !llvm.i8 - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i8) : !llvm.i8 - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i8 - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i8 - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[TRUNC_OFFSET]] : !llvm.i8 - // CHECK: %{{.*}} = llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i8 - %0 = spv.BitFieldUExtract %base, %offset, %count : i8, i16, i8 - return + // CHECK: %[[TRUNC_OFFSET:.*]] = llvm.trunc %[[OFFSET]] : !llvm.i16 to !llvm.i8 + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(-1 : i8) : !llvm.i8 + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT]] : !llvm.i8 + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.i8 + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[TRUNC_OFFSET]] : !llvm.i8 + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.i8 + %0 = spv.BitFieldUExtract %base, %offset, %count : i8, i16, i8 + return } -// CHECK-LABEL: func @bitfield_uextract_vector +// CHECK-LABEL: @bitfield_uextract_vector // CHECK-SAME: %[[BASE:.*]]: !llvm<"<2 x i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_uextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], 
%[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> - // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %{{.*}} = llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm<"<2 x i32>"> - %0 = spv.BitFieldUExtract %base, %offset, %count : vector<2xi32>, i32, i32 - return + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm<"<2 x i32>"> + %0 = spv.BitFieldUExtract %base, %offset, %count : vector<2xi32>, i32, i32 + return } //===----------------------------------------------------------------------===// // spv.BitwiseAnd //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitwise_and_scalar func @bitwise_and_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.and %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.BitwiseAnd %arg0, %arg1 : i32 - return + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.BitwiseAnd %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @bitwise_and_vector func @bitwise_and_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) { - // CHECK: %{{.*}} = llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> - %0 = spv.BitwiseAnd %arg0, %arg1 : vector<4xi64> - return + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + %0 = spv.BitwiseAnd %arg0, %arg1 : vector<4xi64> + return } //===----------------------------------------------------------------------===// // spv.BitwiseOr //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitwise_or_scalar func @bitwise_or_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = 
llvm.or %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.BitwiseOr %arg0, %arg1 : i64 - return + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.BitwiseOr %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @bitwise_or_vector func @bitwise_or_vector(%arg0: vector<3xi8>, %arg1: vector<3xi8>) { - // CHECK: %{{.*}} = llvm.or %{{.*}}, %{{.*}} : !llvm<"<3 x i8>"> - %0 = spv.BitwiseOr %arg0, %arg1 : vector<3xi8> - return + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm<"<3 x i8>"> + %0 = spv.BitwiseOr %arg0, %arg1 : vector<3xi8> + return } //===----------------------------------------------------------------------===// // spv.BitwiseXor //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitwise_xor_scalar func @bitwise_xor_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.BitwiseXor %arg0, %arg1 : i32 - return + // CHECK: llvm.xor %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.BitwiseXor %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @bitwise_xor_vector func @bitwise_xor_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) { - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %{{.*}} : !llvm<"<2 x i16>"> - %0 = spv.BitwiseXor %arg0, %arg1 : vector<2xi16> - return + // CHECK: llvm.xor %{{.*}}, %{{.*}} : !llvm<"<2 x i16>"> + %0 = spv.BitwiseXor %arg0, %arg1 : vector<2xi16> + return } //===----------------------------------------------------------------------===// // spv.Not //===----------------------------------------------------------------------===// -func @not__scalar(%arg0: i32) { +// CHECK-LABEL: @not_scalar +func @not_scalar(%arg0: i32) { // CHECK: %[[CONST:.*]] = llvm.mlir.constant(-1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %[[CONST]] : !llvm.i32 - %0 = spv.Not %arg0 : i32 + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.i32 + %0 = spv.Not %arg0 : i32 return } +// CHECK-LABEL: @not_vector func @not_vector(%arg0: vector<2xi16>) { // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi16>) : !llvm<"<2 x i16>"> - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<2 x i16>"> - %0 = spv.Not %arg0 : vector<2xi16> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<2 x i16>"> + %0 = spv.Not %arg0 : vector<2xi16> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir index 29f43fadf933c..a0434aad3bea7 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir @@ -4,168 +4,188 @@ // spv.Bitcast //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bitcast_float_to_integer_scalar func @bitcast_float_to_integer_scalar(%arg0 : f32) { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.float to !llvm.i32 - %0 = spv.Bitcast %arg0: f32 to i32 - return + // CHECK: llvm.bitcast {{.*}} : !llvm.float to !llvm.i32 + %0 = spv.Bitcast %arg0: f32 to i32 + return } +// CHECK-LABEL: @bitcast_float_to_integer_vector func @bitcast_float_to_integer_vector(%arg0 : vector<3xf32>) { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm<"<3 x float>"> to !llvm<"<3 x i32>"> - %0 = spv.Bitcast %arg0: vector<3xf32> to vector<3xi32> - return + // CHECK: llvm.bitcast {{.*}} : !llvm<"<3 x float>"> to !llvm<"<3 x i32>"> + %0 = spv.Bitcast %arg0: vector<3xf32> to vector<3xi32> + return } +// CHECK-LABEL: @bitcast_vector_to_scalar func @bitcast_vector_to_scalar(%arg0 : vector<2xf32>) { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm<"<2 
x float>"> to !llvm.i64 - %0 = spv.Bitcast %arg0: vector<2xf32> to i64 - return + // CHECK: llvm.bitcast {{.*}} : !llvm<"<2 x float>"> to !llvm.i64 + %0 = spv.Bitcast %arg0: vector<2xf32> to i64 + return } +// CHECK-LABEL: @bitcast_scalar_to_vector func @bitcast_scalar_to_vector(%arg0 : f64) { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.double to !llvm<"<2 x i32>"> - %0 = spv.Bitcast %arg0: f64 to vector<2xi32> - return + // CHECK: llvm.bitcast {{.*}} : !llvm.double to !llvm<"<2 x i32>"> + %0 = spv.Bitcast %arg0: f64 to vector<2xi32> + return } +// CHECK-LABEL: @bitcast_vector_to_vector func @bitcast_vector_to_vector(%arg0 : vector<4xf32>) { - // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm<"<4 x float>"> to !llvm<"<2 x i64>"> - %0 = spv.Bitcast %arg0: vector<4xf32> to vector<2xi64> - return + // CHECK: llvm.bitcast {{.*}} : !llvm<"<4 x float>"> to !llvm<"<2 x i64>"> + %0 = spv.Bitcast %arg0: vector<4xf32> to vector<2xi64> + return } +// CHECK-LABEL: @bitcast_pointer func @bitcast_pointer(%arg0: !spv.ptr) { - // CHECK: %{{.*}} = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"i32*"> - %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr - return + // CHECK: llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"i32*"> + %0 = spv.Bitcast %arg0 : !spv.ptr to !spv.ptr + return } //===----------------------------------------------------------------------===// // spv.ConvertFToS //===----------------------------------------------------------------------===// +// CHECK-LABEL: @convert_float_to_signed_scalar func @convert_float_to_signed_scalar(%arg0: f32) { - // CHECK: %{{.*}} = llvm.fptosi %{{.*}} : !llvm.float to !llvm.i32 - %0 = spv.ConvertFToS %arg0: f32 to i32 - return + // CHECK: llvm.fptosi %{{.*}} : !llvm.float to !llvm.i32 + %0 = spv.ConvertFToS %arg0: f32 to i32 + return } +// CHECK-LABEL: @convert_float_to_signed_vector func @convert_float_to_signed_vector(%arg0: vector<2xf32>) { - // CHECK: %{{.*}} = llvm.fptosi %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> - %0 = spv.ConvertFToS %arg0: vector<2xf32> to vector<2xi32> - return + // CHECK: llvm.fptosi %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> + %0 = spv.ConvertFToS %arg0: vector<2xf32> to vector<2xi32> + return } //===----------------------------------------------------------------------===// // spv.ConvertFToU //===----------------------------------------------------------------------===// +// CHECK-LABEL: @convert_float_to_unsigned_scalar func @convert_float_to_unsigned_scalar(%arg0: f32) { - // CHECK: %{{.*}} = llvm.fptoui %{{.*}} : !llvm.float to !llvm.i32 - %0 = spv.ConvertFToU %arg0: f32 to i32 - return + // CHECK: llvm.fptoui %{{.*}} : !llvm.float to !llvm.i32 + %0 = spv.ConvertFToU %arg0: f32 to i32 + return } +// CHECK-LABEL: @convert_float_to_unsigned_vector func @convert_float_to_unsigned_vector(%arg0: vector<2xf32>) { - // CHECK: %{{.*}} = llvm.fptoui %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> - %0 = spv.ConvertFToU %arg0: vector<2xf32> to vector<2xi32> - return + // CHECK: llvm.fptoui %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> + %0 = spv.ConvertFToU %arg0: vector<2xf32> to vector<2xi32> + return } //===----------------------------------------------------------------------===// // spv.ConvertSToF //===----------------------------------------------------------------------===// +// CHECK-LABEL: @convert_signed_to_float_scalar func @convert_signed_to_float_scalar(%arg0: i32) { - // CHECK: %{{.*}} = llvm.sitofp %{{.*}} : !llvm.i32 to !llvm.float - %0 = spv.ConvertSToF %arg0: i32 to f32 - return + 
// CHECK: llvm.sitofp %{{.*}} : !llvm.i32 to !llvm.float + %0 = spv.ConvertSToF %arg0: i32 to f32 + return } +// CHECK-LABEL: @convert_signed_to_float_vector func @convert_signed_to_float_vector(%arg0: vector<3xi32>) { - // CHECK: %{{.*}} = llvm.sitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> - %0 = spv.ConvertSToF %arg0: vector<3xi32> to vector<3xf32> - return + // CHECK: llvm.sitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> + %0 = spv.ConvertSToF %arg0: vector<3xi32> to vector<3xf32> + return } //===----------------------------------------------------------------------===// // spv.ConvertUToF //===----------------------------------------------------------------------===// +// CHECK-LABEL: @convert_unsigned_to_float_scalar func @convert_unsigned_to_float_scalar(%arg0: i32) { - // CHECK: %{{.*}} = llvm.uitofp %{{.*}} : !llvm.i32 to !llvm.float - %0 = spv.ConvertUToF %arg0: i32 to f32 - return + // CHECK: llvm.uitofp %{{.*}} : !llvm.i32 to !llvm.float + %0 = spv.ConvertUToF %arg0: i32 to f32 + return } +// CHECK-LABEL: @convert_unsigned_to_float_vector func @convert_unsigned_to_float_vector(%arg0: vector<3xi32>) { - // CHECK: %{{.*}} = llvm.uitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> - %0 = spv.ConvertUToF %arg0: vector<3xi32> to vector<3xf32> - return + // CHECK: llvm.uitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> + %0 = spv.ConvertUToF %arg0: vector<3xi32> to vector<3xf32> + return } //===----------------------------------------------------------------------===// // spv.FConvert //===----------------------------------------------------------------------===// +// CHECK-LABEL: @fconvert_scalar func @fconvert_scalar(%arg0: f32, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fpext %{{.*}} : !llvm.float to !llvm.double - %0 = spv.FConvert %arg0: f32 to f64 + // CHECK: llvm.fpext %{{.*}} : !llvm.float to !llvm.double + %0 = spv.FConvert %arg0: f32 to f64 - // CHECK: %{{.*}} = llvm.fptrunc %{{.*}} : !llvm.double to !llvm.float - %1 = spv.FConvert %arg1: f64 to f32 - return + // CHECK: llvm.fptrunc %{{.*}} : !llvm.double to !llvm.float + %1 = spv.FConvert %arg1: f64 to f32 + return } +// CHECK-LABEL: @fconvert_vector func @fconvert_vector(%arg0: vector<2xf32>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fpext %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x double>"> - %0 = spv.FConvert %arg0: vector<2xf32> to vector<2xf64> + // CHECK: llvm.fpext %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x double>"> + %0 = spv.FConvert %arg0: vector<2xf32> to vector<2xf64> - // CHECK: %{{.*}} = llvm.fptrunc %{{.*}} : !llvm<"<2 x double>"> to !llvm<"<2 x float>"> - %1 = spv.FConvert %arg1: vector<2xf64> to vector<2xf32> - return + // CHECK: llvm.fptrunc %{{.*}} : !llvm<"<2 x double>"> to !llvm<"<2 x float>"> + %1 = spv.FConvert %arg1: vector<2xf64> to vector<2xf32> + return } //===----------------------------------------------------------------------===// // spv.SConvert //===----------------------------------------------------------------------===// +// CHECK-LABEL: @sconvert_scalar func @sconvert_scalar(%arg0: i32, %arg1: i64) { - // CHECK: %{{.*}} = llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64 - %0 = spv.SConvert %arg0: i32 to i64 + // CHECK: llvm.sext %{{.*}} : !llvm.i32 to !llvm.i64 + %0 = spv.SConvert %arg0: i32 to i64 - // CHECK: %{{.*}} = llvm.trunc %{{.*}} : !llvm.i64 to !llvm.i32 - %1 = spv.SConvert %arg1: i64 to i32 - return + // CHECK: llvm.trunc %{{.*}} : !llvm.i64 to !llvm.i32 + %1 = spv.SConvert %arg1: i64 to i32 + return } +// CHECK-LABEL: 
@sconvert_vector func @sconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) { - // CHECK: %{{.*}} = llvm.sext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> - %0 = spv.SConvert %arg0: vector<3xi32> to vector<3xi64> + // CHECK: llvm.sext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> + %0 = spv.SConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: %{{.*}} = llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> - %1 = spv.SConvert %arg1: vector<3xi64> to vector<3xi32> - return + // CHECK: llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> + %1 = spv.SConvert %arg1: vector<3xi64> to vector<3xi32> + return } //===----------------------------------------------------------------------===// // spv.UConvert //===----------------------------------------------------------------------===// +// CHECK-LABEL: @uconvert_scalar func @uconvert_scalar(%arg0: i32, %arg1: i64) { - // CHECK: %{{.*}} = llvm.zext %{{.*}} : !llvm.i32 to !llvm.i64 - %0 = spv.UConvert %arg0: i32 to i64 + // CHECK: llvm.zext %{{.*}} : !llvm.i32 to !llvm.i64 + %0 = spv.UConvert %arg0: i32 to i64 - // CHECK: %{{.*}} = llvm.trunc %{{.*}} : !llvm.i64 to !llvm.i32 - %1 = spv.UConvert %arg1: i64 to i32 - return + // CHECK: llvm.trunc %{{.*}} : !llvm.i64 to !llvm.i32 + %1 = spv.UConvert %arg1: i64 to i32 + return } +// CHECK-LABEL: @uconvert_vector func @uconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) { - // CHECK: %{{.*}} = llvm.zext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> - %0 = spv.UConvert %arg0: vector<3xi32> to vector<3xi64> + // CHECK: llvm.zext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> + %0 = spv.UConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: %{{.*}} = llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> - %1 = spv.UConvert %arg1: vector<3xi64> to vector<3xi32> - return + // CHECK: llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> + %1 = spv.UConvert %arg1: vector<3xi64> to vector<3xi32> + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir index c4241e226a1ba..bcfe234f7080d 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir @@ -4,190 +4,214 @@ // spv.IEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @i_equal_scalar func @i_equal_scalar(%arg0: i32, %arg1: i32) { - // CHECK: %{{.*}} = llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.IEqual %arg0, %arg1 : i32 - return + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.IEqual %arg0, %arg1 : i32 + return } +// CHECK-LABEL: @i_equal_vector func @i_equal_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) { - // CHECK: %{{.*}} = llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> - %0 = spv.IEqual %arg0, %arg1 : vector<4xi64> - return + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + %0 = spv.IEqual %arg0, %arg1 : vector<4xi64> + return } //===----------------------------------------------------------------------===// // spv.INotEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @i_not_equal_scalar func @i_not_equal_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.INotEqual %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.INotEqual %arg0, %arg1 : i64 + 
return } +// CHECK-LABEL: @i_not_equal_vector func @i_not_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.INotEqual %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.INotEqual %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.SGreaterThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @s_greater_than_equal_scalar func @s_greater_than_equal_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.SGreaterThanEqual %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.SGreaterThanEqual %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @s_greater_than_equal_vector func @s_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.SGreaterThanEqual %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.SGreaterThanEqual %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.SGreaterThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @s_greater_than_scalar func @s_greater_than_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.SGreaterThan %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.SGreaterThan %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @s_greater_than_vector func @s_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.SGreaterThan %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.SGreaterThan %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.SLessThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @s_less_than_equal_scalar func @s_less_than_equal_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.SLessThanEqual %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.SLessThanEqual %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @s_less_than_equal_vector func @s_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.SLessThanEqual %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.SLessThanEqual %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.SLessThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @s_less_than_scalar func @s_less_than_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.SLessThan %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm.i64 + 
%0 = spv.SLessThan %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @s_less_than_vector func @s_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.SLessThan %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.SLessThan %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.UGreaterThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @u_greater_than_equal_scalar func @u_greater_than_equal_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.UGreaterThanEqual %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.UGreaterThanEqual %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @u_greater_than_equal_vector func @u_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.UGreaterThanEqual %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.UGreaterThanEqual %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.UGreaterThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @u_greater_than_scalar func @u_greater_than_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.UGreaterThan %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.UGreaterThan %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @u_greater_than_vector func @u_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.UGreaterThan %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.UGreaterThan %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.ULessThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @u_less_than_equal_scalar func @u_less_than_equal_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.ULessThanEqual %arg0, %arg1 : i64 - return + // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.ULessThanEqual %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @u_less_than_equal_vector func @u_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.ULessThanEqual %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.ULessThanEqual %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.ULessThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @u_less_than_scalar func @u_less_than_scalar(%arg0: i64, %arg1: i64) { - // CHECK: %{{.*}} = llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm.i64 - %0 = spv.ULessThan %arg0, %arg1 : i64 - return + // CHECK: 
llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm.i64 + %0 = spv.ULessThan %arg0, %arg1 : i64 + return } +// CHECK-LABEL: @u_less_than_vector func @u_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: %{{.*}} = llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> - %0 = spv.ULessThan %arg0, %arg1 : vector<2xi64> - return + // CHECK: llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + %0 = spv.ULessThan %arg0, %arg1 : vector<2xi64> + return } //===----------------------------------------------------------------------===// // spv.FOrdEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_equal_scalar func @f_ord_equal_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FOrdEqual %arg0, %arg1 : f32 - return + // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FOrdEqual %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @f_ord_equal_vector func @f_ord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> - %0 = spv.FOrdEqual %arg0, %arg1 : vector<4xf64> - return + // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + %0 = spv.FOrdEqual %arg0, %arg1 : vector<4xf64> + return } //===----------------------------------------------------------------------===// // spv.FOrdGreaterThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_greater_than_equal_scalar func @f_ord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_ord_greater_than_equal_vector func @f_ord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : vector<2xf64> return } @@ -196,158 +220,178 @@ func @f_ord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64> // spv.FOrdGreaterThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_greater_than_scalar func @f_ord_greater_than_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FOrdGreaterThan %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FOrdGreaterThan %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_ord_greater_than_vector func @f_ord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FOrdGreaterThan %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FOrdGreaterThan %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FOrdLessThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_less_than_scalar func @f_ord_less_than_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm.double - %0 = 
spv.FOrdLessThan %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FOrdLessThan %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_ord_less_than_vector func @f_ord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FOrdLessThan %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FOrdLessThan %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FOrdLessThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_less_than_equal_scalar func @f_ord_less_than_equal_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FOrdLessThanEqual %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FOrdLessThanEqual %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_ord_less_than_equal_vector func @f_ord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FOrdLessThanEqual %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FOrdLessThanEqual %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FOrdNotEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_ord_not_equal_scalar func @f_ord_not_equal_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FOrdNotEqual %arg0, %arg1 : f32 - return + // CHECK: llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FOrdNotEqual %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @f_ord_not_equal_vector func @f_ord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> - %0 = spv.FOrdNotEqual %arg0, %arg1 : vector<4xf64> - return + // CHECK: llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + %0 = spv.FOrdNotEqual %arg0, %arg1 : vector<4xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_equal_scalar func @f_unord_equal_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FUnordEqual %arg0, %arg1 : f32 - return + // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FUnordEqual %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @f_unord_equal_vector func @f_unord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> - %0 = spv.FUnordEqual %arg0, %arg1 : vector<4xf64> - return + // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + %0 = spv.FUnordEqual %arg0, %arg1 : vector<4xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordGreaterThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_greater_than_equal_scalar func 
@f_unord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_unord_greater_than_equal_vector func @f_unord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordGreaterThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_greater_than_scalar func @f_unord_greater_than_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FUnordGreaterThan %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FUnordGreaterThan %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_unord_greater_than_vector func @f_unord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FUnordGreaterThan %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FUnordGreaterThan %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordLessThan //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_less_than_scalar func @f_unord_less_than_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FUnordLessThan %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FUnordLessThan %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_unord_less_than_vector func @f_unord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FUnordLessThan %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FUnordLessThan %arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordLessThanEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_less_than_equal_scalar func @f_unord_less_than_equal_scalar(%arg0: f64, %arg1: f64) { - // CHECK: %{{.*}} = llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm.double - %0 = spv.FUnordLessThanEqual %arg0, %arg1 : f64 - return + // CHECK: llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm.double + %0 = spv.FUnordLessThanEqual %arg0, %arg1 : f64 + return } +// CHECK-LABEL: @f_unord_less_than_equal_vector func @f_unord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FUnordLessThanEqual %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + %0 = spv.FUnordLessThanEqual 
%arg0, %arg1 : vector<2xf64> + return } //===----------------------------------------------------------------------===// // spv.FUnordNotEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @f_unord_not_equal_scalar func @f_unord_not_equal_scalar(%arg0: f32, %arg1: f32) { - // CHECK: %{{.*}} = llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm.float - %0 = spv.FUnordNotEqual %arg0, %arg1 : f32 - return + // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm.float + %0 = spv.FUnordNotEqual %arg0, %arg1 : f32 + return } +// CHECK-LABEL: @f_unord_not_equal_vector func @f_unord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: %{{.*}} = llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> - %0 = spv.FUnordNotEqual %arg0, %arg1 : vector<4xf64> - return + // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + %0 = spv.FUnordNotEqual %arg0, %arg1 : vector<4xf64> + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/logical-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir similarity index 55% rename from mlir/test/Conversion/SPIRVToLLVM/logical-to-llvm.mlir rename to mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir index e6f2ec2433fa5..a816424d5b1c3 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/logical-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir @@ -4,15 +4,17 @@ // spv.LogicalEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @logical_equal_scalar func @logical_equal_scalar(%arg0: i1, %arg1: i1) { - // CHECK: %{{.*}} = llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.i1 - %0 = spv.LogicalEqual %arg0, %arg0 : i1 + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.i1 + %0 = spv.LogicalEqual %arg0, %arg0 : i1 return } +// CHECK-LABEL: @logical_equal_vector func @logical_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: %{{.*}} = llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> - %0 = spv.LogicalEqual %arg0, %arg0 : vector<4xi1> + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + %0 = spv.LogicalEqual %arg0, %arg0 : vector<4xi1> return } @@ -20,15 +22,17 @@ func @logical_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { // spv.LogicalNotEqual //===----------------------------------------------------------------------===// +// CHECK-LABEL: @logical_not_equal_scalar func @logical_not_equal_scalar(%arg0: i1, %arg1: i1) { - // CHECK: %{{.*}} = llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.i1 - %0 = spv.LogicalNotEqual %arg0, %arg0 : i1 + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.i1 + %0 = spv.LogicalNotEqual %arg0, %arg0 : i1 return } +// CHECK-LABEL: @logical_not_equal_vector func @logical_not_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: %{{.*}} = llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> - %0 = spv.LogicalNotEqual %arg0, %arg0 : vector<4xi1> + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + %0 = spv.LogicalNotEqual %arg0, %arg0 : vector<4xi1> return } @@ -36,17 +40,19 @@ func @logical_not_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { // spv.LogicalNot //===----------------------------------------------------------------------===// -func @logical_not__scalar(%arg0: i1) { +// CHECK-LABEL: @logical_not_scalar +func @logical_not_scalar(%arg0: i1) { // CHECK: %[[CONST:.*]] = llvm.mlir.constant(true) : !llvm.i1 - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %[[CONST]] : !llvm.i1 - %0 = spv.LogicalNot %arg0 : i1 + // 
CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.i1 + %0 = spv.LogicalNot %arg0 : i1 return } +// CHECK-LABEL: @logical_not_vector func @logical_not_vector(%arg0: vector<4xi1>) { // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense : vector<4xi1>) : !llvm<"<4 x i1>"> - // CHECK: %{{.*}} = llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<4 x i1>"> - %0 = spv.LogicalNot %arg0 : vector<4xi1> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<4 x i1>"> + %0 = spv.LogicalNot %arg0 : vector<4xi1> return } @@ -54,15 +60,17 @@ func @logical_not_vector(%arg0: vector<4xi1>) { // spv.LogicalAnd //===----------------------------------------------------------------------===// +// CHECK-LABEL: @logical_and_scalar func @logical_and_scalar(%arg0: i1, %arg1: i1) { - // CHECK: %{{.*}} = llvm.and %{{.*}}, %{{.*}} : !llvm.i1 - %0 = spv.LogicalAnd %arg0, %arg0 : i1 + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.i1 + %0 = spv.LogicalAnd %arg0, %arg0 : i1 return } +// CHECK-LABEL: @logical_and_vector func @logical_and_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: %{{.*}} = llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> - %0 = spv.LogicalAnd %arg0, %arg0 : vector<4xi1> + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + %0 = spv.LogicalAnd %arg0, %arg0 : vector<4xi1> return } @@ -70,14 +78,16 @@ func @logical_and_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { // spv.LogicalOr //===----------------------------------------------------------------------===// +// CHECK-LABEL: @logical_or_scalar func @logical_or_scalar(%arg0: i1, %arg1: i1) { - // CHECK: %{{.*}} = llvm.or %{{.*}}, %{{.*}} : !llvm.i1 - %0 = spv.LogicalOr %arg0, %arg0 : i1 + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.i1 + %0 = spv.LogicalOr %arg0, %arg0 : i1 return } +// CHECK-LABEL: @logical_or_vector func @logical_or_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: %{{.*}} = llvm.or %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> - %0 = spv.LogicalOr %arg0, %arg0 : vector<4xi1> + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + %0 = spv.LogicalOr %arg0, %arg0 : vector<4xi1> return } From 8979a9cdf226066196f1710903d13492e6929563 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 4 Aug 2020 13:34:47 +0200 Subject: [PATCH 297/600] [mlir] Fix adding wrong operand value in `promoteMemRefDescriptors`. The bug was not noticed because we didn't have a lot of custom type conversions directly to LLVM dialect. Differential Revision: https://reviews.llvm.org/D85192 --- mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 42c4d49b8941b..d0b49bb181955 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -3352,7 +3352,7 @@ LLVMTypeConverter::promoteMemRefDescriptors(Location loc, ValueRange opOperands, continue; } - promotedOperands.push_back(operand); + promotedOperands.push_back(llvmOperand); } return promotedOperands; } From d4fbbab2e494a59480096a257136ed2b75d07e87 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 4 Aug 2020 11:37:25 +0200 Subject: [PATCH 298/600] [mlir] translate types between MLIR LLVM dialect and LLVM IR With new LLVM dialect type modeling, the dialect types no longer wrap LLVM IR types. Therefore, they need to be translated to and from LLVM IR during export and import. Introduce the relevant functionality for translating types. 
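As an illustration, a minimal usage sketch (only the two function names and
their signatures below come from this patch; `dialectType`, `mlirContext` and
`llvmContext` are assumed to exist already):

    // Translate an MLIR LLVM dialect type to LLVM IR and back (sketch).
    llvm::Type *irType =
        mlir::LLVM::translateTypeToLLVMIR(dialectType, llvmContext);
    mlir::LLVM::LLVMTypeNew roundTripped =
        mlir::LLVM::translateTypeFromLLVMIR(irType, mlirContext);
    // Non-identified types are expected to round-trip to an equal type.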
It is currently exercised by an ad-hoc type translation roundtripping test that will be subsumed by the actual translation test when the type system transition is complete. Depends On D84339 Reviewed By: herhut Differential Revision: https://reviews.llvm.org/D85019 --- .../mlir/Target/LLVMIR/TypeTranslation.h | 36 ++ mlir/lib/Target/CMakeLists.txt | 1 + mlir/lib/Target/LLVMIR/TypeTranslation.cpp | 309 ++++++++++++++++++ mlir/test/Target/llvmir-types.mlir | 228 +++++++++++++ mlir/test/lib/CMakeLists.txt | 1 + mlir/test/lib/Target/CMakeLists.txt | 13 + .../lib/Target/TestLLVMTypeTranslation.cpp | 79 +++++ mlir/tools/mlir-translate/CMakeLists.txt | 4 + mlir/tools/mlir-translate/mlir-translate.cpp | 4 + 9 files changed, 675 insertions(+) create mode 100644 mlir/include/mlir/Target/LLVMIR/TypeTranslation.h create mode 100644 mlir/lib/Target/LLVMIR/TypeTranslation.cpp create mode 100644 mlir/test/Target/llvmir-types.mlir create mode 100644 mlir/test/lib/Target/CMakeLists.txt create mode 100644 mlir/test/lib/Target/TestLLVMTypeTranslation.cpp diff --git a/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h b/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h new file mode 100644 index 0000000000000..5a82f0a096dfc --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h @@ -0,0 +1,36 @@ +//===- TypeTranslation.h - Translate types between MLIR & LLVM --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the type translation function going from MLIR LLVM dialect +// to LLVM IR and back. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_TYPETRANSLATION_H +#define MLIR_TARGET_LLVMIR_TYPETRANSLATION_H + +namespace llvm { +class LLVMContext; +class Type; +} // namespace llvm + +namespace mlir { + +class MLIRContext; + +namespace LLVM { + +class LLVMTypeNew; + +llvm::Type *translateTypeToLLVMIR(LLVMTypeNew type, llvm::LLVMContext &context); +LLVMTypeNew translateTypeFromLLVMIR(llvm::Type *type, MLIRContext &context); + +} // namespace LLVM +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_TYPETRANSLATION_H diff --git a/mlir/lib/Target/CMakeLists.txt b/mlir/lib/Target/CMakeLists.txt index 4a0af66a04b16..5ca335b4b4b50 100644 --- a/mlir/lib/Target/CMakeLists.txt +++ b/mlir/lib/Target/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_translation_library(MLIRTargetLLVMIRModuleTranslation LLVMIR/DebugTranslation.cpp LLVMIR/ModuleTranslation.cpp + LLVMIR/TypeTranslation.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR diff --git a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp new file mode 100644 index 0000000000000..6163334d3b4ef --- /dev/null +++ b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp @@ -0,0 +1,309 @@ +//===- TypeTranslation.cpp - type translation between MLIR LLVM & LLVM IR -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVMIR/TypeTranslation.h"
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/IR/MLIRContext.h"
+
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+
+using namespace mlir;
+
+namespace {
+/// Support for translating MLIR LLVM dialect types to LLVM IR.
+class TypeToLLVMIRTranslator {
+public:
+  /// Constructs a class creating types in the given LLVM context.
+  TypeToLLVMIRTranslator(llvm::LLVMContext &context) : context(context) {}
+
+  /// Translates a single type.
+  llvm::Type *translateType(LLVM::LLVMTypeNew type) {
+    // If the conversion is already known, just return it.
+    if (knownTranslations.count(type))
+      return knownTranslations.lookup(type);
+
+    // Dispatch to an appropriate function.
+    llvm::Type *translated =
+        llvm::TypeSwitch<LLVM::LLVMTypeNew, llvm::Type *>(type)
+            .Case([this](LLVM::LLVMVoidType) {
+              return llvm::Type::getVoidTy(context);
+            })
+            .Case([this](LLVM::LLVMHalfType) {
+              return llvm::Type::getHalfTy(context);
+            })
+            .Case([this](LLVM::LLVMBFloatType) {
+              return llvm::Type::getBFloatTy(context);
+            })
+            .Case([this](LLVM::LLVMFloatType) {
+              return llvm::Type::getFloatTy(context);
+            })
+            .Case([this](LLVM::LLVMDoubleType) {
+              return llvm::Type::getDoubleTy(context);
+            })
+            .Case([this](LLVM::LLVMFP128Type) {
+              return llvm::Type::getFP128Ty(context);
+            })
+            .Case([this](LLVM::LLVMX86FP80Type) {
+              return llvm::Type::getX86_FP80Ty(context);
+            })
+            .Case([this](LLVM::LLVMPPCFP128Type) {
+              return llvm::Type::getPPC_FP128Ty(context);
+            })
+            .Case([this](LLVM::LLVMX86MMXType) {
+              return llvm::Type::getX86_MMXTy(context);
+            })
+            .Case([this](LLVM::LLVMTokenType) {
+              return llvm::Type::getTokenTy(context);
+            })
+            .Case([this](LLVM::LLVMLabelType) {
+              return llvm::Type::getLabelTy(context);
+            })
+            .Case([this](LLVM::LLVMMetadataType) {
+              return llvm::Type::getMetadataTy(context);
+            })
+            .Case<LLVM::LLVMArrayType, LLVM::LLVMFunctionType,
+                  LLVM::LLVMIntegerType, LLVM::LLVMPointerType,
+                  LLVM::LLVMStructType, LLVM::LLVMFixedVectorType,
+                  LLVM::LLVMScalableVectorType>(
+                [this](auto array) { return translate(array); })
+            .Default([](LLVM::LLVMTypeNew t) -> llvm::Type * {
+              llvm_unreachable("unknown LLVM dialect type");
+            });
+
+    // Cache the result of the conversion and return.
+    knownTranslations.try_emplace(type, translated);
+    return translated;
+  }
+
+private:
+  /// Translates the given array type.
+  llvm::Type *translate(LLVM::LLVMArrayType type) {
+    return llvm::ArrayType::get(translateType(type.getElementType()),
+                                type.getNumElements());
+  }
+
+  /// Translates the given function type.
+  llvm::Type *translate(LLVM::LLVMFunctionType type) {
+    SmallVector<llvm::Type *, 8> paramTypes;
+    translateTypes(type.getParams(), paramTypes);
+    return llvm::FunctionType::get(translateType(type.getReturnType()),
+                                   paramTypes, type.isVarArg());
+  }
+
+  /// Translates the given integer type.
+  llvm::Type *translate(LLVM::LLVMIntegerType type) {
+    return llvm::IntegerType::get(context, type.getBitWidth());
+  }
+
+  /// Translates the given pointer type.
+  llvm::Type *translate(LLVM::LLVMPointerType type) {
+    return llvm::PointerType::get(translateType(type.getElementType()),
+                                  type.getAddressSpace());
+  }
+
+  /// Translates the given structure type, supports both identified and literal
+  /// structs. This will _create_ a new identified structure every time, use
+  /// `convertType` if a structure with the same name must be looked up instead.
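+  /// For example (illustrative, based on the recursive-struct test added by
+  /// this patch): translating a self-recursive identified struct first
+  /// creates the identified llvm::StructType, records it in
+  /// `knownTranslations`, and only then translates the body, so a recursive
+  /// pointer element resolves to the struct being constructed.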
+ llvm::Type *translate(LLVM::LLVMStructType type) { + SmallVector subtypes; + if (!type.isIdentified()) { + translateTypes(type.getBody(), subtypes); + return llvm::StructType::get(context, subtypes, type.isPacked()); + } + + llvm::StructType *structType = + llvm::StructType::create(context, type.getName()); + // Mark the type we just created as known so that recursive calls can pick + // it up and use directly. + knownTranslations.try_emplace(type, structType); + if (type.isOpaque()) + return structType; + + translateTypes(type.getBody(), subtypes); + structType->setBody(subtypes, type.isPacked()); + return structType; + } + + /// Translates the given fixed-vector type. + llvm::Type *translate(LLVM::LLVMFixedVectorType type) { + return llvm::FixedVectorType::get(translateType(type.getElementType()), + type.getNumElements()); + } + + /// Translates the given scalable-vector type. + llvm::Type *translate(LLVM::LLVMScalableVectorType type) { + return llvm::ScalableVectorType::get(translateType(type.getElementType()), + type.getMinNumElements()); + } + + /// Translates a list of types. + void translateTypes(ArrayRef types, + SmallVectorImpl &result) { + result.reserve(result.size() + types.size()); + for (auto type : types) + result.push_back(translateType(type)); + } + + /// Reference to the context in which the LLVM IR types are created. + llvm::LLVMContext &context; + + /// Map of known translation. This serves a double purpose: caches translation + /// results to avoid repeated recursive calls and makes sure identified + /// structs with the same name (that is, equal) are resolved to an existing + /// type instead of creating a new type. + llvm::DenseMap knownTranslations; +}; +} // end namespace + +/// Translates a type from MLIR LLVM dialect to LLVM IR. This does not maintain +/// the mapping for identified structs so new structs will be created with +/// auto-renaming on each call. This is intended exclusively for testing. +llvm::Type *mlir::LLVM::translateTypeToLLVMIR(LLVM::LLVMTypeNew type, + llvm::LLVMContext &context) { + return TypeToLLVMIRTranslator(context).translateType(type); +} + +namespace { +/// Support for translating LLVM IR types to MLIR LLVM dialect types. +class TypeFromLLVMIRTranslator { +public: + /// Constructs a class creating types in the given MLIR context. + TypeFromLLVMIRTranslator(MLIRContext &context) : context(context) {} + + /// Translates the given type. + LLVM::LLVMTypeNew translateType(llvm::Type *type) { + if (knownTranslations.count(type)) + return knownTranslations.lookup(type); + + LLVM::LLVMTypeNew translated = + llvm::TypeSwitch(type) + .Case( + [this](auto *type) { return translate(type); }) + .Default([this](llvm::Type *type) { + return translatePrimitiveType(type); + }); + knownTranslations.try_emplace(type, translated); + return translated; + } + +private: + /// Translates the given primitive, i.e. non-parametric in MLIR nomenclature, + /// type. 
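+  /// For example (illustrative): LLVM IR's half maps to LLVM::LLVMHalfType
+  /// and x86_mmx maps to LLVM::LLVMX86MMXType, as checked below.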
+ LLVM::LLVMTypeNew translatePrimitiveType(llvm::Type *type) { + if (type->isVoidTy()) + return LLVM::LLVMVoidType::get(&context); + if (type->isHalfTy()) + return LLVM::LLVMHalfType::get(&context); + if (type->isBFloatTy()) + return LLVM::LLVMBFloatType::get(&context); + if (type->isFloatTy()) + return LLVM::LLVMFloatType::get(&context); + if (type->isDoubleTy()) + return LLVM::LLVMDoubleType::get(&context); + if (type->isFP128Ty()) + return LLVM::LLVMFP128Type::get(&context); + if (type->isX86_FP80Ty()) + return LLVM::LLVMX86FP80Type::get(&context); + if (type->isPPC_FP128Ty()) + return LLVM::LLVMPPCFP128Type::get(&context); + if (type->isX86_MMXTy()) + return LLVM::LLVMX86MMXType::get(&context); + if (type->isLabelTy()) + return LLVM::LLVMLabelType::get(&context); + if (type->isMetadataTy()) + return LLVM::LLVMMetadataType::get(&context); + llvm_unreachable("not a primitive type"); + } + + /// Translates the given array type. + LLVM::LLVMTypeNew translate(llvm::ArrayType *type) { + return LLVM::LLVMArrayType::get(translateType(type->getElementType()), + type->getNumElements()); + } + + /// Translates the given function type. + LLVM::LLVMTypeNew translate(llvm::FunctionType *type) { + SmallVector paramTypes; + translateTypes(type->params(), paramTypes); + return LLVM::LLVMFunctionType::get(translateType(type->getReturnType()), + paramTypes, type->isVarArg()); + } + + /// Translates the given integer type. + LLVM::LLVMTypeNew translate(llvm::IntegerType *type) { + return LLVM::LLVMIntegerType::get(&context, type->getBitWidth()); + } + + /// Translates the given pointer type. + LLVM::LLVMTypeNew translate(llvm::PointerType *type) { + return LLVM::LLVMPointerType::get(translateType(type->getElementType()), + type->getAddressSpace()); + } + + /// Translates the given structure type. + LLVM::LLVMTypeNew translate(llvm::StructType *type) { + SmallVector subtypes; + if (type->isLiteral()) { + translateTypes(type->subtypes(), subtypes); + return LLVM::LLVMStructType::getLiteral(&context, subtypes, + type->isPacked()); + } + + if (type->isOpaque()) + return LLVM::LLVMStructType::getOpaque(type->getName(), &context); + + LLVM::LLVMStructType translated = + LLVM::LLVMStructType::getIdentified(&context, type->getName()); + knownTranslations.try_emplace(type, translated); + translateTypes(type->subtypes(), subtypes); + LogicalResult bodySet = translated.setBody(subtypes, type->isPacked()); + assert(succeeded(bodySet) && + "could not set the body of an identified struct"); + (void)bodySet; + return translated; + } + + /// Translates the given fixed-vector type. + LLVM::LLVMTypeNew translate(llvm::FixedVectorType *type) { + return LLVM::LLVMFixedVectorType::get(translateType(type->getElementType()), + type->getNumElements()); + } + + /// Translates the given scalable-vector type. + LLVM::LLVMTypeNew translate(llvm::ScalableVectorType *type) { + return LLVM::LLVMScalableVectorType::get( + translateType(type->getElementType()), type->getMinNumElements()); + } + + /// Translates a list of types. + void translateTypes(ArrayRef types, + SmallVectorImpl &result) { + result.reserve(result.size() + types.size()); + for (llvm::Type *type : types) + result.push_back(translateType(type)); + } + + /// Map of known translations. Serves as a cache and as recursion stopper for + /// translating recursive structs. + llvm::DenseMap knownTranslations; + + /// The context in which MLIR types are created. + MLIRContext &context; +}; +} // end namespace + +/// Translates a type from LLVM IR to MLIR LLVM dialect. 
This is intended +/// exclusively for testing. +LLVM::LLVMTypeNew mlir::LLVM::translateTypeFromLLVMIR(llvm::Type *type, + MLIRContext &context) { + return TypeFromLLVMIRTranslator(context).translateType(type); +} diff --git a/mlir/test/Target/llvmir-types.mlir b/mlir/test/Target/llvmir-types.mlir new file mode 100644 index 0000000000000..d807562d1a2d0 --- /dev/null +++ b/mlir/test/Target/llvmir-types.mlir @@ -0,0 +1,228 @@ +// RUN: mlir-translate -test-mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @primitives() { + // CHECK: declare void @return_void() + // CHECK: declare void @return_void_round() + "llvm.test_introduce_func"() { name = "return_void", type = !llvm2.void } : () -> () + // CHECK: declare half @return_half() + // CHECK: declare half @return_half_round() + "llvm.test_introduce_func"() { name = "return_half", type = !llvm2.half } : () -> () + // CHECK: declare bfloat @return_bfloat() + // CHECK: declare bfloat @return_bfloat_round() + "llvm.test_introduce_func"() { name = "return_bfloat", type = !llvm2.bfloat } : () -> () + // CHECK: declare float @return_float() + // CHECK: declare float @return_float_round() + "llvm.test_introduce_func"() { name = "return_float", type = !llvm2.float } : () -> () + // CHECK: declare double @return_double() + // CHECK: declare double @return_double_round() + "llvm.test_introduce_func"() { name = "return_double", type = !llvm2.double } : () -> () + // CHECK: declare fp128 @return_fp128() + // CHECK: declare fp128 @return_fp128_round() + "llvm.test_introduce_func"() { name = "return_fp128", type = !llvm2.fp128 } : () -> () + // CHECK: declare x86_fp80 @return_x86_fp80() + // CHECK: declare x86_fp80 @return_x86_fp80_round() + "llvm.test_introduce_func"() { name = "return_x86_fp80", type = !llvm2.x86_fp80 } : () -> () + // CHECK: declare ppc_fp128 @return_ppc_fp128() + // CHECK: declare ppc_fp128 @return_ppc_fp128_round() + "llvm.test_introduce_func"() { name = "return_ppc_fp128", type = !llvm2.ppc_fp128 } : () -> () + // CHECK: declare x86_mmx @return_x86_mmx() + // CHECK: declare x86_mmx @return_x86_mmx_round() + "llvm.test_introduce_func"() { name = "return_x86_mmx", type = !llvm2.x86_mmx } : () -> () + llvm.return +} + +llvm.func @funcs() { + // CHECK: declare void @f_void_i32(i32) + // CHECK: declare void @f_void_i32_round(i32) + "llvm.test_introduce_func"() { name ="f_void_i32", type = !llvm2.func } : () -> () + // CHECK: declare i32 @f_i32_empty() + // CHECK: declare i32 @f_i32_empty_round() + "llvm.test_introduce_func"() { name ="f_i32_empty", type = !llvm2.func } : () -> () + // CHECK: declare i32 @f_i32_half_bfloat_float_double(half, bfloat, float, double) + // CHECK: declare i32 @f_i32_half_bfloat_float_double_round(half, bfloat, float, double) + "llvm.test_introduce_func"() { name ="f_i32_half_bfloat_float_double", type = !llvm2.func } : () -> () + // CHECK: declare i32 @f_i32_i32_i32(i32, i32) + // CHECK: declare i32 @f_i32_i32_i32_round(i32, i32) + "llvm.test_introduce_func"() { name ="f_i32_i32_i32", type = !llvm2.func } : () -> () + // CHECK: declare void @f_void_variadic(...) + // CHECK: declare void @f_void_variadic_round(...) + "llvm.test_introduce_func"() { name ="f_void_variadic", type = !llvm2.func } : () -> () + // CHECK: declare void @f_void_i32_i32_variadic(i32, i32, ...) + // CHECK: declare void @f_void_i32_i32_variadic_round(i32, i32, ...) 
+ "llvm.test_introduce_func"() { name ="f_void_i32_i32_variadic", type = !llvm2.func } : () -> () + llvm.return +} + +llvm.func @ints() { + // CHECK: declare i1 @return_i1() + // CHECK: declare i1 @return_i1_round() + "llvm.test_introduce_func"() { name = "return_i1", type = !llvm2.i1 } : () -> () + // CHECK: declare i8 @return_i8() + // CHECK: declare i8 @return_i8_round() + "llvm.test_introduce_func"() { name = "return_i8", type = !llvm2.i8 } : () -> () + // CHECK: declare i16 @return_i16() + // CHECK: declare i16 @return_i16_round() + "llvm.test_introduce_func"() { name = "return_i16", type = !llvm2.i16 } : () -> () + // CHECK: declare i32 @return_i32() + // CHECK: declare i32 @return_i32_round() + "llvm.test_introduce_func"() { name = "return_i32", type = !llvm2.i32 } : () -> () + // CHECK: declare i64 @return_i64() + // CHECK: declare i64 @return_i64_round() + "llvm.test_introduce_func"() { name = "return_i64", type = !llvm2.i64 } : () -> () + // CHECK: declare i57 @return_i57() + // CHECK: declare i57 @return_i57_round() + "llvm.test_introduce_func"() { name = "return_i57", type = !llvm2.i57 } : () -> () + // CHECK: declare i129 @return_i129() + // CHECK: declare i129 @return_i129_round() + "llvm.test_introduce_func"() { name = "return_i129", type = !llvm2.i129 } : () -> () + llvm.return +} + +llvm.func @pointers() { + // CHECK: declare i8* @return_pi8() + // CHECK: declare i8* @return_pi8_round() + "llvm.test_introduce_func"() { name = "return_pi8", type = !llvm2.ptr } : () -> () + // CHECK: declare float* @return_pfloat() + // CHECK: declare float* @return_pfloat_round() + "llvm.test_introduce_func"() { name = "return_pfloat", type = !llvm2.ptr } : () -> () + // CHECK: declare i8** @return_ppi8() + // CHECK: declare i8** @return_ppi8_round() + "llvm.test_introduce_func"() { name = "return_ppi8", type = !llvm2.ptr> } : () -> () + // CHECK: declare i8***** @return_pppppi8() + // CHECK: declare i8***** @return_pppppi8_round() + "llvm.test_introduce_func"() { name = "return_pppppi8", type = !llvm2.ptr>>>> } : () -> () + // CHECK: declare i8* @return_pi8_0() + // CHECK: declare i8* @return_pi8_0_round() + "llvm.test_introduce_func"() { name = "return_pi8_0", type = !llvm2.ptr } : () -> () + // CHECK: declare i8 addrspace(1)* @return_pi8_1() + // CHECK: declare i8 addrspace(1)* @return_pi8_1_round() + "llvm.test_introduce_func"() { name = "return_pi8_1", type = !llvm2.ptr } : () -> () + // CHECK: declare i8 addrspace(42)* @return_pi8_42() + // CHECK: declare i8 addrspace(42)* @return_pi8_42_round() + "llvm.test_introduce_func"() { name = "return_pi8_42", type = !llvm2.ptr } : () -> () + // CHECK: declare i8 addrspace(42)* addrspace(9)* @return_ppi8_42_9() + // CHECK: declare i8 addrspace(42)* addrspace(9)* @return_ppi8_42_9_round() + "llvm.test_introduce_func"() { name = "return_ppi8_42_9", type = !llvm2.ptr, 9> } : () -> () + llvm.return +} + +llvm.func @vectors() { + // CHECK: declare <4 x i32> @return_v4_i32() + // CHECK: declare <4 x i32> @return_v4_i32_round() + "llvm.test_introduce_func"() { name = "return_v4_i32", type = !llvm2.vec<4 x i32> } : () -> () + // CHECK: declare <4 x float> @return_v4_float() + // CHECK: declare <4 x float> @return_v4_float_round() + "llvm.test_introduce_func"() { name = "return_v4_float", type = !llvm2.vec<4 x float> } : () -> () + // CHECK: declare @return_vs_4_i32() + // CHECK: declare @return_vs_4_i32_round() + "llvm.test_introduce_func"() { name = "return_vs_4_i32", type = !llvm2.vec } : () -> () + // CHECK: declare @return_vs_8_half() + // CHECK: 
declare @return_vs_8_half_round() + "llvm.test_introduce_func"() { name = "return_vs_8_half", type = !llvm2.vec } : () -> () + // CHECK: declare <4 x i8*> @return_v_4_pi8() + // CHECK: declare <4 x i8*> @return_v_4_pi8_round() + "llvm.test_introduce_func"() { name = "return_v_4_pi8", type = !llvm2.vec<4 x ptr> } : () -> () + llvm.return +} + +llvm.func @arrays() { + // CHECK: declare [10 x i32] @return_a10_i32() + // CHECK: declare [10 x i32] @return_a10_i32_round() + "llvm.test_introduce_func"() { name = "return_a10_i32", type = !llvm2.array<10 x i32> } : () -> () + // CHECK: declare [8 x float] @return_a8_float() + // CHECK: declare [8 x float] @return_a8_float_round() + "llvm.test_introduce_func"() { name = "return_a8_float", type = !llvm2.array<8 x float> } : () -> () + // CHECK: declare [10 x i32 addrspace(4)*] @return_a10_pi32_4() + // CHECK: declare [10 x i32 addrspace(4)*] @return_a10_pi32_4_round() + "llvm.test_introduce_func"() { name = "return_a10_pi32_4", type = !llvm2.array<10 x ptr> } : () -> () + // CHECK: declare [10 x [4 x float]] @return_a10_a4_float() + // CHECK: declare [10 x [4 x float]] @return_a10_a4_float_round() + "llvm.test_introduce_func"() { name = "return_a10_a4_float", type = !llvm2.array<10 x array<4 x float>> } : () -> () + llvm.return +} + +llvm.func @literal_structs() { + // CHECK: declare {} @return_struct_empty() + // CHECK: declare {} @return_struct_empty_round() + "llvm.test_introduce_func"() { name = "return_struct_empty", type = !llvm2.struct<()> } : () -> () + // CHECK: declare { i32 } @return_s_i32() + // CHECK: declare { i32 } @return_s_i32_round() + "llvm.test_introduce_func"() { name = "return_s_i32", type = !llvm2.struct<(i32)> } : () -> () + // CHECK: declare { float, i32 } @return_s_float_i32() + // CHECK: declare { float, i32 } @return_s_float_i32_round() + "llvm.test_introduce_func"() { name = "return_s_float_i32", type = !llvm2.struct<(float, i32)> } : () -> () + // CHECK: declare { { i32 } } @return_s_s_i32() + // CHECK: declare { { i32 } } @return_s_s_i32_round() + "llvm.test_introduce_func"() { name = "return_s_s_i32", type = !llvm2.struct<(struct<(i32)>)> } : () -> () + // CHECK: declare { i32, { i32 }, float } @return_s_i32_s_i32_float() + // CHECK: declare { i32, { i32 }, float } @return_s_i32_s_i32_float_round() + "llvm.test_introduce_func"() { name = "return_s_i32_s_i32_float", type = !llvm2.struct<(i32, struct<(i32)>, float)> } : () -> () + + // CHECK: declare <{}> @return_sp_empty() + // CHECK: declare <{}> @return_sp_empty_round() + "llvm.test_introduce_func"() { name = "return_sp_empty", type = !llvm2.struct } : () -> () + // CHECK: declare <{ i32 }> @return_sp_i32() + // CHECK: declare <{ i32 }> @return_sp_i32_round() + "llvm.test_introduce_func"() { name = "return_sp_i32", type = !llvm2.struct } : () -> () + // CHECK: declare <{ float, i32 }> @return_sp_float_i32() + // CHECK: declare <{ float, i32 }> @return_sp_float_i32_round() + "llvm.test_introduce_func"() { name = "return_sp_float_i32", type = !llvm2.struct } : () -> () + // CHECK: declare <{ i32, { i32, i1 }, float }> @return_sp_i32_s_i31_1_float() + // CHECK: declare <{ i32, { i32, i1 }, float }> @return_sp_i32_s_i31_1_float_round() + "llvm.test_introduce_func"() { name = "return_sp_i32_s_i31_1_float", type = !llvm2.struct, float)> } : () -> () + + // CHECK: declare { <{ i32 }> } @return_s_sp_i32() + // CHECK: declare { <{ i32 }> } @return_s_sp_i32_round() + "llvm.test_introduce_func"() { name = "return_s_sp_i32", type = !llvm2.struct<(struct)> } : () -> () + // CHECK: 
declare <{ { i32 } }> @return_sp_s_i32() + // CHECK: declare <{ { i32 } }> @return_sp_s_i32_round() + "llvm.test_introduce_func"() { name = "return_sp_s_i32", type = !llvm2.struct)> } : () -> () + llvm.return +} + +// ----- +// Put structs into a separate split so that we can match their declarations +// locally. + +// CHECK: %empty = type {} +// CHECK: %opaque = type opaque +// CHECK: %long = type { i32, { i32, i1 }, float, void ()* } +// CHECK: %self-recursive = type { %self-recursive* } +// CHECK: %unpacked = type { i32 } +// CHECK: %packed = type <{ i32 }> +// CHECK: %"name with spaces and !^$@$#" = type <{ i32 }> +// CHECK: %mutually-a = type { %mutually-b* } +// CHECK: %mutually-b = type { %mutually-a addrspace(3)* } +// CHECK: %struct-of-arrays = type { [10 x i32] } +// CHECK: %array-of-structs = type { i32 } +// CHECK: %ptr-to-struct = type { i8 } + +llvm.func @identified_structs() { + // CHECK: declare %empty + "llvm.test_introduce_func"() { name = "return_s_empty", type = !llvm2.struct<"empty", ()> } : () -> () + // CHECK: declare %opaque + "llvm.test_introduce_func"() { name = "return_s_opaque", type = !llvm2.struct<"opaque", opaque> } : () -> () + // CHECK: declare %long + "llvm.test_introduce_func"() { name = "return_s_long", type = !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> } : () -> () + // CHECK: declare %self-recursive + "llvm.test_introduce_func"() { name = "return_s_self_recurisve", type = !llvm2.struct<"self-recursive", (ptr>)> } : () -> () + // CHECK: declare %unpacked + "llvm.test_introduce_func"() { name = "return_s_unpacked", type = !llvm2.struct<"unpacked", (i32)> } : () -> () + // CHECK: declare %packed + "llvm.test_introduce_func"() { name = "return_s_packed", type = !llvm2.struct<"packed", packed (i32)> } : () -> () + // CHECK: declare %"name with spaces and !^$@$#" + "llvm.test_introduce_func"() { name = "return_s_symbols", type = !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> } : () -> () + + // CHECK: declare %mutually-a + "llvm.test_introduce_func"() { name = "return_s_mutually_a", type = !llvm2.struct<"mutually-a", (ptr, 3>)>>)> } : () -> () + // CHECK: declare %mutually-b + "llvm.test_introduce_func"() { name = "return_s_mutually_b", type = !llvm2.struct<"mutually-b", (ptr>)>, 3>)> } : () -> () + + // CHECK: declare %struct-of-arrays + "llvm.test_introduce_func"() { name = "return_s_struct_of_arrays", type = !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> } : () -> () + // CHECK: declare [10 x %array-of-structs] + "llvm.test_introduce_func"() { name = "return_s_array_of_structs", type = !llvm2.array<10 x struct<"array-of-structs", (i32)>> } : () -> () + // CHECK: declare %ptr-to-struct* + "llvm.test_introduce_func"() { name = "return_s_ptr_to_struct", type = !llvm2.ptr> } : () -> () + llvm.return +} diff --git a/mlir/test/lib/CMakeLists.txt b/mlir/test/lib/CMakeLists.txt index 0df357c8c355e..ec9e5cd998018 100644 --- a/mlir/test/lib/CMakeLists.txt +++ b/mlir/test/lib/CMakeLists.txt @@ -2,4 +2,5 @@ add_subdirectory(Dialect) add_subdirectory(IR) add_subdirectory(Pass) add_subdirectory(Reducer) +add_subdirectory(Target) add_subdirectory(Transforms) diff --git a/mlir/test/lib/Target/CMakeLists.txt b/mlir/test/lib/Target/CMakeLists.txt new file mode 100644 index 0000000000000..cb8f206469aec --- /dev/null +++ b/mlir/test/lib/Target/CMakeLists.txt @@ -0,0 +1,13 @@ +add_mlir_translation_library(MLIRTestLLVMTypeTranslation + TestLLVMTypeTranslation.cpp + + LINK_COMPONENTS + Core + TransformUtils + + LINK_LIBS PUBLIC + MLIRLLVMIR + 
MLIRTargetLLVMIRModuleTranslation
+  MLIRTestIR
+  MLIRTranslation
+  )
diff --git a/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp b/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp
new file mode 100644
index 0000000000000..b76ac2a13344b
--- /dev/null
+++ b/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp
@@ -0,0 +1,79 @@
+//===- TestLLVMTypeTranslation.cpp - Test MLIR/LLVM IR type translation ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir/Target/LLVMIR/TypeTranslation.h"
+#include "mlir/Translation.h"
+
+using namespace mlir;
+
+namespace {
+class TestLLVMTypeTranslation : public LLVM::ModuleTranslation {
+  // Allow access to the constructors under MSVC.
+  friend LLVM::ModuleTranslation;
+
+public:
+  using LLVM::ModuleTranslation::ModuleTranslation;
+
+protected:
+  /// Simple test facility for translating types from MLIR LLVM dialect to LLVM
+  /// IR. This converts the "llvm.test_introduce_func" operation into an LLVM IR
+  /// function with the name extracted from the `name` attribute that returns
+  /// the type contained in the `type` attribute if it is a non-function type or
+  /// that has the signature obtained by converting `type` if it is a function
+  /// type. This is a temporary check before type translation is substituted
+  /// into the main translation flow and exercised here.
+  LogicalResult convertOperation(Operation &op,
+                                 llvm::IRBuilder<> &builder) override {
+    if (op.getName().getStringRef() == "llvm.test_introduce_func") {
+      auto attr = op.getAttrOfType<TypeAttr>("type");
+      assert(attr && "expected 'type' attribute");
+      auto type = attr.getValue().cast<LLVM::LLVMTypeNew>();
+
+      auto nameAttr = op.getAttrOfType<StringAttr>("name");
+      assert(nameAttr && "expected 'name' attribute");
+
+      llvm::Type *translated =
+          LLVM::translateTypeToLLVMIR(type, builder.getContext());
+
+      llvm::Module *module = builder.GetInsertBlock()->getModule();
+      if (auto *funcType = dyn_cast<llvm::FunctionType>(translated))
+        module->getOrInsertFunction(nameAttr.getValue(), funcType);
+      else
+        module->getOrInsertFunction(nameAttr.getValue(), translated);
+
+      std::string roundtripName = (Twine(nameAttr.getValue()) + "_round").str();
+      LLVM::LLVMTypeNew translatedBack =
+          LLVM::translateTypeFromLLVMIR(translated, *op.getContext());
+      llvm::Type *translatedBackAndForth =
+          LLVM::translateTypeToLLVMIR(translatedBack, builder.getContext());
+      if (auto *funcType = dyn_cast<llvm::FunctionType>(translatedBackAndForth))
+        module->getOrInsertFunction(roundtripName, funcType);
+      else
+        module->getOrInsertFunction(roundtripName, translatedBackAndForth);
+      return success();
+    }
+
+    return LLVM::ModuleTranslation::convertOperation(op, builder);
+  }
+};
+} // namespace
+
+namespace mlir {
+void registerTestLLVMTypeTranslation() {
+  TranslateFromMLIRRegistration reg(
+      "test-mlir-to-llvmir", [](ModuleOp module, raw_ostream &output) {
+        std::unique_ptr<llvm::Module> llvmModule =
+            LLVM::ModuleTranslation::translateModule<TestLLVMTypeTranslation>(
+                module.getOperation());
+        llvmModule->print(output, nullptr);
+        return success();
+      });
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt
index 897e7adc03bd1..1e6cdfe0f3b14 100644
--- a/mlir/tools/mlir-translate/CMakeLists.txt
+++ 
b/mlir/tools/mlir-translate/CMakeLists.txt @@ -13,7 +13,11 @@ target_link_libraries(mlir-translate PRIVATE ${dialect_libs} ${translation_libs} + ${test_libs} MLIRIR + # TODO: remove after LLVM dialect transition is complete; translation uses a + # registration function defined in this library unconditionally. + MLIRLLVMTypeTestDialect MLIRParser MLIRPass MLIRSPIRV diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp index 914bd340b3f56..70bf285112a4b 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -49,17 +49,21 @@ static llvm::cl::opt verifyDiagnostics( namespace mlir { // Defined in the test directory, no public header. +void registerLLVMTypeTestDialect(); +void registerTestLLVMTypeTranslation(); void registerTestRoundtripSPIRV(); void registerTestRoundtripDebugSPIRV(); } // namespace mlir static void registerTestTranslations() { + registerTestLLVMTypeTranslation(); registerTestRoundtripSPIRV(); registerTestRoundtripDebugSPIRV(); } int main(int argc, char **argv) { registerAllDialects(); + registerLLVMTypeTestDialect(); registerAllTranslations(); registerTestTranslations(); llvm::InitLLVM y(argc, argv); From 6abd7e2e622bc7eabdb673a7815f6673523a1e94 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 4 Aug 2020 11:37:31 +0200 Subject: [PATCH 299/600] [mlir] provide same APIs as existing LLVMType in the new LLVM type modeling These are intended to smoothen the transition and may be removed in the future in favor of more MLIR-compatible APIs. They intentionally have the same semantics as the existing functions, which must remain stable until the transition is complete. Depends On D85019 Reviewed By: nicolasvasilache Differential Revision: https://reviews.llvm.org/D85020 --- mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 155 ++++++++++++++ mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 209 +++++++++++++++++++ 2 files changed, 364 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index 6764f9815c3fb..e409d6880283f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -26,6 +26,8 @@ class DialectAsmParser; class DialectAsmPrinter; namespace LLVM { +class LLVMDialect; + namespace detail { struct LLVMFunctionTypeStorage; struct LLVMIntegerTypeStorage; @@ -34,6 +36,12 @@ struct LLVMStructTypeStorage; struct LLVMTypeAndSizeStorage; } // namespace detail +class LLVMBFloatType; +class LLVMHalfType; +class LLVMFloatType; +class LLVMDoubleType; +class LLVMIntegerType; + //===----------------------------------------------------------------------===// // LLVMTypeNew. //===----------------------------------------------------------------------===// @@ -96,6 +104,150 @@ class LLVMTypeNew : public Type { static bool kindof(unsigned kind) { return FIRST_NEW_LLVM_TYPE <= kind && kind <= LAST_NEW_LLVM_TYPE; } + + LLVMDialect &getDialect(); + + /// Floating-point type utilities. + bool isBFloatTy() { return isa(); } + bool isHalfTy() { return isa(); } + bool isFloatTy() { return isa(); } + bool isDoubleTy() { return isa(); } + bool isFloatingPointTy() { + return isa() || isa() || + isa() || isa(); + } + + /// Array type utilities. + LLVMTypeNew getArrayElementType(); + unsigned getArrayNumElements(); + bool isArrayTy(); + + /// Integer type utilities. 
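+  /// For example (illustrative): for a type built with
+  /// getIntNTy(dialect, /*numBits=*/32), both isIntegerTy() and
+  /// isIntegerTy(32) return true and getIntegerBitWidth() returns 32.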
+ bool isIntegerTy() { return isa(); } + bool isIntegerTy(unsigned bitwidth); + unsigned getIntegerBitWidth(); + + /// Vector type utilities. + LLVMTypeNew getVectorElementType(); + unsigned getVectorNumElements(); + llvm::ElementCount getVectorElementCount(); + bool isVectorTy(); + + /// Function type utilities. + LLVMTypeNew getFunctionParamType(unsigned argIdx); + unsigned getFunctionNumParams(); + LLVMTypeNew getFunctionResultType(); + bool isFunctionTy(); + bool isFunctionVarArg(); + + /// Pointer type utilities. + LLVMTypeNew getPointerTo(unsigned addrSpace = 0); + LLVMTypeNew getPointerElementTy(); + bool isPointerTy(); + static bool isValidPointerElementType(LLVMTypeNew type); + + /// Struct type utilities. + LLVMTypeNew getStructElementType(unsigned i); + unsigned getStructNumElements(); + bool isStructTy(); + + /// Utilities used to generate floating point types. + static LLVMTypeNew getDoubleTy(LLVMDialect *dialect); + static LLVMTypeNew getFloatTy(LLVMDialect *dialect); + static LLVMTypeNew getBFloatTy(LLVMDialect *dialect); + static LLVMTypeNew getHalfTy(LLVMDialect *dialect); + static LLVMTypeNew getFP128Ty(LLVMDialect *dialect); + static LLVMTypeNew getX86_FP80Ty(LLVMDialect *dialect); + + /// Utilities used to generate integer types. + static LLVMTypeNew getIntNTy(LLVMDialect *dialect, unsigned numBits); + static LLVMTypeNew getInt1Ty(LLVMDialect *dialect) { + return getIntNTy(dialect, /*numBits=*/1); + } + static LLVMTypeNew getInt8Ty(LLVMDialect *dialect) { + return getIntNTy(dialect, /*numBits=*/8); + } + static LLVMTypeNew getInt8PtrTy(LLVMDialect *dialect) { + return getInt8Ty(dialect).getPointerTo(); + } + static LLVMTypeNew getInt16Ty(LLVMDialect *dialect) { + return getIntNTy(dialect, /*numBits=*/16); + } + static LLVMTypeNew getInt32Ty(LLVMDialect *dialect) { + return getIntNTy(dialect, /*numBits=*/32); + } + static LLVMTypeNew getInt64Ty(LLVMDialect *dialect) { + return getIntNTy(dialect, /*numBits=*/64); + } + + /// Utilities used to generate other miscellaneous types. + static LLVMTypeNew getArrayTy(LLVMTypeNew elementType, uint64_t numElements); + static LLVMTypeNew getFunctionTy(LLVMTypeNew result, + ArrayRef params, bool isVarArg); + static LLVMTypeNew getFunctionTy(LLVMTypeNew result, bool isVarArg) { + return getFunctionTy(result, llvm::None, isVarArg); + } + static LLVMTypeNew getStructTy(LLVMDialect *dialect, + ArrayRef elements, + bool isPacked = false); + static LLVMTypeNew getStructTy(LLVMDialect *dialect, bool isPacked = false) { + return getStructTy(dialect, llvm::None, isPacked); + } + template + static typename std::enable_if::value, + LLVMTypeNew>::type + getStructTy(LLVMTypeNew elt1, Args... elts) { + SmallVector fields({elt1, elts...}); + return getStructTy(&elt1.getDialect(), fields); + } + static LLVMTypeNew getVectorTy(LLVMTypeNew elementType, unsigned numElements); + + /// Void type utilities. 
+ static LLVMTypeNew getVoidTy(LLVMDialect *dialect); + bool isVoidTy(); + + // Creation and setting of LLVM's identified struct types + static LLVMTypeNew createStructTy(LLVMDialect *dialect, + ArrayRef elements, + Optional name, + bool isPacked = false); + + static LLVMTypeNew createStructTy(LLVMDialect *dialect, + Optional name) { + return createStructTy(dialect, llvm::None, name); + } + + static LLVMTypeNew createStructTy(ArrayRef elements, + Optional name, + bool isPacked = false) { + assert(!elements.empty() && + "This method may not be invoked with an empty list"); + LLVMTypeNew ele0 = elements.front(); + return createStructTy(&ele0.getDialect(), elements, name, isPacked); + } + + template + static + typename std::enable_if_t::value, + LLVMTypeNew> + createStructTy(StringRef name, LLVMTypeNew elt1, Args... elts) { + SmallVector fields({elt1, elts...}); + Optional opt_name(name); + return createStructTy(&elt1.getDialect(), fields, opt_name); + } + + static LLVMTypeNew setStructTyBody(LLVMTypeNew structType, + ArrayRef elements, + bool isPacked = false); + + template + static + typename std::enable_if_t::value, + LLVMTypeNew> + setStructTyBody(LLVMTypeNew structType, LLVMTypeNew elt1, Args... elts) { + SmallVector fields({elt1, elts...}); + return setStructTyBody(structType, fields); + } }; //===----------------------------------------------------------------------===// @@ -323,6 +475,9 @@ class LLVMStructType : public Type::TypeBasegetRegisteredDialect(); +} + +//----------------------------------------------------------------------------// +// Integer type utilities. + +bool LLVMTypeNew::isIntegerTy(unsigned bitwidth) { + if (auto intType = dyn_cast()) + return intType.getBitWidth() == bitwidth; + return false; +} + +unsigned LLVMTypeNew::getIntegerBitWidth() { + return cast().getBitWidth(); +} + +LLVMTypeNew LLVMTypeNew::getArrayElementType() { + return cast().getElementType(); +} + +//----------------------------------------------------------------------------// +// Array type utilities. + +unsigned LLVMTypeNew::getArrayNumElements() { + return cast().getNumElements(); +} + +bool LLVMTypeNew::isArrayTy() { return isa(); } + +//----------------------------------------------------------------------------// +// Vector type utilities. + +LLVMTypeNew LLVMTypeNew::getVectorElementType() { + return cast().getElementType(); +} + +unsigned LLVMTypeNew::getVectorNumElements() { + return cast().getNumElements(); +} +llvm::ElementCount LLVMTypeNew::getVectorElementCount() { + return cast().getElementCount(); +} + +bool LLVMTypeNew::isVectorTy() { return isa(); } + +//----------------------------------------------------------------------------// +// Function type utilities. + +LLVMTypeNew LLVMTypeNew::getFunctionParamType(unsigned argIdx) { + return cast().getParamType(argIdx); +} + +unsigned LLVMTypeNew::getFunctionNumParams() { + return cast().getNumParams(); +} + +LLVMTypeNew LLVMTypeNew::getFunctionResultType() { + return cast().getReturnType(); +} + +bool LLVMTypeNew::isFunctionTy() { return isa(); } + +bool LLVMTypeNew::isFunctionVarArg() { + return cast().isVarArg(); +} + +//----------------------------------------------------------------------------// +// Pointer type utilities. 
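+// For example (illustrative): LLVMTypeNew::getInt8Ty(dialect).getPointerTo()
+// yields the i8* type in the default address space 0.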
+ +LLVMTypeNew LLVMTypeNew::getPointerTo(unsigned addrSpace) { + return LLVMPointerType::get(*this, addrSpace); +} + +LLVMTypeNew LLVMTypeNew::getPointerElementTy() { + return cast().getElementType(); +} + +bool LLVMTypeNew::isPointerTy() { return isa(); } + +bool LLVMTypeNew::isValidPointerElementType(LLVMTypeNew type) { + return !type.isa() && !type.isa() && + !type.isa() && !type.isa(); +} + +//----------------------------------------------------------------------------// +// Struct type utilities. + +LLVMTypeNew LLVMTypeNew::getStructElementType(unsigned i) { + return cast().getBody()[i]; +} + +unsigned LLVMTypeNew::getStructNumElements() { + return cast().getBody().size(); +} + +bool LLVMTypeNew::isStructTy() { return isa(); } + +//----------------------------------------------------------------------------// +// Utilities used to generate floating point types. + +LLVMTypeNew LLVMTypeNew::getDoubleTy(LLVMDialect *dialect) { + return LLVMDoubleType::get(dialect->getContext()); +} + +LLVMTypeNew LLVMTypeNew::getFloatTy(LLVMDialect *dialect) { + return LLVMFloatType::get(dialect->getContext()); +} + +LLVMTypeNew LLVMTypeNew::getBFloatTy(LLVMDialect *dialect) { + return LLVMBFloatType::get(dialect->getContext()); +} + +LLVMTypeNew LLVMTypeNew::getHalfTy(LLVMDialect *dialect) { + return LLVMHalfType::get(dialect->getContext()); +} + +LLVMTypeNew LLVMTypeNew::getFP128Ty(LLVMDialect *dialect) { + return LLVMFP128Type::get(dialect->getContext()); +} + +LLVMTypeNew LLVMTypeNew::getX86_FP80Ty(LLVMDialect *dialect) { + return LLVMX86FP80Type::get(dialect->getContext()); +} + +//----------------------------------------------------------------------------// +// Utilities used to generate integer types. + +LLVMTypeNew LLVMTypeNew::getIntNTy(LLVMDialect *dialect, unsigned numBits) { + return LLVMIntegerType::get(dialect->getContext(), numBits); +} + +//----------------------------------------------------------------------------// +// Utilities used to generate other miscellaneous types. + +LLVMTypeNew LLVMTypeNew::getArrayTy(LLVMTypeNew elementType, + uint64_t numElements) { + return LLVMArrayType::get(elementType, numElements); +} + +LLVMTypeNew LLVMTypeNew::getFunctionTy(LLVMTypeNew result, + ArrayRef params, + bool isVarArg) { + return LLVMFunctionType::get(result, params, isVarArg); +} + +LLVMTypeNew LLVMTypeNew::getStructTy(LLVMDialect *dialect, + ArrayRef elements, + bool isPacked) { + return LLVMStructType::getLiteral(dialect->getContext(), elements, isPacked); +} + +LLVMTypeNew LLVMTypeNew::getVectorTy(LLVMTypeNew elementType, + unsigned numElements) { + return LLVMFixedVectorType::get(elementType, numElements); +} + +//----------------------------------------------------------------------------// +// Void type utilities. 
+ +LLVMTypeNew LLVMTypeNew::getVoidTy(LLVMDialect *dialect) { + return LLVMVoidType::get(dialect->getContext()); +} + +bool LLVMTypeNew::isVoidTy() { return isa(); } + +//----------------------------------------------------------------------------// +// Creation and setting of LLVM's identified struct types + +LLVMTypeNew LLVMTypeNew::createStructTy(LLVMDialect *dialect, + ArrayRef elements, + Optional name, + bool isPacked) { + assert(name.hasValue() && + "identified structs with no identifier not supported"); + StringRef stringNameBase = name.getValueOr(""); + std::string stringName = stringNameBase.str(); + unsigned counter = 0; + do { + auto type = + LLVMStructType::getIdentified(dialect->getContext(), stringName); + if (type.isInitialized() || failed(type.setBody(elements, isPacked))) { + counter += 1; + stringName = + (Twine(stringNameBase) + "." + std::to_string(counter)).str(); + continue; + } + return type; + } while (true); +} + +LLVMTypeNew LLVMTypeNew::setStructTyBody(LLVMTypeNew structType, + ArrayRef elements, + bool isPacked) { + LogicalResult couldSet = + structType.cast().setBody(elements, isPacked); + assert(succeeded(couldSet) && "failed to set the body"); + (void)couldSet; + return structType; +} + //===----------------------------------------------------------------------===// // Array type. @@ -117,6 +325,7 @@ bool LLVMStructType::isIdentified() { return getImpl()->isIdentified(); } bool LLVMStructType::isOpaque() { return getImpl()->isOpaque() || !getImpl()->isInitialized(); } +bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); } StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); } ArrayRef LLVMStructType::getBody() { return isIdentified() ? getImpl()->getIdentifiedStructBody() From 20c71e55aad5bf6008c7f5ed63c90ed98907fa99 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 15:31:24 -0400 Subject: [PATCH 300/600] [InstSimplify] reduce code for min/max analysis; NFC This should probably be moved up to some common area eventually when there's another user. --- llvm/lib/Analysis/InstructionSimplify.cpp | 53 ++++++++++------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index f827f0230a3e4..2119ddcc7649b 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5208,6 +5208,16 @@ static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID ID) { } } +static APInt getMaxMinLimit(Intrinsic::ID ID, unsigned BitWidth) { + switch (ID) { + case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth); + case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth); + case Intrinsic::umax: return APInt::getMaxValue(BitWidth); + case Intrinsic::umin: return APInt::getMinValue(BitWidth); + default: llvm_unreachable("Unexpected intrinsic"); + } +} + static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const SimplifyQuery &Q) { Intrinsic::ID IID = F->getIntrinsicID(); @@ -5238,16 +5248,8 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, std::swap(Op0, Op1); // Assume undef is the limit value. 
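  // For example: smax(%x, undef) folds to the signed maximum value and
  // umin(%x, undef) folds to zero, the respective limit values.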
- if (isa(Op1)) { - if (IID == Intrinsic::smax) - return ConstantInt::get(ReturnType, APInt::getSignedMaxValue(BitWidth)); - if (IID == Intrinsic::smin) - return ConstantInt::get(ReturnType, APInt::getSignedMinValue(BitWidth)); - if (IID == Intrinsic::umax) - return ConstantInt::get(ReturnType, APInt::getMaxValue(BitWidth)); - if (IID == Intrinsic::umin) - return ConstantInt::get(ReturnType, APInt::getMinValue(BitWidth)); - } + if (isa(Op1)) + return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth)); auto hasSpecificOperand = [](IntrinsicInst *II, Value *V) { return II->getOperand(0) == V || II->getOperand(1) == V; @@ -5272,25 +5274,18 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, } const APInt *C; - if (!match(Op1, m_APIntAllowUndef(C))) - break; - - // Clamp to limit value. For example: - // umax(i8 %x, i8 255) --> 255 - if ((IID == Intrinsic::smax && C->isMaxSignedValue()) || - (IID == Intrinsic::smin && C->isMinSignedValue()) || - (IID == Intrinsic::umax && C->isMaxValue()) || - (IID == Intrinsic::umin && C->isMinValue())) - return ConstantInt::get(ReturnType, *C); - - // If the constant op is the opposite of the limit value, the other must be - // larger/smaller or equal. For example: - // umin(i8 %x, i8 255) --> %x - if ((IID == Intrinsic::smax && C->isMinSignedValue()) || - (IID == Intrinsic::smin && C->isMaxSignedValue()) || - (IID == Intrinsic::umax && C->isMinValue()) || - (IID == Intrinsic::umin && C->isMaxValue())) - return Op0; + if (match(Op1, m_APIntAllowUndef(C))) { + // Clamp to limit value. For example: + // umax(i8 %x, i8 255) --> 255 + if (*C == getMaxMinLimit(IID, BitWidth)) + return ConstantInt::get(ReturnType, *C); + + // If the constant op is the opposite of the limit value, the other must + // be larger/smaller or equal. For example: + // umin(i8 %x, i8 255) --> %x + if (*C == getMaxMinLimit(getMaxMinOpposite(IID), BitWidth)) + return Op0; + } break; } From 011e15bea3456cf429a801e309c65bef14fa22ad Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 3 Aug 2020 16:51:05 -0400 Subject: [PATCH 301/600] [InstSimplify] add tests for min/max with constants; NFC --- .../InstSimplify/maxmin_intrinsics.ll | 206 ++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 7a31a4dcb9a3b..3aa19e91e0e38 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -572,6 +572,8 @@ define i8 @smin_smax_commute3(i8 %x, i8 %y) { ret i8 %m2 } +; Negative test - mismatched intrinsics. + define i8 @smax_umin(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_umin( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) @@ -583,6 +585,8 @@ define i8 @smax_umin(i8 %x, i8 %y) { ret i8 %m2 } +; Negative test - mismatched intrinsics. + define i8 @smax_umax(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_umax( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) @@ -594,6 +598,8 @@ define i8 @smax_umax(i8 %x, i8 %y) { ret i8 %m2 } +; Negative test - mismatched intrinsics. + define i8 @umax_smin(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_smin( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[X:%.*]]) @@ -605,6 +611,8 @@ define i8 @umax_smin(i8 %x, i8 %y) { ret i8 %m2 } +; Negative test - mismatched intrinsics. 
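+; (Rationale: an unsigned bound does not imply a signed bound, and vice
+; versa, so the outer min/max of the other signedness cannot be folded.)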
+
 define i8 @umin_smin(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umin_smin(
 ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[Y:%.*]], i8 [[X:%.*]])
@@ -615,3 +623,201 @@ define i8 @umin_smin(i8 %x, i8 %y) {
 %m2 = call i8 @llvm.smin.i8(i8 %m, i8 %x)
 ret i8 %m2
 }
+
+define i8 @umax_umax_constants(i8 %x) {
+; CHECK-LABEL: @umax_umax_constants(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 9)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 7, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umax.i8(i8 %x, i8 9)
+ %m2 = call i8 @llvm.umax.i8(i8 7, i8 %m)
+ ret i8 %m2
+}
+
+define i8 @umax_umax_constants_commute1(i8 %x) {
+; CHECK-LABEL: @umax_umax_constants_commute1(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 -128, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 7, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umax.i8(i8 128, i8 %x)
+ %m2 = call i8 @llvm.umax.i8(i8 7, i8 %m)
+ ret i8 %m2
+}
+
+define i8 @umax_umax_constants_commute2(i8 %x) {
+; CHECK-LABEL: @umax_umax_constants_commute2(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 -56)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 127)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umax.i8(i8 %x, i8 200)
+ %m2 = call i8 @llvm.umax.i8(i8 %m, i8 127)
+ ret i8 %m2
+}
+
+define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) {
+; CHECK-LABEL: @umax_umax_constants_commute3(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> )
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x)
+ %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> )
+ ret <2 x i8> %m2
+}
+
+define i8 @umin_umin_constants(i8 %x) {
+; CHECK-LABEL: @umin_umin_constants(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 7)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 9, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umin.i8(i8 %x, i8 7)
+ %m2 = call i8 @llvm.umin.i8(i8 9, i8 %m)
+ ret i8 %m2
+}
+
+define i8 @umin_umin_constants_commute1(i8 %x) {
+; CHECK-LABEL: @umin_umin_constants_commute1(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 7, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 -128, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umin.i8(i8 7, i8 %x)
+ %m2 = call i8 @llvm.umin.i8(i8 128, i8 %m)
+ ret i8 %m2
+}
+
+define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) {
+; CHECK-LABEL: @umin_umin_constants_commute2(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> )
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> )
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> )
+ %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> )
+ ret <2 x i8> %m2
+}
+
+define i8 @umin_umin_constants_commute3(i8 %x) {
+; CHECK-LABEL: @umin_umin_constants_commute3(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 -128, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 -2)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.umin.i8(i8 128, i8 %x)
+ %m2 = call i8 @llvm.umin.i8(i8 %m, i8 254)
+ ret i8 %m2
+}
+
+define i8 @smax_smax_constants(i8 %x) {
+; CHECK-LABEL: @smax_smax_constants(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 9)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 7, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smax.i8(i8 %x, i8 9)
+ %m2 = call i8 @llvm.smax.i8(i8 7, i8 %m)
+ ret i8 %m2
+}
+
+define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) {
+; CHECK-LABEL: @smax_smax_constants_commute1(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[M]])
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x)
+ %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %m)
+ ret <2 x i8> %m2
+}
+
+define i8 @smax_smax_constants_commute2(i8 %x) {
+; CHECK-LABEL: @smax_smax_constants_commute2(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 0)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 -1)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smax.i8(i8 %x, i8 0)
+ %m2 = call i8 @llvm.smax.i8(i8 %m, i8 -1)
+ ret i8 %m2
+}
+
+define i8 @smax_smax_constants_commute3(i8 %x) {
+; CHECK-LABEL: @smax_smax_constants_commute3(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 -1, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 -127)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smax.i8(i8 -1, i8 %x)
+ %m2 = call i8 @llvm.smax.i8(i8 %m, i8 -127)
+ ret i8 %m2
+}
+
+define <2 x i8> @smin_smin_constants(<2 x i8> %x) {
+; CHECK-LABEL: @smin_smin_constants(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> )
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> , <2 x i8> [[M]])
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> )
+ %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> , <2 x i8> %m)
+ ret <2 x i8> %m2
+}
+
+define i8 @smin_smin_constants_commute1(i8 %x) {
+; CHECK-LABEL: @smin_smin_constants_commute1(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 -127, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 7, i8 [[M]])
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smin.i8(i8 -127, i8 %x)
+ %m2 = call i8 @llvm.smin.i8(i8 7, i8 %m)
+ ret i8 %m2
+}
+
+define i8 @smin_smin_constants_commute2(i8 %x) {
+; CHECK-LABEL: @smin_smin_constants_commute2(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1)
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 0)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smin.i8(i8 %x, i8 -1)
+ %m2 = call i8 @llvm.smin.i8(i8 %m, i8 0)
+ ret i8 %m2
+}
+
+define i8 @smin_smin_constants_commute3(i8 %x) {
+; CHECK-LABEL: @smin_smin_constants_commute3(
+; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 -127, i8 [[X:%.*]])
+; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 -1)
+; CHECK-NEXT: ret i8 [[M2]]
+;
+ %m = call i8 @llvm.smin.i8(i8 -127, i8 %x)
+ %m2 = call i8 @llvm.smin.i8(i8 %m, i8 -1)
+ ret i8 %m2
+}
+
+define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) {
+; CHECK-LABEL: @umin_umin_constants_partial_undef(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> )
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> , <2 x i8> [[M]])
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> )
+ %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> , <2 x i8> %m)
+ ret <2 x i8> %m2
+}
+
+define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) {
+; CHECK-LABEL: @smax_smax_constants_partial_undef(
+; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> )
+; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[M]])
+; CHECK-NEXT: ret <2 x i8> [[M2]]
+;
+ %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> )
+ %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %m)
+ ret <2 x i8> %m2
+}

From ec1f4e7c3b17656658c9cf49c33bc06c4bc747c2 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Tue, 4 Aug 2020 11:37:50 +0200
Subject: [PATCH 302/600] [mlir] switch the modeling of LLVM types to use the new mechanism

A new first-party modeling for LLVM IR types in the LLVM dialect has been
developed in parallel to the existing modeling based on wrapping LLVM
`Type *` instances. It resolves the long-standing problem of modeling
identified structure types, including recursive structures, and enables
future removal of LLVMContext and related locking mechanisms from
LLVMDialect.

This commit only switches the modeling by (a) renaming LLVMTypeNew to
LLVMType, (b) removing the old implementation of LLVMType, and (c)
updating the tests. It is intentionally minimal. Separate commits will
remove the infrastructure built for the transition and update API uses
where appropriate.

Depends On D85020

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D85021
---
 .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 176 +----
 mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h | 253 +++---
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 3 +-
 .../mlir/Target/LLVMIR/TypeTranslation.h | 6 +-
 .../LLVMIR/CPU/test-vector-reductions-fp.mlir | 32 +-
 .../CPU/test-vector-reductions-int.mlir | 30 +-
 .../Conversion/VectorToLLVM/CMakeLists.txt | 3 +-
 .../VectorToLLVM/ConvertVectorToLLVM.cpp | 7 +-
 mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 302 +------
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp | 98 ++-
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 150 ++--
 mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h | 47 +-
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 5 +-
 mlir/lib/Target/LLVMIR/TypeTranslation.cpp | 46 +-
 ...ower-launch-func-to-gpu-runtime-calls.mlir | 12 +-
 .../GPUCommon/memory-attrbution.mlir | 56 +-
 .../GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir | 2 +-
 .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 6 +-
 .../lower-rocdl-kernel-to-hsaco.mlir | 2 +-
 .../Conversion/GPUToVulkan/invoke-vulkan.mlir | 72 +-
 .../SPIRVToLLVM/arithmetic-ops-to-llvm.mlir | 26 +-
 .../SPIRVToLLVM/bitwise-ops-to-llvm.mlir | 100 +--
 .../SPIRVToLLVM/cast-ops-to-llvm.mlir | 38 +-
 .../SPIRVToLLVM/comparison-ops-to-llvm.mlir | 48 +-
 .../SPIRVToLLVM/constant-op-to-llvm.mlir | 12 +-
 .../Conversion/SPIRVToLLVM/func-to-llvm.mlir | 8 +-
 .../SPIRVToLLVM/glsl-ops-to-llvm.mlir | 142 ++--
 .../SPIRVToLLVM/logical-ops-to-llvm.mlir | 12 +-
 .../SPIRVToLLVM/memory-ops-to-llvm.mlir | 72 +-
 .../SPIRVToLLVM/misc-ops-to-llvm.mlir | 6 +-
 .../SPIRVToLLVM/shifts-to-llvm.mlir | 36 +-
 .../SPIRVToLLVM/spirv-types-to-llvm.mlir | 14 +-
 .../StandardToLLVM/calling-convention.mlir | 52 +-
 .../convert-dynamic-memref-ops.mlir | 246 +++---
 .../StandardToLLVM/convert-funcs.mlir | 28 +-
 .../convert-static-memref-ops.mlir | 278 +++----
 .../StandardToLLVM/convert-to-llvmir.mlir | 542 ++++++-------
 .../Conversion/StandardToLLVM/invalid.mlir | 6 +-
 .../StandardToLLVM/standard-to-llvm.mlir | 24 +-
 .../vector-reduction-to-llvm.mlir | 16 +-
 .../VectorToLLVM/vector-to-llvm.mlir | 736 +++++++++---------
 .../VectorToROCDL/vector-to-rocdl.mlir | 8 +-
 mlir/test/Dialect/GPU/invalid.mlir | 22 +-
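To make the syntax switch concrete, here is a minimal before/after sketch in the LLVM dialect, assuming only forms exercised by the updated tests in this patch; the function names and the struct layout are illustrative, not taken from the diff:

  // Old modeling: the LLVM IR type is quoted verbatim inside !llvm<"...">.
  llvm.func @reduce_old(!llvm<"<4 x float>">) -> !llvm.float

  // New first-party modeling: structured, recursively parseable syntax.
  llvm.func @reduce_new(!llvm.vec<4 x float>) -> !llvm.float

  // Identified structs, including recursive ones, become expressible;
  // the nested self-reference is spelled by name only.
  llvm.func @walk_list(!llvm.ptr<struct<"node", (i32, ptr<struct<"node">>)>>)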
.../test/Dialect/GPU/multiple-all-reduce.mlir | 4 +- mlir/test/Dialect/GPU/outlining.mlir | 4 +- mlir/test/Dialect/LLVMIR/func.mlir | 42 +- mlir/test/Dialect/LLVMIR/global.mlir | 26 +- mlir/test/Dialect/LLVMIR/invalid.mlir | 194 +++-- mlir/test/Dialect/LLVMIR/nvvm.mlir | 22 +- mlir/test/Dialect/LLVMIR/rocdl.mlir | 164 ++-- mlir/test/Dialect/LLVMIR/roundtrip.mlir | 232 +++--- mlir/test/Dialect/LLVMIR/types.mlir | 276 +++---- mlir/test/Dialect/Linalg/llvm.mlir | 166 ++-- mlir/test/Target/avx512.mlir | 20 +- mlir/test/Target/import.ll | 66 +- mlir/test/Target/llvmir-intrinsics.mlir | 146 ++-- mlir/test/Target/llvmir-invalid.mlir | 8 +- mlir/test/Target/llvmir.mlir | 386 ++++----- mlir/test/Target/nvvmir.mlir | 16 +- mlir/test/Target/rocdl.mlir | 110 +-- .../Dialect/LLVMIR/LLVMTypeTestDialect.cpp | 40 +- .../lib/Target/TestLLVMTypeTranslation.cpp | 4 +- .../mlir-cpu-runner/bare_ptr_call_conv.mlir | 102 +-- mlir/test/mlir-cpu-runner/simple.mlir | 30 +- 64 files changed, 2693 insertions(+), 3145 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index 52acfbfa8e507..6b771f8e91239 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -14,6 +14,7 @@ #ifndef MLIR_DIALECT_LLVMIR_LLVMDIALECT_H_ #define MLIR_DIALECT_LLVMIR_LLVMDIALECT_H_ +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Function.h" #include "mlir/IR/OpDefinition.h" @@ -47,187 +48,12 @@ struct LLVMTypeStorage; struct LLVMDialectImpl; } // namespace detail -class LLVMType; - /// Converts an MLIR LLVM dialect type to LLVM IR type. Note that this function /// exists exclusively for the purpose of gradual transition to the first-party /// modeling of LLVM types. It should not be used outside MLIR-to-LLVM /// translation. llvm::Type *convertLLVMType(LLVMType type); -class LLVMType : public mlir::Type::TypeBase { -public: - enum Kind { - LLVM_TYPE = FIRST_LLVM_TYPE, - }; - - using Base::Base; - - static bool kindof(unsigned kind) { return kind == LLVM_TYPE; } - - LLVMDialect &getDialect(); - - /// Utilities to identify types. - bool isBFloatTy() { return getUnderlyingType()->isBFloatTy(); } - bool isHalfTy() { return getUnderlyingType()->isHalfTy(); } - bool isFloatTy() { return getUnderlyingType()->isFloatTy(); } - bool isDoubleTy() { return getUnderlyingType()->isDoubleTy(); } - bool isFloatingPointTy() { return getUnderlyingType()->isFloatingPointTy(); } - - /// Array type utilities. - LLVMType getArrayElementType(); - unsigned getArrayNumElements(); - bool isArrayTy(); - - /// Integer type utilities. - unsigned getIntegerBitWidth() { - return getUnderlyingType()->getIntegerBitWidth(); - } - bool isIntegerTy() { return getUnderlyingType()->isIntegerTy(); } - bool isIntegerTy(unsigned bitwidth) { - return getUnderlyingType()->isIntegerTy(bitwidth); - } - - /// Vector type utilities. - LLVMType getVectorElementType(); - unsigned getVectorNumElements(); - llvm::ElementCount getVectorElementCount(); - bool isVectorTy(); - - /// Function type utilities. - LLVMType getFunctionParamType(unsigned argIdx); - unsigned getFunctionNumParams(); - LLVMType getFunctionResultType(); - bool isFunctionTy(); - bool isFunctionVarArg(); - - /// Pointer type utilities. - LLVMType getPointerTo(unsigned addrSpace = 0); - LLVMType getPointerElementTy(); - bool isPointerTy(); - static bool isValidPointerElementType(LLVMType type); - - /// Struct type utilities. 
- LLVMType getStructElementType(unsigned i); - unsigned getStructNumElements(); - bool isStructTy(); - - /// Utilities used to generate floating point types. - static LLVMType getDoubleTy(LLVMDialect *dialect); - static LLVMType getFloatTy(LLVMDialect *dialect); - static LLVMType getBFloatTy(LLVMDialect *dialect); - static LLVMType getHalfTy(LLVMDialect *dialect); - static LLVMType getFP128Ty(LLVMDialect *dialect); - static LLVMType getX86_FP80Ty(LLVMDialect *dialect); - - /// Utilities used to generate integer types. - static LLVMType getIntNTy(LLVMDialect *dialect, unsigned numBits); - static LLVMType getInt1Ty(LLVMDialect *dialect) { - return getIntNTy(dialect, /*numBits=*/1); - } - static LLVMType getInt8Ty(LLVMDialect *dialect) { - return getIntNTy(dialect, /*numBits=*/8); - } - static LLVMType getInt8PtrTy(LLVMDialect *dialect) { - return getInt8Ty(dialect).getPointerTo(); - } - static LLVMType getInt16Ty(LLVMDialect *dialect) { - return getIntNTy(dialect, /*numBits=*/16); - } - static LLVMType getInt32Ty(LLVMDialect *dialect) { - return getIntNTy(dialect, /*numBits=*/32); - } - static LLVMType getInt64Ty(LLVMDialect *dialect) { - return getIntNTy(dialect, /*numBits=*/64); - } - - /// Utilities used to generate other miscellaneous types. - static LLVMType getArrayTy(LLVMType elementType, uint64_t numElements); - static LLVMType getFunctionTy(LLVMType result, ArrayRef params, - bool isVarArg); - static LLVMType getFunctionTy(LLVMType result, bool isVarArg) { - return getFunctionTy(result, llvm::None, isVarArg); - } - static LLVMType getStructTy(LLVMDialect *dialect, ArrayRef elements, - bool isPacked = false); - static LLVMType getStructTy(LLVMDialect *dialect, bool isPacked = false) { - return getStructTy(dialect, llvm::None, isPacked); - } - template - static typename std::enable_if::value, - LLVMType>::type - getStructTy(LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - return getStructTy(&elt1.getDialect(), fields); - } - static LLVMType getVectorTy(LLVMType elementType, unsigned numElements); - - /// Void type utilities. - static LLVMType getVoidTy(LLVMDialect *dialect); - bool isVoidTy(); - - // Creation and setting of LLVM's identified struct types - static LLVMType createStructTy(LLVMDialect *dialect, - ArrayRef elements, - Optional name, - bool isPacked = false); - - static LLVMType createStructTy(LLVMDialect *dialect, - Optional name) { - return createStructTy(dialect, llvm::None, name); - } - - static LLVMType createStructTy(ArrayRef elements, - Optional name, - bool isPacked = false) { - assert(!elements.empty() && - "This method may not be invoked with an empty list"); - LLVMType ele0 = elements.front(); - return createStructTy(&ele0.getDialect(), elements, name, isPacked); - } - - template - static typename std::enable_if_t::value, - LLVMType> - createStructTy(StringRef name, LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - Optional opt_name(name); - return createStructTy(&elt1.getDialect(), fields, opt_name); - } - - static LLVMType setStructTyBody(LLVMType structType, - ArrayRef elements, - bool isPacked = false); - - template - static typename std::enable_if_t::value, - LLVMType> - setStructTyBody(LLVMType structType, LLVMType elt1, Args... elts) { - SmallVector fields({elt1, elts...}); - return setStructTyBody(structType, fields); - } - -private: - friend LLVMDialect; - friend llvm::Type *convertLLVMType(LLVMType type); - - /// Get the underlying LLVM IR type. 
- llvm::Type *getUnderlyingType() const; - - /// Get the underlying LLVM IR types for the given array of types. - static void getUnderlyingTypes(ArrayRef types, - SmallVectorImpl &result); - - /// Get an LLVMType with a pre-existing llvm type. - static LLVMType get(MLIRContext *context, llvm::Type *llvmType); - - /// Get an LLVMType with an llvm type that may cause changes to the underlying - /// llvm context when constructed. - static LLVMType getLocked(LLVMDialect *dialect, - function_ref typeBuilder); -}; - ///// Ops ///// #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMOps.h.inc" diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h index e409d6880283f..7d7839c166f78 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h @@ -43,7 +43,7 @@ class LLVMDoubleType; class LLVMIntegerType; //===----------------------------------------------------------------------===// -// LLVMTypeNew. +// LLVMType. //===----------------------------------------------------------------------===// /// Base class for LLVM dialect types. @@ -61,7 +61,7 @@ class LLVMIntegerType; /// Similarly to other MLIR types, LLVM dialect types are owned by the MLIR /// context, have an immutable identifier (for most types except identified /// structs, the entire type is the identifier) and are thread-safe. -class LLVMTypeNew : public Type { +class LLVMType : public Type::TypeBase { public: enum Kind { // Keep non-parametric types contiguous in the enum. @@ -92,12 +92,12 @@ class LLVMTypeNew : public Type { }; /// Inherit base constructors. - using Type::Type; + using Base::Base; /// Support for PointerLikeTypeTraits. using Type::getAsOpaquePointer; - static LLVMTypeNew getFromOpaquePointer(const void *ptr) { - return LLVMTypeNew(static_cast(const_cast(ptr))); + static LLVMType getFromOpaquePointer(const void *ptr) { + return LLVMType(static_cast(const_cast(ptr))); } /// Support for isa/cast. @@ -118,7 +118,7 @@ class LLVMTypeNew : public Type { } /// Array type utilities. - LLVMTypeNew getArrayElementType(); + LLVMType getArrayElementType(); unsigned getArrayNumElements(); bool isArrayTy(); @@ -128,124 +128,121 @@ class LLVMTypeNew : public Type { unsigned getIntegerBitWidth(); /// Vector type utilities. - LLVMTypeNew getVectorElementType(); + LLVMType getVectorElementType(); unsigned getVectorNumElements(); llvm::ElementCount getVectorElementCount(); bool isVectorTy(); /// Function type utilities. - LLVMTypeNew getFunctionParamType(unsigned argIdx); + LLVMType getFunctionParamType(unsigned argIdx); unsigned getFunctionNumParams(); - LLVMTypeNew getFunctionResultType(); + LLVMType getFunctionResultType(); bool isFunctionTy(); bool isFunctionVarArg(); /// Pointer type utilities. - LLVMTypeNew getPointerTo(unsigned addrSpace = 0); - LLVMTypeNew getPointerElementTy(); + LLVMType getPointerTo(unsigned addrSpace = 0); + LLVMType getPointerElementTy(); bool isPointerTy(); - static bool isValidPointerElementType(LLVMTypeNew type); + static bool isValidPointerElementType(LLVMType type); /// Struct type utilities. - LLVMTypeNew getStructElementType(unsigned i); + LLVMType getStructElementType(unsigned i); unsigned getStructNumElements(); bool isStructTy(); /// Utilities used to generate floating point types. 
- static LLVMTypeNew getDoubleTy(LLVMDialect *dialect); - static LLVMTypeNew getFloatTy(LLVMDialect *dialect); - static LLVMTypeNew getBFloatTy(LLVMDialect *dialect); - static LLVMTypeNew getHalfTy(LLVMDialect *dialect); - static LLVMTypeNew getFP128Ty(LLVMDialect *dialect); - static LLVMTypeNew getX86_FP80Ty(LLVMDialect *dialect); + static LLVMType getDoubleTy(LLVMDialect *dialect); + static LLVMType getFloatTy(LLVMDialect *dialect); + static LLVMType getBFloatTy(LLVMDialect *dialect); + static LLVMType getHalfTy(LLVMDialect *dialect); + static LLVMType getFP128Ty(LLVMDialect *dialect); + static LLVMType getX86_FP80Ty(LLVMDialect *dialect); /// Utilities used to generate integer types. - static LLVMTypeNew getIntNTy(LLVMDialect *dialect, unsigned numBits); - static LLVMTypeNew getInt1Ty(LLVMDialect *dialect) { + static LLVMType getIntNTy(LLVMDialect *dialect, unsigned numBits); + static LLVMType getInt1Ty(LLVMDialect *dialect) { return getIntNTy(dialect, /*numBits=*/1); } - static LLVMTypeNew getInt8Ty(LLVMDialect *dialect) { + static LLVMType getInt8Ty(LLVMDialect *dialect) { return getIntNTy(dialect, /*numBits=*/8); } - static LLVMTypeNew getInt8PtrTy(LLVMDialect *dialect) { + static LLVMType getInt8PtrTy(LLVMDialect *dialect) { return getInt8Ty(dialect).getPointerTo(); } - static LLVMTypeNew getInt16Ty(LLVMDialect *dialect) { + static LLVMType getInt16Ty(LLVMDialect *dialect) { return getIntNTy(dialect, /*numBits=*/16); } - static LLVMTypeNew getInt32Ty(LLVMDialect *dialect) { + static LLVMType getInt32Ty(LLVMDialect *dialect) { return getIntNTy(dialect, /*numBits=*/32); } - static LLVMTypeNew getInt64Ty(LLVMDialect *dialect) { + static LLVMType getInt64Ty(LLVMDialect *dialect) { return getIntNTy(dialect, /*numBits=*/64); } /// Utilities used to generate other miscellaneous types. - static LLVMTypeNew getArrayTy(LLVMTypeNew elementType, uint64_t numElements); - static LLVMTypeNew getFunctionTy(LLVMTypeNew result, - ArrayRef params, bool isVarArg); - static LLVMTypeNew getFunctionTy(LLVMTypeNew result, bool isVarArg) { + static LLVMType getArrayTy(LLVMType elementType, uint64_t numElements); + static LLVMType getFunctionTy(LLVMType result, ArrayRef params, + bool isVarArg); + static LLVMType getFunctionTy(LLVMType result, bool isVarArg) { return getFunctionTy(result, llvm::None, isVarArg); } - static LLVMTypeNew getStructTy(LLVMDialect *dialect, - ArrayRef elements, - bool isPacked = false); - static LLVMTypeNew getStructTy(LLVMDialect *dialect, bool isPacked = false) { + static LLVMType getStructTy(LLVMDialect *dialect, ArrayRef elements, + bool isPacked = false); + static LLVMType getStructTy(LLVMDialect *dialect, bool isPacked = false) { return getStructTy(dialect, llvm::None, isPacked); } template - static typename std::enable_if::value, - LLVMTypeNew>::type - getStructTy(LLVMTypeNew elt1, Args... elts) { - SmallVector fields({elt1, elts...}); + static typename std::enable_if::value, + LLVMType>::type + getStructTy(LLVMType elt1, Args... elts) { + SmallVector fields({elt1, elts...}); return getStructTy(&elt1.getDialect(), fields); } - static LLVMTypeNew getVectorTy(LLVMTypeNew elementType, unsigned numElements); + static LLVMType getVectorTy(LLVMType elementType, unsigned numElements); /// Void type utilities. 
- static LLVMTypeNew getVoidTy(LLVMDialect *dialect); + static LLVMType getVoidTy(LLVMDialect *dialect); bool isVoidTy(); // Creation and setting of LLVM's identified struct types - static LLVMTypeNew createStructTy(LLVMDialect *dialect, - ArrayRef elements, - Optional name, - bool isPacked = false); + static LLVMType createStructTy(LLVMDialect *dialect, + ArrayRef elements, + Optional name, + bool isPacked = false); - static LLVMTypeNew createStructTy(LLVMDialect *dialect, - Optional name) { + static LLVMType createStructTy(LLVMDialect *dialect, + Optional name) { return createStructTy(dialect, llvm::None, name); } - static LLVMTypeNew createStructTy(ArrayRef elements, - Optional name, - bool isPacked = false) { + static LLVMType createStructTy(ArrayRef elements, + Optional name, + bool isPacked = false) { assert(!elements.empty() && "This method may not be invoked with an empty list"); - LLVMTypeNew ele0 = elements.front(); + LLVMType ele0 = elements.front(); return createStructTy(&ele0.getDialect(), elements, name, isPacked); } template - static - typename std::enable_if_t::value, - LLVMTypeNew> - createStructTy(StringRef name, LLVMTypeNew elt1, Args... elts) { - SmallVector fields({elt1, elts...}); + static typename std::enable_if_t::value, + LLVMType> + createStructTy(StringRef name, LLVMType elt1, Args... elts) { + SmallVector fields({elt1, elts...}); Optional opt_name(name); return createStructTy(&elt1.getDialect(), fields, opt_name); } - static LLVMTypeNew setStructTyBody(LLVMTypeNew structType, - ArrayRef elements, - bool isPacked = false); + static LLVMType setStructTyBody(LLVMType structType, + ArrayRef elements, + bool isPacked = false); template - static - typename std::enable_if_t::value, - LLVMTypeNew> - setStructTyBody(LLVMTypeNew structType, LLVMTypeNew elt1, Args... elts) { - SmallVector fields({elt1, elts...}); + static typename std::enable_if_t::value, + LLVMType> + setStructTyBody(LLVMType structType, LLVMType elt1, Args... elts) { + SmallVector fields({elt1, elts...}); return setStructTyBody(structType, fields); } }; @@ -256,8 +253,7 @@ class LLVMTypeNew : public Type { // Batch-define trivial types. 
#define DEFINE_TRIVIAL_LLVM_TYPE(ClassName, Kind) \ - class ClassName \ - : public Type::TypeBase { \ + class ClassName : public Type::TypeBase { \ public: \ using Base::Base; \ static bool kindof(unsigned kind) { return kind == Kind; } \ @@ -266,18 +262,18 @@ class LLVMTypeNew : public Type { } \ } -DEFINE_TRIVIAL_LLVM_TYPE(LLVMVoidType, LLVMTypeNew::VoidType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMHalfType, LLVMTypeNew::HalfType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMBFloatType, LLVMTypeNew::BFloatType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMFloatType, LLVMTypeNew::FloatType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMDoubleType, LLVMTypeNew::DoubleType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMFP128Type, LLVMTypeNew::FP128Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86FP80Type, LLVMTypeNew::X86FP80Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMPPCFP128Type, LLVMTypeNew::PPCFP128Type); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86MMXType, LLVMTypeNew::X86MMXType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMTokenType, LLVMTypeNew::TokenType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMLabelType, LLVMTypeNew::LabelType); -DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, LLVMTypeNew::MetadataType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMVoidType, LLVMType::VoidType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMHalfType, LLVMType::HalfType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMBFloatType, LLVMType::BFloatType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMFloatType, LLVMType::FloatType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMDoubleType, LLVMType::DoubleType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMFP128Type, LLVMType::FP128Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86FP80Type, LLVMType::X86FP80Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMPPCFP128Type, LLVMType::PPCFP128Type); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMX86MMXType, LLVMType::X86MMXType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMTokenType, LLVMType::TokenType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMLabelType, LLVMType::LabelType); +DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, LLVMType::MetadataType); #undef DEFINE_TRIVIAL_LLVM_TYPE @@ -288,21 +284,21 @@ DEFINE_TRIVIAL_LLVM_TYPE(LLVMMetadataType, LLVMTypeNew::MetadataType); /// LLVM dialect array type. It is an aggregate type representing consecutive /// elements in memory, parameterized by the number of elements and the element /// type. -class LLVMArrayType : public Type::TypeBase { public: /// Inherit base constructors. using Base::Base; /// Support for isa/cast. - static bool kindof(unsigned kind) { return kind == LLVMTypeNew::ArrayType; } + static bool kindof(unsigned kind) { return kind == LLVMType::ArrayType; } /// Gets or creates an instance of LLVM dialect array type containing /// `numElements` of `elementType`, in the same context as `elementType`. - static LLVMArrayType get(LLVMTypeNew elementType, unsigned numElements); + static LLVMArrayType get(LLVMType elementType, unsigned numElements); /// Returns the element type of the array. - LLVMTypeNew getElementType(); + LLVMType getElementType(); /// Returns the number of elements in the array type. unsigned getNumElements(); @@ -316,38 +312,35 @@ class LLVMArrayType : public Type::TypeBase { public: /// Inherit base constructors. using Base::Base; /// Support for isa/cast. - static bool kindof(unsigned kind) { - return kind == LLVMTypeNew::FunctionType; - } + static bool kindof(unsigned kind) { return kind == LLVMType::FunctionType; } /// Gets or creates an instance of LLVM dialect function in the same context /// as the `result` type. 
- static LLVMFunctionType get(LLVMTypeNew result, - ArrayRef arguments, + static LLVMFunctionType get(LLVMType result, ArrayRef arguments, bool isVarArg = false); /// Returns the result type of the function. - LLVMTypeNew getReturnType(); + LLVMType getReturnType(); /// Returns the number of arguments to the function. unsigned getNumParams(); /// Returns `i`-th argument of the function. Asserts on out-of-bounds. - LLVMTypeNew getParamType(unsigned i); + LLVMType getParamType(unsigned i); /// Returns whether the function is variadic. bool isVarArg(); /// Returns a list of argument types of the function. - ArrayRef getParams(); - ArrayRef params() { return getParams(); } + ArrayRef getParams(); + ArrayRef params() { return getParams(); } }; //===----------------------------------------------------------------------===// @@ -355,14 +348,14 @@ class LLVMFunctionType //===----------------------------------------------------------------------===// /// LLVM dialect signless integer type parameterized by bitwidth. -class LLVMIntegerType : public Type::TypeBase { public: /// Inherit base constructor. using Base::Base; /// Support for isa/cast. - static bool kindof(unsigned kind) { return kind == LLVMTypeNew::IntegerType; } + static bool kindof(unsigned kind) { return kind == LLVMType::IntegerType; } /// Gets or creates an instance of the integer of the specified `bitwidth` in /// the given context. @@ -379,22 +372,22 @@ class LLVMIntegerType : public Type::TypeBase { public: /// Inherit base constructors. using Base::Base; /// Support for isa/cast. - static bool kindof(unsigned kind) { return kind == LLVMTypeNew::PointerType; } + static bool kindof(unsigned kind) { return kind == LLVMType::PointerType; } /// Gets or creates an instance of LLVM dialect pointer type pointing to an /// object of `pointee` type in the given address space. The pointer type is /// created in the same context as `pointee`. - static LLVMPointerType get(LLVMTypeNew pointee, unsigned addressSpace = 0); + static LLVMPointerType get(LLVMType pointee, unsigned addressSpace = 0); /// Returns the pointed-to type. - LLVMTypeNew getElementType(); + LLVMType getElementType(); /// Returns the address space of the pointer. unsigned getAddressSpace(); @@ -428,14 +421,14 @@ class LLVMPointerType : public Type::TypeBase { public: /// Inherit base construtors. using Base::Base; /// Support for isa/cast. - static bool kindof(unsigned kind) { return kind == LLVMTypeNew::StructType; } + static bool kindof(unsigned kind) { return kind == LLVMType::StructType; } /// Gets or creates an identified struct with the given name in the provided /// context. Note that unlike llvm::StructType::create, this function will @@ -447,7 +440,7 @@ class LLVMStructType : public Type::TypeBase types, + ArrayRef types, bool isPacked = false); /// Gets or creates an intentionally-opaque identified struct. Such a struct @@ -464,7 +457,7 @@ class LLVMStructType : public Type::TypeBase types, bool isPacked); + LogicalResult setBody(ArrayRef types, bool isPacked); /// Checks if a struct is packed. bool isPacked(); @@ -482,7 +475,7 @@ class LLVMStructType : public Type::TypeBase getBody(); + ArrayRef getBody(); }; //===----------------------------------------------------------------------===// @@ -492,19 +485,20 @@ class LLVMStructType : public Type::TypeBase { public: /// Inherit base constructor. - using LLVMTypeNew::LLVMTypeNew; + using Base::Base; /// Support for isa/cast. 
static bool kindof(unsigned kind) { - return kind == LLVMTypeNew::FixedVectorType || - kind == LLVMTypeNew::ScalableVectorType; + return kind == LLVMType::FixedVectorType || + kind == LLVMType::ScalableVectorType; } /// Returns the element type of the vector. - LLVMTypeNew getElementType(); + LLVMType getElementType(); /// Returns the number of elements in the vector. llvm::ElementCount getElementCount(); @@ -525,12 +519,12 @@ class LLVMFixedVectorType /// Support for isa/cast. static bool kindof(unsigned kind) { - return kind == LLVMTypeNew::FixedVectorType; + return kind == LLVMType::FixedVectorType; } /// Gets or creates a fixed vector type containing `numElements` of /// `elementType` in the same context as `elementType`. - static LLVMFixedVectorType get(LLVMTypeNew elementType, unsigned numElements); + static LLVMFixedVectorType get(LLVMType elementType, unsigned numElements); /// Returns the number of elements in the fixed vector. unsigned getNumElements(); @@ -552,12 +546,12 @@ class LLVMScalableVectorType /// Support for isa/cast. static bool kindof(unsigned kind) { - return kind == LLVMTypeNew::ScalableVectorType; + return kind == LLVMType::ScalableVectorType; } /// Gets or creates a scalable vector type containing a non-zero multiple of /// `minNumElements` of `elementType` in the same context as `elementType`. - static LLVMScalableVectorType get(LLVMTypeNew elementType, + static LLVMScalableVectorType get(LLVMType elementType, unsigned minNumElements); /// Returns the scaling factor of the number of elements in the vector. The @@ -572,10 +566,10 @@ class LLVMScalableVectorType namespace detail { /// Parses an LLVM dialect type. -LLVMTypeNew parseType(DialectAsmParser &parser); +LLVMType parseType(DialectAsmParser &parser); /// Prints an LLVM Dialect type. -void printType(LLVMTypeNew type, DialectAsmPrinter &printer); +void printType(LLVMType type, DialectAsmPrinter &printer); } // namespace detail } // namespace LLVM @@ -587,34 +581,35 @@ void printType(LLVMTypeNew type, DialectAsmPrinter &printer); namespace llvm { -// LLVMTypeNew instances hash just like pointers. -template <> struct DenseMapInfo { - static mlir::LLVM::LLVMTypeNew getEmptyKey() { +// LLVMType instances hash just like pointers. +template <> +struct DenseMapInfo { + static mlir::LLVM::LLVMType getEmptyKey() { void *pointer = llvm::DenseMapInfo::getEmptyKey(); - return mlir::LLVM::LLVMTypeNew( - static_cast(pointer)); + return mlir::LLVM::LLVMType( + static_cast(pointer)); } - static mlir::LLVM::LLVMTypeNew getTombstoneKey() { + static mlir::LLVM::LLVMType getTombstoneKey() { void *pointer = llvm::DenseMapInfo::getTombstoneKey(); - return mlir::LLVM::LLVMTypeNew( - static_cast(pointer)); + return mlir::LLVM::LLVMType( + static_cast(pointer)); } - static unsigned getHashValue(mlir::LLVM::LLVMTypeNew val) { + static unsigned getHashValue(mlir::LLVM::LLVMType val) { return mlir::hash_value(val); } - static bool isEqual(mlir::LLVM::LLVMTypeNew lhs, - mlir::LLVM::LLVMTypeNew rhs) { + static bool isEqual(mlir::LLVM::LLVMType lhs, mlir::LLVM::LLVMType rhs) { return lhs == rhs; } }; -// LLVMTypeNew behaves like a pointer similarly to mlir::Type. -template <> struct PointerLikeTypeTraits { - static inline void *getAsVoidPointer(mlir::LLVM::LLVMTypeNew type) { +// LLVMType behaves like a pointer similarly to mlir::Type. 
+template <> +struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(mlir::LLVM::LLVMType type) { return const_cast(type.getAsOpaquePointer()); } - static inline mlir::LLVM::LLVMTypeNew getFromVoidPointer(void *ptr) { - return mlir::LLVM::LLVMTypeNew::getFromOpaquePointer(ptr); + static inline mlir::LLVM::LLVMType getFromVoidPointer(void *ptr) { + return mlir::LLVM::LLVMType::getFromOpaquePointer(ptr); } static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits::NumLowBitsAvailable; diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 5c793f8547dab..5f022e32b801d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -112,7 +112,8 @@ def NVVM_ShflBflyOp : if (!type.isStructTy() || type.getStructNumElements() != 2 || !type.getStructElementType(1).isIntegerTy( /*Bitwidth=*/1)) - return emitError("expected return type !llvm<\"{ ?, i1 }\">"); + return emitError("expected return type to be a two-element struct with " + "i1 as the second element"); return success(); }]; } diff --git a/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h b/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h index 5a82f0a096dfc..3ab962b9ab11b 100644 --- a/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/TypeTranslation.h @@ -25,10 +25,10 @@ class MLIRContext; namespace LLVM { -class LLVMTypeNew; +class LLVMType; -llvm::Type *translateTypeToLLVMIR(LLVMTypeNew type, llvm::LLVMContext &context); -LLVMTypeNew translateTypeFromLLVMIR(llvm::Type *type, MLIRContext &context); +llvm::Type *translateTypeToLLVMIR(LLVMType type, llvm::LLVMContext &context); +LLVMType translateTypeFromLLVMIR(llvm::Type *type, MLIRContext &context); } // namespace LLVM } // namespace mlir diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir index 9b61414277b6a..2f17dbe4455f8 100644 --- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-fp.mlir @@ -12,74 +12,74 @@ module { %1 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float %2 = llvm.mlir.constant(3.000000e+00 : f32) : !llvm.float %3 = llvm.mlir.constant(4.000000e+00 : f32) : !llvm.float - %4 = llvm.mlir.undef : !llvm<"<4 x float>"> + %4 = llvm.mlir.undef : !llvm.vec<4 x float> %5 = llvm.mlir.constant(0 : index) : !llvm.i64 - %6 = llvm.insertelement %0, %4[%5 : !llvm.i64] : !llvm<"<4 x float>"> + %6 = llvm.insertelement %0, %4[%5 : !llvm.i64] : !llvm.vec<4 x float> %7 = llvm.shufflevector %6, %4 [0 : i32, 0 : i32, 0 : i32, 0 : i32] - : !llvm<"<4 x float>">, !llvm<"<4 x float>"> + : !llvm.vec<4 x float>, !llvm.vec<4 x float> %8 = llvm.mlir.constant(1 : i64) : !llvm.i64 - %9 = llvm.insertelement %1, %7[%8 : !llvm.i64] : !llvm<"<4 x float>"> + %9 = llvm.insertelement %1, %7[%8 : !llvm.i64] : !llvm.vec<4 x float> %10 = llvm.mlir.constant(2 : i64) : !llvm.i64 - %11 = llvm.insertelement %2, %9[%10 : !llvm.i64] : !llvm<"<4 x float>"> + %11 = llvm.insertelement %2, %9[%10 : !llvm.i64] : !llvm.vec<4 x float> %12 = llvm.mlir.constant(3 : i64) : !llvm.i64 - %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm<"<4 x float>"> + %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm.vec<4 x float> %max = "llvm.intr.experimental.vector.reduce.fmax"(%v) - : (!llvm<"<4 x float>">) -> !llvm.float + : (!llvm.vec<4 x float>) -> !llvm.float 
llvm.call @print_f32(%max) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 4 %min = "llvm.intr.experimental.vector.reduce.fmin"(%v) - : (!llvm<"<4 x float>">) -> !llvm.float + : (!llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%min) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 1 %add1 = "llvm.intr.experimental.vector.reduce.v2.fadd"(%0, %v) - : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%add1) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 11 %add1r = "llvm.intr.experimental.vector.reduce.v2.fadd"(%0, %v) - {reassoc = true} : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%add1r) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 11 %add2 = "llvm.intr.experimental.vector.reduce.v2.fadd"(%1, %v) - : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%add2) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 12 %add2r = "llvm.intr.experimental.vector.reduce.v2.fadd"(%1, %v) - {reassoc = true} : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%add2r) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 12 %mul1 = "llvm.intr.experimental.vector.reduce.v2.fmul"(%0, %v) - : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%mul1) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 24 %mul1r = "llvm.intr.experimental.vector.reduce.v2.fmul"(%0, %v) - {reassoc = true} : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%mul1r) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 24 %mul2 = "llvm.intr.experimental.vector.reduce.v2.fmul"(%1, %v) - : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%mul2) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 48 %mul2r = "llvm.intr.experimental.vector.reduce.v2.fmul"(%1, %v) - {reassoc = true} : (!llvm.float, !llvm<"<4 x float>">) -> !llvm.float + {reassoc = true} : (!llvm.float, !llvm.vec<4 x float>) -> !llvm.float llvm.call @print_f32(%mul2r) : (!llvm.float) -> () llvm.call @print_newline() : () -> () // CHECK: 48 diff --git a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir index 3fa556c62b31e..227c00a08ad82 100644 --- a/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir +++ b/mlir/integration_test/Dialect/LLVMIR/CPU/test-vector-reductions-int.mlir @@ -12,68 +12,68 @@ module { %1 = llvm.mlir.constant(2 : i32) : !llvm.i32 %2 = llvm.mlir.constant(3 : i32) : !llvm.i32 %3 = llvm.mlir.constant(4 : i32) : !llvm.i32 - %4 = llvm.mlir.undef : !llvm<"<4 x i32>"> + %4 = llvm.mlir.undef : !llvm.vec<4 x i32> %5 = llvm.mlir.constant(0 : index) : !llvm.i64 - %6 = llvm.insertelement %0, %4[%5 : !llvm.i64] : !llvm<"<4 x i32>"> + %6 = llvm.insertelement %0, %4[%5 : !llvm.i64] : !llvm.vec<4 x i32> %7 = llvm.shufflevector %6, %4 [0 : i32, 0 : i32, 0 : i32, 0 : i32] - : !llvm<"<4 
x i32>">, !llvm<"<4 x i32>"> + : !llvm.vec<4 x i32>, !llvm.vec<4 x i32> %8 = llvm.mlir.constant(1 : i64) : !llvm.i64 - %9 = llvm.insertelement %1, %7[%8 : !llvm.i64] : !llvm<"<4 x i32>"> + %9 = llvm.insertelement %1, %7[%8 : !llvm.i64] : !llvm.vec<4 x i32> %10 = llvm.mlir.constant(2 : i64) : !llvm.i64 - %11 = llvm.insertelement %2, %9[%10 : !llvm.i64] : !llvm<"<4 x i32>"> + %11 = llvm.insertelement %2, %9[%10 : !llvm.i64] : !llvm.vec<4 x i32> %12 = llvm.mlir.constant(3 : i64) : !llvm.i64 - %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm<"<4 x i32>"> + %v = llvm.insertelement %3, %11[%12 : !llvm.i64] : !llvm.vec<4 x i32> %add = "llvm.intr.experimental.vector.reduce.add"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%add) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 10 %and = "llvm.intr.experimental.vector.reduce.and"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%and) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 0 %mul = "llvm.intr.experimental.vector.reduce.mul"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%mul) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 24 %or = "llvm.intr.experimental.vector.reduce.or"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%or) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 7 %smax = "llvm.intr.experimental.vector.reduce.smax"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%smax) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 4 %smin = "llvm.intr.experimental.vector.reduce.smin"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%smin) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 1 %umax = "llvm.intr.experimental.vector.reduce.umax"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%umax) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 4 %umin = "llvm.intr.experimental.vector.reduce.umin"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%umin) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 1 %xor = "llvm.intr.experimental.vector.reduce.xor"(%v) - : (!llvm<"<4 x i32>">) -> !llvm.i32 + : (!llvm.vec<4 x i32>) -> !llvm.i32 llvm.call @print_i32(%xor) : (!llvm.i32) -> () llvm.call @print_newline() : () -> () // CHECK: 4 diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index 569619f2bcef2..eeefd372f85f5 100644 --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRVectorToLLVM LINK_LIBS PUBLIC MLIRLLVMIR MLIRStandardToLLVM - MLIRVector + MLIRTargetLLVMIRModuleTranslation MLIRTransforms + MLIRVector ) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 4fa7b573f84ef..3dbfaf88a443b 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -23,6 +23,7 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/IR/StandardTypes.h" #include "mlir/IR/Types.h" 
+#include "mlir/Target/LLVMIR/TypeTranslation.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" #include "llvm/IR/DerivedTypes.h" @@ -126,8 +127,10 @@ LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op, return failure(); auto dataLayout = typeConverter.getDialect()->getLLVMModule().getDataLayout(); - align = dataLayout.getPrefTypeAlignment( - LLVM::convertLLVMType(elementTy.cast())); + // TODO: this should be abstracted away to avoid depending on translation. + align = dataLayout.getPrefTypeAlignment(LLVM::translateTypeToLLVMIR( + elementTy.cast(), + typeConverter.getDialect()->getLLVMContext())); return success(); } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 17848c6bf3ee9..cc2200e84da57 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/FunctionImplementation.h" @@ -1691,12 +1692,6 @@ struct LLVMDialectImpl { llvm::LLVMContext llvmContext; llvm::Module module; - /// A set of LLVMTypes that are cached on construction to avoid any lookups or - /// locking. - LLVMType int1Ty, int8Ty, int16Ty, int32Ty, int64Ty, int128Ty; - LLVMType doubleTy, floatTy, bfloatTy, halfTy, fp128Ty, x86_fp80Ty; - LLVMType voidTy; - /// A smart mutex to lock access to the llvm context. Unlike MLIR, LLVM is not /// multi-threaded and requires locked access to prevent race conditions. llvm::sys::SmartMutex mutex; @@ -1708,7 +1703,27 @@ struct LLVMDialectImpl { LLVMDialect::LLVMDialect(MLIRContext *context) : Dialect(getDialectNamespace(), context), impl(new detail::LLVMDialectImpl()) { - addTypes(); + // clang-format off + addTypes(); + // clang-format on addOperations< #define GET_OP_LIST #include "mlir/Dialect/LLVMIR/LLVMOps.cpp.inc" @@ -1716,26 +1731,6 @@ LLVMDialect::LLVMDialect(MLIRContext *context) // Support unknown operations because not all LLVM operations are registered. allowUnknownOperations(); - - // Cache some of the common LLVM types to avoid the need for lookups/locking. - auto &llvmContext = impl->llvmContext; - /// Integer Types. - impl->int1Ty = LLVMType::get(context, llvm::Type::getInt1Ty(llvmContext)); - impl->int8Ty = LLVMType::get(context, llvm::Type::getInt8Ty(llvmContext)); - impl->int16Ty = LLVMType::get(context, llvm::Type::getInt16Ty(llvmContext)); - impl->int32Ty = LLVMType::get(context, llvm::Type::getInt32Ty(llvmContext)); - impl->int64Ty = LLVMType::get(context, llvm::Type::getInt64Ty(llvmContext)); - impl->int128Ty = LLVMType::get(context, llvm::Type::getInt128Ty(llvmContext)); - /// Float Types. - impl->doubleTy = LLVMType::get(context, llvm::Type::getDoubleTy(llvmContext)); - impl->floatTy = LLVMType::get(context, llvm::Type::getFloatTy(llvmContext)); - impl->bfloatTy = LLVMType::get(context, llvm::Type::getBFloatTy(llvmContext)); - impl->halfTy = LLVMType::get(context, llvm::Type::getHalfTy(llvmContext)); - impl->fp128Ty = LLVMType::get(context, llvm::Type::getFP128Ty(llvmContext)); - impl->x86_fp80Ty = - LLVMType::get(context, llvm::Type::getX86_FP80Ty(llvmContext)); - /// Other Types. 
- impl->voidTy = LLVMType::get(context, llvm::Type::getVoidTy(llvmContext)); } LLVMDialect::~LLVMDialect() {} @@ -1751,25 +1746,12 @@ llvm::sys::SmartMutex &LLVMDialect::getLLVMContextMutex() { /// Parse a type registered to this dialect. Type LLVMDialect::parseType(DialectAsmParser &parser) const { - StringRef tyData = parser.getFullSymbolSpec(); - - // LLVM is not thread-safe, so lock access to it. - llvm::sys::SmartScopedLock lock(impl->mutex); - - llvm::SMDiagnostic errorMessage; - llvm::Type *type = llvm::parseType(tyData, errorMessage, impl->module); - if (!type) - return (parser.emitError(parser.getNameLoc(), errorMessage.getMessage()), - nullptr); - return LLVMType::get(getContext(), type); + return detail::parseType(parser); } /// Print a type registered to this dialect. void LLVMDialect::printType(Type type, DialectAsmPrinter &os) const { - auto llvmType = type.dyn_cast(); - assert(llvmType && "printing wrong type"); - assert(llvmType.getUnderlyingType() && "no underlying LLVM type"); - llvmType.getUnderlyingType()->print(os.getStream()); + return detail::printType(type.cast(), os); } /// Verify LLVMIR function argument attributes. @@ -1788,242 +1770,6 @@ LogicalResult LLVMDialect::verifyRegionArgAttribute(Operation *op, return success(); } -//===----------------------------------------------------------------------===// -// LLVMType. -//===----------------------------------------------------------------------===// - -namespace mlir { -namespace LLVM { -namespace detail { -struct LLVMTypeStorage : public ::mlir::TypeStorage { - LLVMTypeStorage(llvm::Type *ty) : underlyingType(ty) {} - - // LLVM types are pointer-unique. - using KeyTy = llvm::Type *; - bool operator==(const KeyTy &key) const { return key == underlyingType; } - - static LLVMTypeStorage *construct(TypeStorageAllocator &allocator, - llvm::Type *ty) { - return new (allocator.allocate()) LLVMTypeStorage(ty); - } - - llvm::Type *underlyingType; -}; -} // end namespace detail -} // end namespace LLVM -} // end namespace mlir - -LLVMType LLVMType::get(MLIRContext *context, llvm::Type *llvmType) { - return Base::get(context, FIRST_LLVM_TYPE, llvmType); -} - -/// Get an LLVMType with an llvm type that may cause changes to the underlying -/// llvm context when constructed. -LLVMType LLVMType::getLocked(LLVMDialect *dialect, - function_ref typeBuilder) { - // Lock access to the llvm context and build the type. - llvm::sys::SmartScopedLock lock(dialect->impl->mutex); - return get(dialect->getContext(), typeBuilder()); -} - -LLVMDialect &LLVMType::getDialect() { - return static_cast(Type::getDialect()); -} - -llvm::Type *LLVMType::getUnderlyingType() const { - return getImpl()->underlyingType; -} - -void LLVMType::getUnderlyingTypes(ArrayRef types, - SmallVectorImpl &result) { - result.reserve(result.size() + types.size()); - for (LLVMType ty : types) - result.push_back(ty.getUnderlyingType()); -} - -/// Array type utilities. -LLVMType LLVMType::getArrayElementType() { - return get(getContext(), getUnderlyingType()->getArrayElementType()); -} -unsigned LLVMType::getArrayNumElements() { - return getUnderlyingType()->getArrayNumElements(); -} -bool LLVMType::isArrayTy() { return getUnderlyingType()->isArrayTy(); } - -/// Vector type utilities. 
-LLVMType LLVMType::getVectorElementType() { - return get( - getContext(), - llvm::cast(getUnderlyingType())->getElementType()); -} -unsigned LLVMType::getVectorNumElements() { - return llvm::cast(getUnderlyingType()) - ->getNumElements(); -} -llvm::ElementCount LLVMType::getVectorElementCount() { - return llvm::cast(getUnderlyingType())->getElementCount(); -} -bool LLVMType::isVectorTy() { return getUnderlyingType()->isVectorTy(); } - -/// Function type utilities. -LLVMType LLVMType::getFunctionParamType(unsigned argIdx) { - return get(getContext(), getUnderlyingType()->getFunctionParamType(argIdx)); -} -unsigned LLVMType::getFunctionNumParams() { - return getUnderlyingType()->getFunctionNumParams(); -} -LLVMType LLVMType::getFunctionResultType() { - return get( - getContext(), - llvm::cast(getUnderlyingType())->getReturnType()); -} -bool LLVMType::isFunctionTy() { return getUnderlyingType()->isFunctionTy(); } -bool LLVMType::isFunctionVarArg() { - return getUnderlyingType()->isFunctionVarArg(); -} - -/// Pointer type utilities. -LLVMType LLVMType::getPointerTo(unsigned addrSpace) { - // Lock access to the dialect as this may modify the LLVM context. - return getLocked(&getDialect(), [=] { - return getUnderlyingType()->getPointerTo(addrSpace); - }); -} -LLVMType LLVMType::getPointerElementTy() { - return get(getContext(), getUnderlyingType()->getPointerElementType()); -} -bool LLVMType::isPointerTy() { return getUnderlyingType()->isPointerTy(); } -bool LLVMType::isValidPointerElementType(LLVMType type) { - return llvm::PointerType::isValidElementType(type.getUnderlyingType()); -} - -/// Struct type utilities. -LLVMType LLVMType::getStructElementType(unsigned i) { - return get(getContext(), getUnderlyingType()->getStructElementType(i)); -} -unsigned LLVMType::getStructNumElements() { - return getUnderlyingType()->getStructNumElements(); -} -bool LLVMType::isStructTy() { return getUnderlyingType()->isStructTy(); } - -/// Utilities used to generate floating point types. -LLVMType LLVMType::getDoubleTy(LLVMDialect *dialect) { - return dialect->impl->doubleTy; -} -LLVMType LLVMType::getFloatTy(LLVMDialect *dialect) { - return dialect->impl->floatTy; -} -LLVMType LLVMType::getBFloatTy(LLVMDialect *dialect) { - return dialect->impl->bfloatTy; -} -LLVMType LLVMType::getHalfTy(LLVMDialect *dialect) { - return dialect->impl->halfTy; -} -LLVMType LLVMType::getFP128Ty(LLVMDialect *dialect) { - return dialect->impl->fp128Ty; -} -LLVMType LLVMType::getX86_FP80Ty(LLVMDialect *dialect) { - return dialect->impl->x86_fp80Ty; -} - -/// Utilities used to generate integer types. -LLVMType LLVMType::getIntNTy(LLVMDialect *dialect, unsigned numBits) { - switch (numBits) { - case 1: - return dialect->impl->int1Ty; - case 8: - return dialect->impl->int8Ty; - case 16: - return dialect->impl->int16Ty; - case 32: - return dialect->impl->int32Ty; - case 64: - return dialect->impl->int64Ty; - case 128: - return dialect->impl->int128Ty; - default: - break; - } - - // Lock access to the dialect as this may modify the LLVM context. - return getLocked(dialect, [=] { - return llvm::Type::getIntNTy(dialect->getLLVMContext(), numBits); - }); -} - -/// Utilities used to generate other miscellaneous types. -LLVMType LLVMType::getArrayTy(LLVMType elementType, uint64_t numElements) { - // Lock access to the dialect as this may modify the LLVM context. 
- return getLocked(&elementType.getDialect(), [=] { - return llvm::ArrayType::get(elementType.getUnderlyingType(), numElements); - }); -} -LLVMType LLVMType::getFunctionTy(LLVMType result, ArrayRef params, - bool isVarArg) { - SmallVector llvmParams; - for (auto param : params) - llvmParams.push_back(param.getUnderlyingType()); - - // Lock access to the dialect as this may modify the LLVM context. - return getLocked(&result.getDialect(), [=] { - return llvm::FunctionType::get(result.getUnderlyingType(), llvmParams, - isVarArg); - }); -} -LLVMType LLVMType::getStructTy(LLVMDialect *dialect, - ArrayRef elements, bool isPacked) { - SmallVector llvmElements; - for (auto elt : elements) - llvmElements.push_back(elt.getUnderlyingType()); - - // Lock access to the dialect as this may modify the LLVM context. - return getLocked(dialect, [=] { - return llvm::StructType::get(dialect->getLLVMContext(), llvmElements, - isPacked); - }); -} -LLVMType LLVMType::createStructTy(LLVMDialect *dialect, - ArrayRef elements, - Optional name, bool isPacked) { - StringRef sr = name.hasValue() ? *name : ""; - SmallVector llvmElements; - getUnderlyingTypes(elements, llvmElements); - return getLocked(dialect, [=] { - auto *rv = llvm::StructType::create(dialect->getLLVMContext(), sr); - if (!llvmElements.empty()) - rv->setBody(llvmElements, isPacked); - return rv; - }); -} -LLVMType LLVMType::setStructTyBody(LLVMType structType, - ArrayRef elements, bool isPacked) { - llvm::StructType *st = - llvm::cast(structType.getUnderlyingType()); - SmallVector llvmElements; - getUnderlyingTypes(elements, llvmElements); - return getLocked(&structType.getDialect(), [=] { - st->setBody(llvmElements, isPacked); - return st; - }); -} -LLVMType LLVMType::getVectorTy(LLVMType elementType, unsigned numElements) { - // Lock access to the dialect as this may modify the LLVM context. - return getLocked(&elementType.getDialect(), [=] { - return llvm::FixedVectorType::get(elementType.getUnderlyingType(), - numElements); - }); -} - -LLVMType LLVMType::getVoidTy(LLVMDialect *dialect) { - return dialect->impl->voidTy; -} - -bool LLVMType::isVoidTy() { return getUnderlyingType()->isVoidTy(); } - -llvm::Type *mlir::LLVM::convertLLVMType(LLVMType type) { - return type.getUnderlyingType(); -} - //===----------------------------------------------------------------------===// // Utility functions. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp index d272297525c1e..7df3ebe7c5b4e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp @@ -18,48 +18,48 @@ using namespace mlir::LLVM; // Printing. //===----------------------------------------------------------------------===// -static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type, +static void printTypeImpl(llvm::raw_ostream &os, LLVMType type, llvm::SetVector &stack); /// Returns the keyword to use for the given type. 
-static StringRef getTypeKeyword(LLVMTypeNew type) { +static StringRef getTypeKeyword(LLVMType type) { switch (type.getKind()) { - case LLVMTypeNew::VoidType: + case LLVMType::VoidType: return "void"; - case LLVMTypeNew::HalfType: + case LLVMType::HalfType: return "half"; - case LLVMTypeNew::BFloatType: + case LLVMType::BFloatType: return "bfloat"; - case LLVMTypeNew::FloatType: + case LLVMType::FloatType: return "float"; - case LLVMTypeNew::DoubleType: + case LLVMType::DoubleType: return "double"; - case LLVMTypeNew::FP128Type: + case LLVMType::FP128Type: return "fp128"; - case LLVMTypeNew::X86FP80Type: + case LLVMType::X86FP80Type: return "x86_fp80"; - case LLVMTypeNew::PPCFP128Type: + case LLVMType::PPCFP128Type: return "ppc_fp128"; - case LLVMTypeNew::X86MMXType: + case LLVMType::X86MMXType: return "x86_mmx"; - case LLVMTypeNew::TokenType: + case LLVMType::TokenType: return "token"; - case LLVMTypeNew::LabelType: + case LLVMType::LabelType: return "label"; - case LLVMTypeNew::MetadataType: + case LLVMType::MetadataType: return "metadata"; - case LLVMTypeNew::FunctionType: + case LLVMType::FunctionType: return "func"; - case LLVMTypeNew::IntegerType: + case LLVMType::IntegerType: return "i"; - case LLVMTypeNew::PointerType: + case LLVMType::PointerType: return "ptr"; - case LLVMTypeNew::FixedVectorType: - case LLVMTypeNew::ScalableVectorType: + case LLVMType::FixedVectorType: + case LLVMType::ScalableVectorType: return "vec"; - case LLVMTypeNew::ArrayType: + case LLVMType::ArrayType: return "array"; - case LLVMTypeNew::StructType: + case LLVMType::StructType: return "struct"; } llvm_unreachable("unhandled type kind"); @@ -81,7 +81,7 @@ static void printStructTypeBody(llvm::raw_ostream &os, LLVMStructType type, os << '('; if (type.isIdentified()) stack.insert(type.getName()); - llvm::interleaveComma(type.getBody(), os, [&](LLVMTypeNew subtype) { + llvm::interleaveComma(type.getBody(), os, [&](LLVMType subtype) { printTypeImpl(os, subtype, stack); }); if (type.isIdentified()) @@ -126,10 +126,9 @@ static void printFunctionType(llvm::raw_ostream &os, LLVMFunctionType funcType, os << '<'; printTypeImpl(os, funcType.getReturnType(), stack); os << " ("; - llvm::interleaveComma(funcType.getParams(), os, - [&os, &stack](LLVMTypeNew subtype) { - printTypeImpl(os, subtype, stack); - }); + llvm::interleaveComma( + funcType.getParams(), os, + [&os, &stack](LLVMType subtype) { printTypeImpl(os, subtype, stack); }); if (funcType.isVarArg()) { if (funcType.getNumParams() != 0) os << ", "; @@ -147,7 +146,7 @@ static void printFunctionType(llvm::raw_ostream &os, LLVMFunctionType funcType, /// struct<"c", (ptr>)>>, /// ptr>)>>)> /// note that "b" is printed twice. -static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type, +static void printTypeImpl(llvm::raw_ostream &os, LLVMType type, llvm::SetVector &stack) { if (!type) { os << "<>"; @@ -158,8 +157,8 @@ static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type, os << getTypeKeyword(type); // Trivial types only consist of their keyword. 
- if (LLVMTypeNew::FIRST_TRIVIAL_TYPE <= kind && - kind <= LLVMTypeNew::LAST_TRIVIAL_TYPE) + if (LLVMType::FIRST_TRIVIAL_TYPE <= kind && + kind <= LLVMType::LAST_TRIVIAL_TYPE) return; if (auto intType = type.dyn_cast()) { @@ -194,8 +193,7 @@ static void printTypeImpl(llvm::raw_ostream &os, LLVMTypeNew type, printFunctionType(os, type.cast(), stack); } -void mlir::LLVM::detail::printType(LLVMTypeNew type, - DialectAsmPrinter &printer) { +void mlir::LLVM::detail::printType(LLVMType type, DialectAsmPrinter &printer) { llvm::SetVector stack; return printTypeImpl(printer.getStream(), type, stack); } @@ -204,13 +202,13 @@ void mlir::LLVM::detail::printType(LLVMTypeNew type, // Parsing. //===----------------------------------------------------------------------===// -static LLVMTypeNew parseTypeImpl(DialectAsmParser &parser, - llvm::SetVector &stack); +static LLVMType parseTypeImpl(DialectAsmParser &parser, + llvm::SetVector &stack); /// Helper to be chained with other parsing functions. static ParseResult parseTypeImpl(DialectAsmParser &parser, llvm::SetVector &stack, - LLVMTypeNew &result) { + LLVMType &result) { result = parseTypeImpl(parser, stack); return success(result != nullptr); } @@ -219,7 +217,7 @@ static ParseResult parseTypeImpl(DialectAsmParser &parser, /// llvm-type :: = `func<` llvm-type `(` llvm-type-list `...`? `)>` static LLVMFunctionType parseFunctionType(DialectAsmParser &parser, llvm::SetVector &stack) { - LLVMTypeNew returnType; + LLVMType returnType; if (parser.parseLess() || parseTypeImpl(parser, stack, returnType) || parser.parseLParen()) return LLVMFunctionType(); @@ -232,7 +230,7 @@ static LLVMFunctionType parseFunctionType(DialectAsmParser &parser, } // Parse arguments. - SmallVector argTypes; + SmallVector argTypes; do { if (succeeded(parser.parseOptionalEllipsis())) { if (parser.parseOptionalRParen() || parser.parseOptionalGreater()) @@ -254,7 +252,7 @@ static LLVMFunctionType parseFunctionType(DialectAsmParser &parser, /// llvm-type ::= `ptr<` llvm-type (`,` integer)? `>` static LLVMPointerType parsePointerType(DialectAsmParser &parser, llvm::SetVector &stack) { - LLVMTypeNew elementType; + LLVMType elementType; if (parser.parseLess() || parseTypeImpl(parser, stack, elementType)) return LLVMPointerType(); @@ -274,7 +272,7 @@ static LLVMVectorType parseVectorType(DialectAsmParser &parser, llvm::SetVector &stack) { SmallVector dims; llvm::SMLoc dimPos; - LLVMTypeNew elementType; + LLVMType elementType; if (parser.parseLess() || parser.getCurrentLocation(&dimPos) || parser.parseDimensionList(dims, /*allowDynamic=*/true) || parseTypeImpl(parser, stack, elementType) || parser.parseGreater()) @@ -304,7 +302,7 @@ static LLVMArrayType parseArrayType(DialectAsmParser &parser, llvm::SetVector &stack) { SmallVector dims; llvm::SMLoc sizePos; - LLVMTypeNew elementType; + LLVMType elementType; if (parser.parseLess() || parser.getCurrentLocation(&sizePos) || parser.parseDimensionList(dims, /*allowDynamic=*/false) || parseTypeImpl(parser, stack, elementType) || parser.parseGreater()) @@ -322,7 +320,7 @@ static LLVMArrayType parseArrayType(DialectAsmParser &parser, /// error at `subtypesLoc` in case of failure, uses `stack` to make sure the /// types printed in the error message look like they did when parsed. 
 static LLVMStructType trySetStructBody(LLVMStructType type,
-                                       ArrayRef<LLVMTypeNew> subtypes,
+                                       ArrayRef<LLVMType> subtypes,
                                        bool isPacked, DialectAsmParser &parser,
                                        llvm::SMLoc subtypesLoc,
                                        llvm::SetVector<StringRef> &stack) {
@@ -398,12 +396,12 @@ static LLVMStructType parseStructType(DialectAsmParser &parser,

   // Parse subtypes. For identified structs, put the identifier of the struct on
   // the stack to support self-references in the recursive calls.
-  SmallVector<LLVMTypeNew, 4> subtypes;
+  SmallVector<LLVMType, 4> subtypes;
   llvm::SMLoc subtypesLoc = parser.getCurrentLocation();
   do {
     if (isIdentified)
       stack.insert(name);
-    LLVMTypeNew type = parseTypeImpl(parser, stack);
+    LLVMType type = parseTypeImpl(parser, stack);
     if (!type)
       return LLVMStructType();
     subtypes.push_back(type);
@@ -422,8 +420,8 @@
 }

 /// Parses one of the LLVM dialect types.
-static LLVMTypeNew parseTypeImpl(DialectAsmParser &parser,
-                                 llvm::SetVector<StringRef> &stack) {
+static LLVMType parseTypeImpl(DialectAsmParser &parser,
+                              llvm::SetVector<StringRef> &stack) {
   // Special case for integers (i[1-9][0-9]*) that are literals rather than
   // keywords for the parser, so they are not caught by the main dispatch below.
   // Try parsing it as a built-in integer type instead.
@@ -433,11 +431,11 @@ static LLVMTypeNew parseTypeImpl(DialectAsmParser &parser,
   OptionalParseResult result = parser.parseOptionalType(maybeIntegerType);
   if (result.hasValue()) {
     if (failed(*result))
-      return LLVMTypeNew();
+      return LLVMType();

     if (!maybeIntegerType.isSignlessInteger()) {
       parser.emitError(keyLoc) << "unexpected type, expected i* or keyword";
-      return LLVMTypeNew();
+      return LLVMType();
     }
     return LLVMIntegerType::get(ctx, maybeIntegerType.getIntOrFloatBitWidth());
   }

@@ -445,9 +443,9 @@
   // Dispatch to concrete functions.
   StringRef key;
   if (failed(parser.parseKeyword(&key)))
-    return LLVMTypeNew();
+    return LLVMType();

-  return llvm::StringSwitch<function_ref<LLVMTypeNew()>>(key)
+  return llvm::StringSwitch<function_ref<LLVMType()>>(key)
       .Case("void", [&] { return LLVMVoidType::get(ctx); })
       .Case("half", [&] { return LLVMHalfType::get(ctx); })
       .Case("bfloat", [&] { return LLVMBFloatType::get(ctx); })
       .Case("func", [&] { return parseFunctionType(parser, stack); })
       .Case("ptr", [&] { return parsePointerType(parser, stack); })
       .Case("vec", [&] { return parseVectorType(parser, stack); })
       .Case("array", [&] { return parseArrayType(parser, stack); })
       .Case("struct", [&] { return parseStructType(parser, stack); })
       .Default([&] {
         parser.emitError(keyLoc) << "unknown LLVM type: " << key;
-        return LLVMTypeNew();
+        return LLVMType();
       })();
 }

-LLVMTypeNew mlir::LLVM::detail::parseType(DialectAsmParser &parser) {
+LLVMType mlir::LLVM::detail::parseType(DialectAsmParser &parser) {
   llvm::SetVector<StringRef> stack;
   return parseTypeImpl(parser, stack);
 }
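For readers skimming the dispatch above, a few example type strings it accepts; these are hand-picked illustrations of the keyword grammar, not an exhaustive list lifted from the patch:

  const char *examples[] = {
      "!llvm.void",                     // trivial type: keyword only
      "!llvm.i32",                      // integers are `i` + bitwidth, matched before the dispatch
      "!llvm.ptr<float, 5>",            // pointer with optional address space
      "!llvm.vec<4 x float>",           // fixed vector
      "!llvm.array<8 x i8>",            // array
      "!llvm.func<i32 (ptr<i8>, ...)>", // variadic function
      "!llvm.struct<\"a\", (i32)>",     // identified struct
  };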
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
index abecbccb1d4aa..fa25f2dcdad85 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp
@@ -24,92 +24,89 @@ using namespace mlir;
 using namespace mlir::LLVM;

 //===----------------------------------------------------------------------===//
-// LLVMTypeNew.
+// LLVMType.
 //===----------------------------------------------------------------------===//

-// TODO: when these types are registered with the LLVMDialect, this method
-// should be removed and the regular Type::getDialect should just work.
-LLVMDialect &LLVMTypeNew::getDialect() {
-  return *getContext()->getRegisteredDialect<LLVMDialect>();
+LLVMDialect &LLVMType::getDialect() {
+  return static_cast<LLVMDialect &>(Type::getDialect());
 }

 //----------------------------------------------------------------------------//
 // Integer type utilities.

-bool LLVMTypeNew::isIntegerTy(unsigned bitwidth) {
+bool LLVMType::isIntegerTy(unsigned bitwidth) {
   if (auto intType = dyn_cast<LLVMIntegerType>())
     return intType.getBitWidth() == bitwidth;
   return false;
 }
-
-unsigned LLVMTypeNew::getIntegerBitWidth() {
+unsigned LLVMType::getIntegerBitWidth() {
   return cast<LLVMIntegerType>().getBitWidth();
 }

-LLVMTypeNew LLVMTypeNew::getArrayElementType() {
+LLVMType LLVMType::getArrayElementType() {
   return cast<LLVMArrayType>().getElementType();
 }

 //----------------------------------------------------------------------------//
 // Array type utilities.

-unsigned LLVMTypeNew::getArrayNumElements() {
+unsigned LLVMType::getArrayNumElements() {
   return cast<LLVMArrayType>().getNumElements();
 }

-bool LLVMTypeNew::isArrayTy() { return isa<LLVMArrayType>(); }
+bool LLVMType::isArrayTy() { return isa<LLVMArrayType>(); }

 //----------------------------------------------------------------------------//
 // Vector type utilities.

-LLVMTypeNew LLVMTypeNew::getVectorElementType() {
+LLVMType LLVMType::getVectorElementType() {
   return cast<LLVMVectorType>().getElementType();
 }
-unsigned LLVMTypeNew::getVectorNumElements() {
+unsigned LLVMType::getVectorNumElements() {
   return cast<LLVMFixedVectorType>().getNumElements();
 }
-llvm::ElementCount LLVMTypeNew::getVectorElementCount() {
+llvm::ElementCount LLVMType::getVectorElementCount() {
   return cast<LLVMVectorType>().getElementCount();
 }
-bool LLVMTypeNew::isVectorTy() { return isa<LLVMVectorType>(); }
+bool LLVMType::isVectorTy() { return isa<LLVMVectorType>(); }

 //----------------------------------------------------------------------------//
 // Function type utilities.

-LLVMTypeNew LLVMTypeNew::getFunctionParamType(unsigned argIdx) {
+LLVMType LLVMType::getFunctionParamType(unsigned argIdx) {
   return cast<LLVMFunctionType>().getParamType(argIdx);
 }

-unsigned LLVMTypeNew::getFunctionNumParams() {
+unsigned LLVMType::getFunctionNumParams() {
   return cast<LLVMFunctionType>().getNumParams();
 }

-LLVMTypeNew LLVMTypeNew::getFunctionResultType() {
+LLVMType LLVMType::getFunctionResultType() {
   return cast<LLVMFunctionType>().getReturnType();
 }

-bool LLVMTypeNew::isFunctionTy() { return isa<LLVMFunctionType>(); }
+bool LLVMType::isFunctionTy() { return isa<LLVMFunctionType>(); }

-bool LLVMTypeNew::isFunctionVarArg() {
+bool LLVMType::isFunctionVarArg() {
   return cast<LLVMFunctionType>().isVarArg();
 }

 //----------------------------------------------------------------------------//
 // Pointer type utilities.

-LLVMTypeNew LLVMTypeNew::getPointerTo(unsigned addrSpace) {
+LLVMType LLVMType::getPointerTo(unsigned addrSpace) {
   return LLVMPointerType::get(*this, addrSpace);
 }

-LLVMTypeNew LLVMTypeNew::getPointerElementTy() {
+LLVMType LLVMType::getPointerElementTy() {
   return cast<LLVMPointerType>().getElementType();
 }

-bool LLVMTypeNew::isPointerTy() { return isa<LLVMPointerType>(); }
+bool LLVMType::isPointerTy() { return isa<LLVMPointerType>(); }

-bool LLVMTypeNew::isValidPointerElementType(LLVMTypeNew type) {
+bool LLVMType::isValidPointerElementType(LLVMType type) {
   return !type.isa<LLVMVoidType>() && !type.isa<LLVMTokenType>() &&
          !type.isa<LLVMMetadataType>() && !type.isa<LLVMLabelType>();
 }
@@ -117,91 +114,86 @@
 //----------------------------------------------------------------------------//
 // Struct type utilities.
-LLVMTypeNew LLVMTypeNew::getStructElementType(unsigned i) {
+LLVMType LLVMType::getStructElementType(unsigned i) {
   return cast<LLVMStructType>().getBody()[i];
 }
-unsigned LLVMTypeNew::getStructNumElements() {
+unsigned LLVMType::getStructNumElements() {
   return cast<LLVMStructType>().getBody().size();
 }
-bool LLVMTypeNew::isStructTy() { return isa<LLVMStructType>(); }
+bool LLVMType::isStructTy() { return isa<LLVMStructType>(); }

 //----------------------------------------------------------------------------//
 // Utilities used to generate floating point types.

-LLVMTypeNew LLVMTypeNew::getDoubleTy(LLVMDialect *dialect) {
+LLVMType LLVMType::getDoubleTy(LLVMDialect *dialect) {
   return LLVMDoubleType::get(dialect->getContext());
 }
-LLVMTypeNew LLVMTypeNew::getFloatTy(LLVMDialect *dialect) {
+LLVMType LLVMType::getFloatTy(LLVMDialect *dialect) {
   return LLVMFloatType::get(dialect->getContext());
 }
-LLVMTypeNew LLVMTypeNew::getBFloatTy(LLVMDialect *dialect) {
+LLVMType LLVMType::getBFloatTy(LLVMDialect *dialect) {
   return LLVMBFloatType::get(dialect->getContext());
 }
-LLVMTypeNew LLVMTypeNew::getHalfTy(LLVMDialect *dialect) {
+LLVMType LLVMType::getHalfTy(LLVMDialect *dialect) {
   return LLVMHalfType::get(dialect->getContext());
 }
-LLVMTypeNew LLVMTypeNew::getFP128Ty(LLVMDialect *dialect) {
+LLVMType LLVMType::getFP128Ty(LLVMDialect *dialect) {
   return LLVMFP128Type::get(dialect->getContext());
 }
-LLVMTypeNew LLVMTypeNew::getX86_FP80Ty(LLVMDialect *dialect) {
+LLVMType LLVMType::getX86_FP80Ty(LLVMDialect *dialect) {
   return LLVMX86FP80Type::get(dialect->getContext());
 }

 //----------------------------------------------------------------------------//
 // Utilities used to generate integer types.

-LLVMTypeNew LLVMTypeNew::getIntNTy(LLVMDialect *dialect, unsigned numBits) {
+LLVMType LLVMType::getIntNTy(LLVMDialect *dialect, unsigned numBits) {
   return LLVMIntegerType::get(dialect->getContext(), numBits);
 }

 //----------------------------------------------------------------------------//
 // Utilities used to generate other miscellaneous types.

-LLVMTypeNew LLVMTypeNew::getArrayTy(LLVMTypeNew elementType,
-                                    uint64_t numElements) {
+LLVMType LLVMType::getArrayTy(LLVMType elementType, uint64_t numElements) {
   return LLVMArrayType::get(elementType, numElements);
 }

-LLVMTypeNew LLVMTypeNew::getFunctionTy(LLVMTypeNew result,
-                                       ArrayRef<LLVMTypeNew> params,
-                                       bool isVarArg) {
+LLVMType LLVMType::getFunctionTy(LLVMType result, ArrayRef<LLVMType> params,
+                                 bool isVarArg) {
   return LLVMFunctionType::get(result, params, isVarArg);
 }

-LLVMTypeNew LLVMTypeNew::getStructTy(LLVMDialect *dialect,
-                                     ArrayRef<LLVMTypeNew> elements,
-                                     bool isPacked) {
+LLVMType LLVMType::getStructTy(LLVMDialect *dialect,
+                               ArrayRef<LLVMType> elements, bool isPacked) {
   return LLVMStructType::getLiteral(dialect->getContext(), elements, isPacked);
 }

-LLVMTypeNew LLVMTypeNew::getVectorTy(LLVMTypeNew elementType,
-                                     unsigned numElements) {
+LLVMType LLVMType::getVectorTy(LLVMType elementType, unsigned numElements) {
   return LLVMFixedVectorType::get(elementType, numElements);
 }

 //----------------------------------------------------------------------------//
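These wrappers deliberately mirror the llvm::Type API, so downstream code can query dialect types the same way it would query LLVM IR types. A hypothetical helper (not from the patch) written against them:

  // Checks whether a dialect type is a pointer to i8, i.e. !llvm.ptr<i8>.
  static bool isI8PtrTy(LLVMType type) {
    return type.isPointerTy() && type.getPointerElementTy().isIntegerTy(8);
  }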
 // Void type utilities.

-LLVMTypeNew LLVMTypeNew::getVoidTy(LLVMDialect *dialect) {
+LLVMType LLVMType::getVoidTy(LLVMDialect *dialect) {
   return LLVMVoidType::get(dialect->getContext());
 }

-bool LLVMTypeNew::isVoidTy() { return isa<LLVMVoidType>(); }
+bool LLVMType::isVoidTy() { return isa<LLVMVoidType>(); }

 //----------------------------------------------------------------------------//
 // Creation and setting of LLVM's identified struct types

-LLVMTypeNew LLVMTypeNew::createStructTy(LLVMDialect *dialect,
-                                        ArrayRef<LLVMTypeNew> elements,
-                                        Optional<StringRef> name,
-                                        bool isPacked) {
+LLVMType LLVMType::createStructTy(LLVMDialect *dialect,
+                                  ArrayRef<LLVMType> elements,
+                                  Optional<StringRef> name, bool isPacked) {
   assert(name.hasValue() &&
          "identified structs with no identifier not supported");
   StringRef stringNameBase = name.getValueOr("");
@@ -220,9 +212,8 @@
   } while (true);
 }

-LLVMTypeNew LLVMTypeNew::setStructTyBody(LLVMTypeNew structType,
-                                         ArrayRef<LLVMTypeNew> elements,
-                                         bool isPacked) {
+LLVMType LLVMType::setStructTyBody(LLVMType structType,
+                                   ArrayRef<LLVMType> elements, bool isPacked) {
   LogicalResult couldSet =
       structType.cast<LLVMStructType>().setBody(elements, isPacked);
   assert(succeeded(couldSet) && "failed to set the body");
@@ -233,29 +224,28 @@
 //===----------------------------------------------------------------------===//
 // Array type.

-LLVMArrayType LLVMArrayType::get(LLVMTypeNew elementType,
-                                 unsigned numElements) {
+LLVMArrayType LLVMArrayType::get(LLVMType elementType, unsigned numElements) {
   assert(elementType && "expected non-null subtype");
-  return Base::get(elementType.getContext(), LLVMTypeNew::ArrayType,
-                   elementType, numElements);
+  return Base::get(elementType.getContext(), LLVMType::ArrayType, elementType,
+                   numElements);
 }

-LLVMTypeNew LLVMArrayType::getElementType() { return getImpl()->elementType; }
+LLVMType LLVMArrayType::getElementType() { return getImpl()->elementType; }

 unsigned LLVMArrayType::getNumElements() { return getImpl()->numElements; }

 //===----------------------------------------------------------------------===//
 // Function type.

-LLVMFunctionType LLVMFunctionType::get(LLVMTypeNew result,
-                                       ArrayRef<LLVMTypeNew> arguments,
+LLVMFunctionType LLVMFunctionType::get(LLVMType result,
+                                       ArrayRef<LLVMType> arguments,
                                        bool isVarArg) {
   assert(result && "expected non-null result");
-  return Base::get(result.getContext(), LLVMTypeNew::FunctionType, result,
+  return Base::get(result.getContext(), LLVMType::FunctionType, result,
                    arguments, isVarArg);
 }

-LLVMTypeNew LLVMFunctionType::getReturnType() {
+LLVMType LLVMFunctionType::getReturnType() {
   return getImpl()->getReturnType();
 }

@@ -263,13 +253,13 @@
 unsigned LLVMFunctionType::getNumParams() {
   return getImpl()->getArgumentTypes().size();
 }

-LLVMTypeNew LLVMFunctionType::getParamType(unsigned i) {
+LLVMType LLVMFunctionType::getParamType(unsigned i) {
   return getImpl()->getArgumentTypes()[i];
 }

 bool LLVMFunctionType::isVarArg() { return getImpl()->isVariadic(); }

-ArrayRef<LLVMTypeNew> LLVMFunctionType::getParams() {
+ArrayRef<LLVMType> LLVMFunctionType::getParams() {
   return getImpl()->getArgumentTypes();
 }

@@ -277,7 +267,7 @@
 // Integer type.
 LLVMIntegerType LLVMIntegerType::get(MLIRContext *ctx, unsigned bitwidth) {
-  return Base::get(ctx, LLVMTypeNew::IntegerType, bitwidth);
+  return Base::get(ctx, LLVMType::IntegerType, bitwidth);
 }

 unsigned LLVMIntegerType::getBitWidth() { return getImpl()->bitwidth; }
@@ -285,14 +275,13 @@
 //===----------------------------------------------------------------------===//
 // Pointer type.

-LLVMPointerType LLVMPointerType::get(LLVMTypeNew pointee,
-                                     unsigned addressSpace) {
+LLVMPointerType LLVMPointerType::get(LLVMType pointee, unsigned addressSpace) {
   assert(pointee && "expected non-null subtype");
-  return Base::get(pointee.getContext(), LLVMTypeNew::PointerType, pointee,
+  return Base::get(pointee.getContext(), LLVMType::PointerType, pointee,
                    addressSpace);
 }

-LLVMTypeNew LLVMPointerType::getElementType() { return getImpl()->pointeeType; }
+LLVMType LLVMPointerType::getElementType() { return getImpl()->pointeeType; }

 unsigned LLVMPointerType::getAddressSpace() { return getImpl()->addressSpace; }

@@ -301,21 +290,20 @@
 // Struct type.

 LLVMStructType LLVMStructType::getIdentified(MLIRContext *context,
                                              StringRef name) {
-  return Base::get(context, LLVMTypeNew::StructType, name, /*opaque=*/false);
+  return Base::get(context, LLVMType::StructType, name, /*opaque=*/false);
 }

 LLVMStructType LLVMStructType::getLiteral(MLIRContext *context,
-                                          ArrayRef<LLVMTypeNew> types,
+                                          ArrayRef<LLVMType> types,
                                           bool isPacked) {
-  return Base::get(context, LLVMTypeNew::StructType, types, isPacked);
+  return Base::get(context, LLVMType::StructType, types, isPacked);
 }

 LLVMStructType LLVMStructType::getOpaque(StringRef name,
                                          MLIRContext *context) {
-  return Base::get(context, LLVMTypeNew::StructType, name, /*opaque=*/true);
+  return Base::get(context, LLVMType::StructType, name, /*opaque=*/true);
 }

-LogicalResult LLVMStructType::setBody(ArrayRef<LLVMTypeNew> types,
-                                      bool isPacked) {
+LogicalResult LLVMStructType::setBody(ArrayRef<LLVMType> types, bool isPacked) {
   assert(isIdentified() && "can only set bodies of identified structs");
   return Base::mutate(types, isPacked);
 }
@@ -327,7 +315,7 @@ bool LLVMStructType::isOpaque() {
 }
 bool LLVMStructType::isInitialized() { return getImpl()->isInitialized(); }
 StringRef LLVMStructType::getName() { return getImpl()->getIdentifier(); }
-ArrayRef<LLVMTypeNew> LLVMStructType::getBody() {
+ArrayRef<LLVMType> LLVMStructType::getBody() {
   return isIdentified() ? getImpl()->getIdentifiedStructBody()
                         : getImpl()->getTypeList();
 }
@@ -335,7 +323,7 @@
 //===----------------------------------------------------------------------===//
 // Vector types.
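The struct API above supports self-referential types by splitting creation from body assignment. A minimal sketch, assuming `ctx` is an MLIRContext pointer; in assembly the result is !llvm.struct<"node", (i32, ptr<struct<"node">>)>:

  LLVMStructType node = LLVMStructType::getIdentified(ctx, "node");
  LLVMType i32Ty = LLVMIntegerType::get(ctx, 32);
  // Mutation can fail if "node" was already given a different body.
  LogicalResult set =
      node.setBody({i32Ty, node.getPointerTo(/*addrSpace=*/0)}, /*isPacked=*/false);
  assert(succeeded(set) && "body was already set differently");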
-LLVMTypeNew LLVMVectorType::getElementType() {
+LLVMType LLVMVectorType::getElementType() {
   // Both derived classes share the implementation type.
   return static_cast<detail::LLVMTypeAndSizeStorage *>(impl)->elementType;
 }

@@ -347,10 +335,10 @@ llvm::ElementCount LLVMVectorType::getElementCount() {
       this->isa<LLVMScalableVectorType>());
 }

-LLVMFixedVectorType LLVMFixedVectorType::get(LLVMTypeNew elementType,
+LLVMFixedVectorType LLVMFixedVectorType::get(LLVMType elementType,
                                              unsigned numElements) {
   assert(elementType && "expected non-null subtype");
-  return Base::get(elementType.getContext(), LLVMTypeNew::FixedVectorType,
+  return Base::get(elementType.getContext(), LLVMType::FixedVectorType,
                    elementType, numElements)
       .cast<LLVMFixedVectorType>();
 }
@@ -359,10 +347,10 @@
 unsigned LLVMFixedVectorType::getNumElements() {
   return getImpl()->numElements;
 }

-LLVMScalableVectorType LLVMScalableVectorType::get(LLVMTypeNew elementType,
+LLVMScalableVectorType LLVMScalableVectorType::get(LLVMType elementType,
                                                    unsigned minNumElements) {
   assert(elementType && "expected non-null subtype");
-  return Base::get(elementType.getContext(), LLVMTypeNew::ScalableVectorType,
+  return Base::get(elementType.getContext(), LLVMType::ScalableVectorType,
                    elementType, minNumElements)
       .cast<LLVMScalableVectorType>();
 }
diff --git a/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h b/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
index 2b72e43e51648..3f2cc13299a48 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
+++ b/mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h
@@ -72,7 +72,7 @@ struct LLVMStructTypeStorage : public TypeStorage {
     Key(StringRef name, bool opaque)
         : name(name), identified(true), packed(false), opaque(opaque) {}
     /// Constructs a key for a literal struct.
-    Key(ArrayRef<LLVMTypeNew> types, bool packed)
+    Key(ArrayRef<LLVMType> types, bool packed)
         : types(types), identified(false), packed(packed), opaque(false) {}

     /// Checks a specific property of the struct.
@@ -96,7 +96,7 @@
     }

     /// Returns the list of types contained in the key of a literal struct.
-    ArrayRef<LLVMTypeNew> getTypeList() const {
+    ArrayRef<LLVMType> getTypeList() const {
       assert(!isIdentified() &&
              "identified struct key cannot have a type list");
       return types;
@@ -138,7 +138,7 @@
   private:
-    ArrayRef<LLVMTypeNew> types;
+    ArrayRef<LLVMType> types;
     StringRef name;
     bool identified;
     bool packed;
@@ -153,19 +153,18 @@
   }

   /// Returns the list of types (partially) identifying a literal struct.
-  ArrayRef<LLVMTypeNew> getTypeList() const {
+  ArrayRef<LLVMType> getTypeList() const {
     // If this triggers, use getIdentifiedStructBody() instead.
     assert(!isIdentified() && "requested typelist on an identified struct");
-    return ArrayRef<LLVMTypeNew>(static_cast<const LLVMTypeNew *>(keyPtr),
-                                 keySize());
+    return ArrayRef<LLVMType>(static_cast<const LLVMType *>(keyPtr), keySize());
   }

   /// Returns the list of types contained in an identified struct.
-  ArrayRef<LLVMTypeNew> getIdentifiedStructBody() const {
+  ArrayRef<LLVMType> getIdentifiedStructBody() const {
     // If this triggers, use getTypeList() instead.
     assert(isIdentified() &&
            "requested struct body on a non-identified struct");
-    return ArrayRef<LLVMTypeNew>(identifiedBodyArray, identifiedBodySize());
+    return ArrayRef<LLVMType>(identifiedBodyArray, identifiedBodySize());
   }

   /// Checks whether the struct is identified.
@@ -200,7 +199,7 @@
   /// as initialized and can no longer be mutated.
   LLVMStructTypeStorage(const KeyTy &key) {
     if (!key.isIdentified()) {
-      ArrayRef<LLVMTypeNew> types = key.getTypeList();
+      ArrayRef<LLVMType> types = key.getTypeList();
       keyPtr = static_cast<const void *>(types.data());
       setKeySize(types.size());
       llvm::Bitfield::set(keySizeAndFlags, key.isPacked());
@@ -233,8 +232,8 @@
   /// initialized, succeeds only if the body is equal to the current body. Fails
   /// if the struct is marked as intentionally opaque. The struct will be marked
   /// as initialized as a result of this operation and can no longer be changed.
-  LogicalResult mutate(TypeStorageAllocator &allocator,
-                       ArrayRef<LLVMTypeNew> body, bool packed) {
+  LogicalResult mutate(TypeStorageAllocator &allocator, ArrayRef<LLVMType> body,
+                       bool packed) {
     if (!isIdentified())
       return failure();
     if (isInitialized())
@@ -245,7 +244,7 @@
                         true);
     llvm::Bitfield::set(identifiedBodySizeAndFlags, packed);

-    ArrayRef<LLVMTypeNew> typesInAllocator = allocator.copyInto(body);
+    ArrayRef<LLVMType> typesInAllocator = allocator.copyInto(body);
     identifiedBodyArray = typesInAllocator.data();
     setIdentifiedBodySize(typesInAllocator.size());

@@ -311,7 +310,7 @@
   const void *keyPtr = nullptr;
   /// Pointer to the first type contained in an identified struct.
-  const LLVMTypeNew *identifiedBodyArray = nullptr;
+  const LLVMType *identifiedBodyArray = nullptr;

   /// Size of the uniquing key combined with identified/literal and
   /// packedness bits. Must only be used through the Key* bitfields.
@@ -329,11 +328,11 @@
 /// Type storage for LLVM dialect function types. These are uniqued using the
 /// list of types they contain and the vararg bit.
 struct LLVMFunctionTypeStorage : public TypeStorage {
-  using KeyTy = std::tuple<LLVMTypeNew, ArrayRef<LLVMTypeNew>, bool>;
+  using KeyTy = std::tuple<LLVMType, ArrayRef<LLVMType>, bool>;

   /// Construct a storage from the given components. The list is expected to be
   /// allocated in the context.
-  LLVMFunctionTypeStorage(LLVMTypeNew result, ArrayRef<LLVMTypeNew> arguments,
+  LLVMFunctionTypeStorage(LLVMType result, ArrayRef<LLVMType> arguments,
                           bool variadic)
       : argumentTypes(arguments) {
     returnTypeAndVariadic.setPointerAndInt(result, variadic);
@@ -360,21 +359,19 @@
   }

   /// Returns the list of function argument types.
-  ArrayRef<LLVMTypeNew> getArgumentTypes() const { return argumentTypes; }
+  ArrayRef<LLVMType> getArgumentTypes() const { return argumentTypes; }

   /// Checks whether the function type is variadic.
   bool isVariadic() const { return returnTypeAndVariadic.getInt(); }

   /// Returns the function result type.
-  LLVMTypeNew getReturnType() const {
-    return returnTypeAndVariadic.getPointer();
-  }
+  LLVMType getReturnType() const { return returnTypeAndVariadic.getPointer(); }

 private:
   /// Function result type packed with the variadic bit.
-  llvm::PointerIntPair<LLVMTypeNew, 1, bool> returnTypeAndVariadic;
+  llvm::PointerIntPair<LLVMType, 1, bool> returnTypeAndVariadic;
   /// Argument types.
-  ArrayRef<LLVMTypeNew> argumentTypes;
+  ArrayRef<LLVMType> argumentTypes;
 };

 //===----------------------------------------------------------------------===//
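The storage above packs the variadic flag into a spare low bit of the pointer-like result type via llvm::PointerIntPair, saving a word per uniqued function type. A standalone sketch of the same trick, where `resultType` is assumed to be any LLVMType value:

  llvm::PointerIntPair<LLVMType, 1, bool> packed(resultType, /*variadic=*/true);
  LLVMType result = packed.getPointer(); // == resultType
  bool isVariadic = packed.getInt();     // == true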
@@ -405,7 +402,7 @@
 /// Storage type for LLVM dialect pointer types. These are uniqued by a pair of
 /// element type and address space.
 struct LLVMPointerTypeStorage : public TypeStorage {
-  using KeyTy = std::tuple<LLVMTypeNew, unsigned>;
+  using KeyTy = std::tuple<LLVMType, unsigned>;

   LLVMPointerTypeStorage(const KeyTy &key)
       : pointeeType(std::get<0>(key)), addressSpace(std::get<1>(key)) {}

@@ -420,7 +417,7 @@
     return std::make_tuple(pointeeType, addressSpace) == key;
   }

-  LLVMTypeNew pointeeType;
+  LLVMType pointeeType;
   unsigned addressSpace;
 };

@@ -432,7 +429,7 @@
 /// number: arrays, fixed and scalable vectors. The actual semantics of the
 /// type is defined by its kind.
 struct LLVMTypeAndSizeStorage : public TypeStorage {
-  using KeyTy = std::tuple<LLVMTypeNew, unsigned>;
+  using KeyTy = std::tuple<LLVMType, unsigned>;

   LLVMTypeAndSizeStorage(const KeyTy &key)
       : elementType(std::get<0>(key)), numElements(std::get<1>(key)) {}

@@ -447,7 +444,7 @@
     return std::make_tuple(elementType, numElements) == key;
   }

-  LLVMTypeNew elementType;
+  LLVMType elementType;
   unsigned numElements;
 };

diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 6b068660d98ff..5107efe28971c 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -20,6 +20,7 @@
 #include "mlir/IR/Module.h"
 #include "mlir/IR/StandardTypes.h"
 #include "mlir/Support/LLVM.h"
+#include "mlir/Target/LLVMIR/TypeTranslation.h"
 #include "llvm/ADT/TypeSwitch.h"

 #include "llvm/ADT/SetVector.h"
@@ -932,7 +933,9 @@ LogicalResult ModuleTranslation::convertFunctions() {
 }

 llvm::Type *ModuleTranslation::convertType(LLVMType type) {
-  return LLVM::convertLLVMType(type);
+  // Lock the LLVM context as we create types in it.
+  llvm::sys::SmartScopedLock<true> lock(llvmDialect->getLLVMContextMutex());
+  return LLVM::translateTypeToLLVMIR(type, llvmDialect->getLLVMContext());
 }

 /// A helper to look up remapped operands in the value remapping table.
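The translation that follows dispatches over dialect types with llvm::TypeSwitch. As a standalone illustration of the idiom, with `dialectType` and `llvmContext` as assumed inputs rather than names from the patch:

  llvm::Type *lowered =
      llvm::TypeSwitch<LLVM::LLVMType, llvm::Type *>(dialectType)
          .Case([&](LLVM::LLVMIntegerType t) {
            return llvm::IntegerType::get(llvmContext, t.getBitWidth());
          })
          .Default([](LLVM::LLVMType) -> llvm::Type * { return nullptr; });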
diff --git a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp
index 6163334d3b4ef..15e0f158007a5 100644
--- a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp
@@ -24,14 +24,14 @@ class TypeToLLVMIRTranslator {
   TypeToLLVMIRTranslator(llvm::LLVMContext &context) : context(context) {}

   /// Translates a single type.
-  llvm::Type *translateType(LLVM::LLVMTypeNew type) {
+  llvm::Type *translateType(LLVM::LLVMType type) {
     // If the conversion is already known, just return it.
     if (knownTranslations.count(type))
       return knownTranslations.lookup(type);

     // Dispatch to an appropriate function.
     llvm::Type *translated =
-        llvm::TypeSwitch<LLVM::LLVMTypeNew, llvm::Type *>(type)
+        llvm::TypeSwitch<LLVM::LLVMType, llvm::Type *>(type)
             .Case([this](LLVM::LLVMVoidType) {
               return llvm::Type::getVoidTy(context);
             })
@@ -73,7 +73,7 @@
                  LLVM::LLVMStructType, LLVM::LLVMFixedVectorType,
                  LLVM::LLVMScalableVectorType>(
                 [this](auto array) { return translate(array); })
-            .Default([](LLVM::LLVMTypeNew t) -> llvm::Type * {
+            .Default([](LLVM::LLVMType t) -> llvm::Type * {
               llvm_unreachable("unknown LLVM dialect type");
             });

@@ -144,7 +144,7 @@
   }

   /// Translates a list of types.
-  void translateTypes(ArrayRef<LLVM::LLVMTypeNew> types,
+  void translateTypes(ArrayRef<LLVM::LLVMType> types,
                       SmallVectorImpl<llvm::Type *> &result) {
     result.reserve(result.size() + types.size());
     for (auto type : types)
@@ -158,14 +158,14 @@
   /// results to avoid repeated recursive calls and makes sure identified
   /// structs with the same name (that is, equal) are resolved to an existing
   /// type instead of creating a new type.
-  llvm::DenseMap<LLVM::LLVMTypeNew, llvm::Type *> knownTranslations;
+  llvm::DenseMap<LLVM::LLVMType, llvm::Type *> knownTranslations;
 };
 } // end namespace

 /// Translates a type from MLIR LLVM dialect to LLVM IR. This does not maintain
 /// the mapping for identified structs so new structs will be created with
 /// auto-renaming on each call. This is intended exclusively for testing.
-llvm::Type *mlir::LLVM::translateTypeToLLVMIR(LLVM::LLVMTypeNew type,
+llvm::Type *mlir::LLVM::translateTypeToLLVMIR(LLVM::LLVMType type,
                                               llvm::LLVMContext &context) {
   return TypeToLLVMIRTranslator(context).translateType(type);
 }
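The knownTranslations map above is load-bearing for recursive structs: the named struct must be registered before its body is translated so that a self-reference terminates instead of recursing forever. A hand-written sketch of that ordering, with `knownTranslations`, `context`, and `dialectStructType` assumed to be in scope:

  llvm::StructType *s = llvm::StructType::create(context, "a"); // created empty first
  knownTranslations.try_emplace(dialectStructType, s);          // self-references now hit the cache
  // ... translate the body; any ptr<struct<"a">> element resolves to `s` ...
  // s->setBody(translatedElements, /*isPacked=*/false);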
@@ -178,12 +178,12 @@ class TypeFromLLVMIRTranslator {
   TypeFromLLVMIRTranslator(MLIRContext &context) : context(context) {}

   /// Translates the given type.
-  LLVM::LLVMTypeNew translateType(llvm::Type *type) {
+  LLVM::LLVMType translateType(llvm::Type *type) {
     if (knownTranslations.count(type))
       return knownTranslations.lookup(type);

-    LLVM::LLVMTypeNew translated =
-        llvm::TypeSwitch<llvm::Type *, LLVM::LLVMTypeNew>(type)
+    LLVM::LLVMType translated =
+        llvm::TypeSwitch<llvm::Type *, LLVM::LLVMType>(type)
             .Case(
@@ -198,7 +198,7 @@
 private:
   /// Translates the given primitive, i.e. non-parametric in MLIR nomenclature,
   /// type.
-  LLVM::LLVMTypeNew translatePrimitiveType(llvm::Type *type) {
+  LLVM::LLVMType translatePrimitiveType(llvm::Type *type) {
     if (type->isVoidTy())
       return LLVM::LLVMVoidType::get(&context);
     if (type->isHalfTy())
@@ -225,33 +225,33 @@
   }

   /// Translates the given array type.
-  LLVM::LLVMTypeNew translate(llvm::ArrayType *type) {
+  LLVM::LLVMType translate(llvm::ArrayType *type) {
     return LLVM::LLVMArrayType::get(translateType(type->getElementType()),
                                     type->getNumElements());
   }

   /// Translates the given function type.
-  LLVM::LLVMTypeNew translate(llvm::FunctionType *type) {
-    SmallVector<LLVM::LLVMTypeNew, 8> paramTypes;
+  LLVM::LLVMType translate(llvm::FunctionType *type) {
+    SmallVector<LLVM::LLVMType, 8> paramTypes;
     translateTypes(type->params(), paramTypes);
     return LLVM::LLVMFunctionType::get(translateType(type->getReturnType()),
                                        paramTypes, type->isVarArg());
   }

   /// Translates the given integer type.
-  LLVM::LLVMTypeNew translate(llvm::IntegerType *type) {
+  LLVM::LLVMType translate(llvm::IntegerType *type) {
     return LLVM::LLVMIntegerType::get(&context, type->getBitWidth());
   }

   /// Translates the given pointer type.
-  LLVM::LLVMTypeNew translate(llvm::PointerType *type) {
+  LLVM::LLVMType translate(llvm::PointerType *type) {
     return LLVM::LLVMPointerType::get(translateType(type->getElementType()),
                                       type->getAddressSpace());
   }

   /// Translates the given structure type.
-  LLVM::LLVMTypeNew translate(llvm::StructType *type) {
-    SmallVector<LLVM::LLVMTypeNew, 8> subtypes;
+  LLVM::LLVMType translate(llvm::StructType *type) {
+    SmallVector<LLVM::LLVMType, 8> subtypes;
     if (type->isLiteral()) {
       translateTypes(type->subtypes(), subtypes);
       return LLVM::LLVMStructType::getLiteral(&context, subtypes,
@@ -273,20 +273,20 @@
   }

   /// Translates the given fixed-vector type.
-  LLVM::LLVMTypeNew translate(llvm::FixedVectorType *type) {
+  LLVM::LLVMType translate(llvm::FixedVectorType *type) {
     return LLVM::LLVMFixedVectorType::get(translateType(type->getElementType()),
                                           type->getNumElements());
   }

   /// Translates the given scalable-vector type.
-  LLVM::LLVMTypeNew translate(llvm::ScalableVectorType *type) {
+  LLVM::LLVMType translate(llvm::ScalableVectorType *type) {
     return LLVM::LLVMScalableVectorType::get(
         translateType(type->getElementType()), type->getMinNumElements());
   }

   /// Translates a list of types.
   void translateTypes(ArrayRef<llvm::Type *> types,
-                      SmallVectorImpl<LLVM::LLVMTypeNew> &result) {
+                      SmallVectorImpl<LLVM::LLVMType> &result) {
     result.reserve(result.size() + types.size());
     for (llvm::Type *type : types)
       result.push_back(translateType(type));
@@ -294,7 +294,7 @@

   /// Map of known translations. Serves as a cache and as recursion stopper for
   /// translating recursive structs.
-  llvm::DenseMap<llvm::Type *, LLVM::LLVMTypeNew> knownTranslations;
+  llvm::DenseMap<llvm::Type *, LLVM::LLVMType> knownTranslations;

   /// The context in which MLIR types are created.
   MLIRContext &context;
@@ -303,7 +303,7 @@

 /// Translates a type from LLVM IR to MLIR LLVM dialect. This is intended
 /// exclusively for testing.
-LLVM::LLVMTypeNew mlir::LLVM::translateTypeFromLLVMIR(llvm::Type *type,
-                                                      MLIRContext &context) {
+LLVM::LLVMType mlir::LLVM::translateTypeFromLLVMIR(llvm::Type *type,
+                                                   MLIRContext &context) {
   return TypeFromLLVMIRTranslator(context).translateType(type);
 }
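The remaining hunks are mechanical test updates from the old quoted type syntax to the new structured syntax. The correspondence, summarized as illustrative string pairs (the struct example is hand-written, not lifted from a specific test):

  const char *quotedForms[] = {
      "!llvm<\"float*\">", "!llvm<\"[4 x float]\">", "!llvm<\"<4 x i64>\">",
      "!llvm<\"{ float*, i64 }\">"};
  const char *structuredForms[] = {
      "!llvm.ptr<float>", "!llvm.array<4 x float>", "!llvm.vec<4 x i64>",
      "!llvm.struct<(ptr<float>, i64)>"};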
diff --git a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
index bdcde0be60c20..f5c085224f07a 100644
--- a/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-launch-func-to-gpu-runtime-calls.mlir
@@ -8,27 +8,27 @@ module attributes {gpu.container_module} {
   // ROCDL: llvm.mlir.global internal constant @[[global:.*]]("HSACO")

   gpu.module @kernel_module attributes {nvvm.cubin = "CUBIN", rocdl.hsaco = "HSACO"} {
-    llvm.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} {
+    llvm.func @kernel(%arg0: !llvm.float, %arg1: !llvm.ptr<float>) attributes {gpu.kernel} {
       llvm.return
     }
   }

   llvm.func @foo() {
     %0 = "op"() : () -> (!llvm.float)
-    %1 = "op"() : () -> (!llvm<"float*">)
+    %1 = "op"() : () -> (!llvm.ptr<float>)
     %cst = llvm.mlir.constant(8 : index) : !llvm.i64

     // CHECK: %[[addressof:.*]] = llvm.mlir.addressof @[[global]]
     // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index)
     // CHECK: %[[binary:.*]] = llvm.getelementptr %[[addressof]][%[[c0]], %[[c0]]]
-    // CHECK-SAME: -> !llvm<"i8*">
-    // CHECK: %[[module:.*]] = llvm.call @mgpuModuleLoad(%[[binary]]) : (!llvm<"i8*">) -> !llvm<"i8*">
-    // CHECK: %[[func:.*]] = llvm.call @mgpuModuleGetFunction(%[[module]], {{.*}}) : (!llvm<"i8*">, !llvm<"i8*">) -> !llvm<"i8*">
+    // CHECK-SAME: -> !llvm.ptr<i8>
+    // CHECK: %[[module:.*]] = llvm.call @mgpuModuleLoad(%[[binary]]) : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
+    // CHECK: %[[func:.*]] = llvm.call @mgpuModuleGetFunction(%[[module]], {{.*}}) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.ptr<i8>
     // CHECK: llvm.call @mgpuStreamCreate
     // CHECK: llvm.call @mgpuLaunchKernel
     // CHECK: llvm.call @mgpuStreamSynchronize
     "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_module::@kernel }
-        : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm<"float*">) -> ()
+        : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm.ptr<float>) -> ()
     llvm.return
   }
diff --git a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
index c13afefdb78dc..a2764d4242058 100644
--- a/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir
@@ -6,13 +6,13 @@ gpu.module @kernel {
   gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
     // Allocate private memory inside the function.
     // NVVM: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
-    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float>

     // ROCDL: %[[size:.*]] = llvm.mlir.constant(4 : i64) : !llvm.i64
-    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*">
+    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float, 5>

     // Populate the memref descriptor.
-    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -22,7 +22,7 @@
     // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
     // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

-    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(5)*, float addrspace(5)*, i64, [1 x i64], [1 x i64] }">
+    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 5>, ptr<float, 5>, i64, array<1 x i64>, array<1 x i64>)>
     // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -54,11 +54,11 @@

   // Workgroup buffers are allocated as globals.
   // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
   // NVVM-SAME: addr_space = 3
-  // NVVM-SAME: !llvm<"[4 x float]">
+  // NVVM-SAME: !llvm.array<4 x float>

   // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
   // ROCDL-SAME: addr_space = 3
-  // ROCDL-SAME: !llvm<"[4 x float]">
+  // ROCDL-SAME: !llvm.array<4 x float>

   // NVVM-LABEL: llvm.func @workgroup
   // NVVM-SAME: {
@@ -68,17 +68,17 @@
   gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, 3>) {
     // Get the address of the first element in the global array.
     // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm<"[4 x float] addrspace(3)*">
+    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<array<4 x float>, 3>
     // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]]
-    // NVVM-SAME: !llvm<"float addrspace(3)*">
+    // NVVM-SAME: !llvm.ptr<float, 3>

     // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm<"[4 x float] addrspace(3)*">
+    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<array<4 x float>, 3>
     // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]]
-    // ROCDL-SAME: !llvm<"float addrspace(3)*">
+    // ROCDL-SAME: !llvm.ptr<float, 3>

     // Populate the memref descriptor.
-    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }">
+    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<1 x i64>, array<1 x i64>)>
     // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -88,7 +88,7 @@
     // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
     // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

-    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }">
+    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<1 x i64>, array<1 x i64>)>
     // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -120,28 +120,28 @@

   // Check that the total size was computed correctly.
   // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
   // NVVM-SAME: addr_space = 3
-  // NVVM-SAME: !llvm<"[48 x float]">
+  // NVVM-SAME: !llvm.array<48 x float>

   // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
   // ROCDL-SAME: addr_space = 3
-  // ROCDL-SAME: !llvm<"[48 x float]">
+  // ROCDL-SAME: !llvm.array<48 x float>

   // NVVM-LABEL: llvm.func @workgroup3d
   // ROCDL-LABEL: llvm.func @workgroup3d
   gpu.func @workgroup3d(%arg0: f32) workgroup(%arg1: memref<4x2x6xf32, 3>) {
     // Get the address of the first element in the global array.
     // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm<"[48 x float] addrspace(3)*">
+    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<array<48 x float>, 3>
     // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]]
-    // NVVM-SAME: !llvm<"float addrspace(3)*">
+    // NVVM-SAME: !llvm.ptr<float, 3>

     // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm<"[48 x float] addrspace(3)*">
+    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<array<48 x float>, 3>
     // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][%[[c0]], %[[c0]]]
-    // ROCDL-SAME: !llvm<"float addrspace(3)*">
+    // ROCDL-SAME: !llvm.ptr<float, 3>

     // Populate the memref descriptor.
-    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
+    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<3 x i64>, array<3 x i64>)>
     // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -159,7 +159,7 @@
     // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
     // NVVM: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2]

-    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
+    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<3 x i64>, array<3 x i64>)>
     // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
     // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
     // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
@@ -188,14 +188,14 @@
 gpu.module @kernel {
   // Check that several buffers are defined.
   // NVVM: llvm.mlir.global internal @[[$buffer1:.*]]()
-  // NVVM-SAME: !llvm<"[1 x float]">
+  // NVVM-SAME: !llvm.array<1 x float>
   // NVVM: llvm.mlir.global internal @[[$buffer2:.*]]()
-  // NVVM-SAME: !llvm<"[2 x float]">
+  // NVVM-SAME: !llvm.array<2 x float>

   // ROCDL: llvm.mlir.global internal @[[$buffer1:.*]]()
-  // ROCDL-SAME: !llvm<"[1 x float]">
+  // ROCDL-SAME: !llvm.array<1 x float>
   // ROCDL: llvm.mlir.global internal @[[$buffer2:.*]]()
-  // ROCDL-SAME: !llvm<"[2 x float]">
+  // ROCDL-SAME: !llvm.array<2 x float>

   // NVVM-LABEL: llvm.func @multiple
   // ROCDL-LABEL: llvm.func @multiple
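The insertvalue sequences checked above populate MLIR's standard memref descriptor. For a 1-D f32 memref its layout corresponds to the following C-level struct; this is a sketch, and the field names are descriptive rather than taken from the code:

  struct MemRefDescriptor1DFloat {
    float *allocated;   // descr[0]: pointer returned by the allocation
    float *aligned;     // descr[1]: aligned pointer used for accesses
    int64_t offset;     // descr[2]: offset into the aligned pointer
    int64_t sizes[1];   // descr[3]: one size per dimension
    int64_t strides[1]; // descr[4]: one stride per dimension
  };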
@@ -212,14 +212,14 @@

     // Private buffers.
     // NVVM: %[[c3:.*]] = llvm.mlir.constant(3 : i64)
-    // NVVM: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+    // NVVM: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float>
     // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : i64)
-    // NVVM: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm<"float*">
+    // NVVM: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float>

     // ROCDL: %[[c3:.*]] = llvm.mlir.constant(3 : i64)
-    // ROCDL: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*">
+    // ROCDL: llvm.alloca %[[c3]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float, 5>
     // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : i64)
-    // ROCDL: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm<"float addrspace(5)*">
+    // ROCDL: llvm.alloca %[[c4]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float, 5>

     %c0 = constant 0 : index
     store %arg0, %arg1[%c0] : memref<1xf32, 3>
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
index 78b9f56b6202c..d103031fbd54d 100644
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
@@ -2,7 +2,7 @@
 // CHECK: attributes {nvvm.cubin = "CUBIN"}
 gpu.module @foo {
-  llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
+  llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm.ptr<float>)
     // CHECK: attributes {gpu.kernel}
     attributes  { gpu.kernel } {
     llvm.return
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index fa9a478c1d83b..df38df1749cbc 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -120,9 +120,9 @@ gpu.module @test_module {
   // CHECK: %[[#SHL:]] = llvm.shl %[[#ONE]], %[[#WIDTH]] : !llvm.i32
   // CHECK: %[[#MASK:]] = llvm.sub %[[#SHL]], %[[#ONE]] : !llvm.i32
   // CHECK: %[[#CLAMP:]] = llvm.sub %[[#WIDTH]], %[[#ONE]] : !llvm.i32
-  // CHECK: %[[#SHFL:]] = nvvm.shfl.sync.bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] : !llvm<"{ float, i1 }">
-  // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm<"{ float, i1 }">
-  // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm<"{ float, i1 }">
+  // CHECK: %[[#SHFL:]] = nvvm.shfl.sync.bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] : !llvm.struct<(float, i1)>
+  // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(float, i1)>
+  // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(float, i1)>
   %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (f32, i1)
   std.return %shfl : f32
diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
index 5ee3bb21aa916..d88f842a2186f 100644
--- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
+++ b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
@@ -2,7 +2,7 @@
 // CHECK: attributes {rocdl.hsaco = "HSACO"}
 gpu.module @foo {
-  llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
+  llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm.ptr<float>)
     // CHECK: attributes {gpu.kernel}
     attributes  { gpu.kernel } {
     llvm.return
diff --git a/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir b/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
index f944d007ebaa9..e39bfa41e692b 100644
--- a/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
+++ b/mlir/test/Conversion/GPUToVulkan/invoke-vulkan.mlir
@@ -2,60 +2,60 @@
 // CHECK: llvm.mlir.global internal constant @kernel_spv_entry_point_name
 // CHECK: llvm.mlir.global internal constant @SPIRV_BIN
-// CHECK: %[[Vulkan_Runtime_ptr:.*]] = llvm.call @initVulkan() : () -> !llvm<"i8*">
+// CHECK: %[[Vulkan_Runtime_ptr:.*]] = llvm.call @initVulkan() : () -> !llvm.ptr<i8>
 // CHECK: %[[addressof_SPIRV_BIN:.*]] = llvm.mlir.addressof @SPIRV_BIN
 // CHECK: %[[SPIRV_BIN_ptr:.*]] = llvm.getelementptr %[[addressof_SPIRV_BIN]]
 // CHECK: %[[SPIRV_BIN_size:.*]] = llvm.mlir.constant
-// CHECK: llvm.call @bindMemRef1DFloat(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm.i32, !llvm.i32, !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">) -> !llvm.void
-// CHECK: llvm.call @setBinaryShader(%[[Vulkan_Runtime_ptr]], %[[SPIRV_BIN_ptr]], %[[SPIRV_BIN_size]]) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32) -> !llvm.void
+// CHECK: llvm.call @bindMemRef1DFloat(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, !llvm.i32, !llvm.i32, !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>) -> !llvm.void
+// CHECK: llvm.call @setBinaryShader(%[[Vulkan_Runtime_ptr]], %[[SPIRV_BIN_ptr]], %[[SPIRV_BIN_size]]) : (!llvm.ptr<i8>, !llvm.ptr<i8>, !llvm.i32) -> !llvm.void
 // CHECK: %[[addressof_entry_point:.*]] = llvm.mlir.addressof @kernel_spv_entry_point_name
 // CHECK: %[[entry_point_ptr:.*]] = llvm.getelementptr %[[addressof_entry_point]]
-// CHECK: llvm.call @setEntryPoint(%[[Vulkan_Runtime_ptr]], %[[entry_point_ptr]]) : (!llvm<"i8*">, !llvm<"i8*">) -> !llvm.void
-// CHECK: llvm.call @setNumWorkGroups(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm.i64, !llvm.i64, !llvm.i64) -> !llvm.void
-// CHECK: llvm.call @runOnVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm<"i8*">) -> !llvm.void
-// CHECK: llvm.call @deinitVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm<"i8*">) -> !llvm.void
+// CHECK: llvm.call @setEntryPoint(%[[Vulkan_Runtime_ptr]], %[[entry_point_ptr]]) : (!llvm.ptr<i8>, !llvm.ptr<i8>) -> !llvm.void
+// CHECK: llvm.call @setNumWorkGroups(%[[Vulkan_Runtime_ptr]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr<i8>, !llvm.i64, !llvm.i64, !llvm.i64) -> !llvm.void
+// CHECK: llvm.call @runOnVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm.ptr<i8>) -> !llvm.void
+// CHECK: llvm.call @deinitVulkan(%[[Vulkan_Runtime_ptr]]) : (!llvm.ptr<i8>) -> !llvm.void

 module attributes {gpu.container_module} {
-  llvm.func @malloc(!llvm.i64) -> !llvm<"i8*">
+  llvm.func @malloc(!llvm.i64) -> !llvm.ptr<i8>
   llvm.func @foo() {
     %0 = llvm.mlir.constant(12 : index) : !llvm.i64
-    %1 = llvm.mlir.null : !llvm<"float*">
+    %1 = llvm.mlir.null : !llvm.ptr<float>
     %2 = llvm.mlir.constant(1 : index) : !llvm.i64
-    %3 = llvm.getelementptr %1[%2] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
-    %4 = llvm.ptrtoint %3 : !llvm<"float*"> to !llvm.i64
+    %3 = llvm.getelementptr %1[%2] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+    %4 = llvm.ptrtoint %3 : !llvm.ptr<float> to !llvm.i64
     %5 = llvm.mul %0, %4 : !llvm.i64
-    %6 = llvm.call @malloc(%5) : (!llvm.i64) -> !llvm<"i8*">
-    %7 = llvm.bitcast %6 : !llvm<"i8*"> to !llvm<"float*">
-    %8 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %9 = llvm.insertvalue %7, %8[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %10 = llvm.insertvalue %7, %9[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+    %6 = llvm.call @malloc(%5) : (!llvm.i64) -> !llvm.ptr<i8>
+    %7 = llvm.bitcast %6 : !llvm.ptr<i8> to !llvm.ptr<float>
+    %8 = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %9 = llvm.insertvalue %7, %8[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %10 = llvm.insertvalue %7, %9[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     %11 = llvm.mlir.constant(0 : index) : !llvm.i64
-    %12 = llvm.insertvalue %11, %10[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+    %12 = llvm.insertvalue %11, %10[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     %13 = llvm.mlir.constant(1 : index) : !llvm.i64
-    %14 = llvm.insertvalue %0, %12[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %15 = llvm.insertvalue %13, %14[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+    %14 = llvm.insertvalue %0, %12[3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %15 = llvm.insertvalue %13, %14[4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     %16 = llvm.mlir.constant(1 : index) : !llvm.i64
-    %17 = llvm.extractvalue %15[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %18 = llvm.extractvalue %15[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %19 = llvm.extractvalue %15[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %20 = llvm.extractvalue %15[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %21 = llvm.extractvalue %15[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+    %17 = llvm.extractvalue %15[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %18 = llvm.extractvalue %15[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %19 = llvm.extractvalue %15[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %20 = llvm.extractvalue %15[3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %21 = llvm.extractvalue %15[4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     llvm.call @vulkanLaunch(%16, %16, %16, %17, %18, %19, %20, %21) {spirv_blob = "\03\02#\07\00", spirv_entry_point = "kernel"}
-        : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64) -> ()
+        : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.ptr<float>, !llvm.ptr<float>, !llvm.i64, !llvm.i64, !llvm.i64) -> ()
     llvm.return
   }
-  llvm.func @vulkanLaunch(%arg0: !llvm.i64, %arg1: !llvm.i64, %arg2: !llvm.i64, %arg6: !llvm<"float*">, %arg7: !llvm<"float*">, %arg8: !llvm.i64, %arg9: !llvm.i64, %arg10: !llvm.i64) {
-    %0 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %1 = llvm.insertvalue %arg6, %0[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %2 = llvm.insertvalue %arg7, %1[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %3 = llvm.insertvalue %arg8, %2[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %4 = llvm.insertvalue %arg9, %3[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
-    %5 = llvm.insertvalue %arg10, %4[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+  llvm.func @vulkanLaunch(%arg0: !llvm.i64, %arg1: !llvm.i64, %arg2: !llvm.i64, %arg6: !llvm.ptr<float>, %arg7: !llvm.ptr<float>, %arg8: !llvm.i64, %arg9: !llvm.i64, %arg10: !llvm.i64) {
+    %0 = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %1 = llvm.insertvalue %arg6, %0[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %2 = llvm.insertvalue %arg7, %1[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %3 = llvm.insertvalue %arg8, %2[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %4 = llvm.insertvalue %arg9, %3[3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+    %5 = llvm.insertvalue %arg10, %4[4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
     %6 = llvm.mlir.constant(1 : index) : !llvm.i64
-    %7 = llvm.alloca %6 x !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> : (!llvm.i64) -> !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">
-    llvm.store %5, %7 : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">
-    llvm.call @_mlir_ciface_vulkanLaunch(%arg0, %arg1, %arg2, %7) : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">) -> ()
+    %7 = llvm.alloca %6 x !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)> : (!llvm.i64) -> !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.store %5, %7 : !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>
+    llvm.call @_mlir_ciface_vulkanLaunch(%arg0, %arg1, %arg2, %7) : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>) -> ()
     llvm.return
   }
-  llvm.func @_mlir_ciface_vulkanLaunch(!llvm.i64, !llvm.i64, !llvm.i64, !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">)
+  llvm.func @_mlir_ciface_vulkanLaunch(!llvm.i64, !llvm.i64, !llvm.i64, !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>)
 }
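vulkanLaunch above receives the memref expanded into individual scalar arguments, repacks them into a descriptor on the stack, and forwards a pointer to it. In C terms the interface function it calls looks roughly like this hypothetical declaration, assuming the descriptor layout sketched earlier:

  struct MemRefDescriptor1DFloat; // as sketched above
  extern "C" void _mlir_ciface_vulkanLaunch(int64_t x, int64_t y, int64_t z,
                                            MemRefDescriptor1DFloat *memref);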
diff --git a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir
index 8ae0d98407131..a37e82bf59953 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/arithmetic-ops-to-llvm.mlir
@@ -13,7 +13,7 @@ func @iadd_scalar(%arg0: i32, %arg1: i32) {
 // CHECK-LABEL: @iadd_vector
 func @iadd_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) {
-  // CHECK: llvm.add %{{.*}}, %{{.*}} : !llvm<"<4 x i64>">
+  // CHECK: llvm.add %{{.*}}, %{{.*}} : !llvm.vec<4 x i64>
   %0 = spv.IAdd %arg0, %arg1 : vector<4xi64>
   return
 }
@@ -31,7 +31,7 @@ func @isub_scalar(%arg0: i8, %arg1: i8) {
 // CHECK-LABEL: @isub_vector
 func @isub_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) {
-  // CHECK: llvm.sub %{{.*}}, %{{.*}} : !llvm<"<2 x i16>">
+  // CHECK: llvm.sub %{{.*}}, %{{.*}} : !llvm.vec<2 x i16>
   %0 = spv.ISub %arg0, %arg1 : vector<2xi16>
   return
 }
@@ -49,7 +49,7 @@ func @imul_scalar(%arg0: i32, %arg1: i32) {
 // CHECK-LABEL: @imul_vector
 func @imul_vector(%arg0: vector<3xi32>, %arg1: vector<3xi32>) {
-  // CHECK: llvm.mul %{{.*}}, %{{.*}} : !llvm<"<3 x i32>">
+  // CHECK: llvm.mul %{{.*}}, %{{.*}} : !llvm.vec<3 x i32>
   %0 = spv.IMul %arg0, %arg1 : vector<3xi32>
   return
 }
@@ -67,7 +67,7 @@ func @fadd_scalar(%arg0: f16, %arg1: f16) {
 // CHECK-LABEL: @fadd_vector
 func @fadd_vector(%arg0: vector<4xf32>, %arg1: vector<4xf32>) {
-  // CHECK: llvm.fadd %{{.*}}, %{{.*}} : !llvm<"<4 x float>">
+  // CHECK: llvm.fadd %{{.*}}, %{{.*}} : !llvm.vec<4 x float>
   %0 = spv.FAdd %arg0, %arg1 : vector<4xf32>
   return
 }
@@ -85,7 +85,7 @@ func @fsub_scalar(%arg0: f32, %arg1: f32) {
 // CHECK-LABEL: @fsub_vector
 func @fsub_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) {
-  // CHECK: llvm.fsub %{{.*}}, %{{.*}} : !llvm<"<2 x float>">
+  // CHECK: llvm.fsub %{{.*}}, %{{.*}} : !llvm.vec<2 x float>
   %0 = spv.FSub %arg0, %arg1 : vector<2xf32>
   return
 }
@@ -103,7 +103,7 @@ func @fdiv_scalar(%arg0: f32, %arg1: f32) {
 // CHECK-LABEL: @fdiv_vector
 func @fdiv_vector(%arg0: vector<3xf64>, %arg1: vector<3xf64>) {
-  // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : !llvm<"<3 x double>">
+  // CHECK: llvm.fdiv %{{.*}}, %{{.*}} : !llvm.vec<3 x double>
   %0 = spv.FDiv %arg0, %arg1 : vector<3xf64>
   return
 }
@@ -121,7 +121,7 @@ func @fmul_scalar(%arg0: f32, %arg1: f32) {
 // CHECK-LABEL: @fmul_vector
 func @fmul_vector(%arg0: vector<2xf32>, %arg1: vector<2xf32>) {
-  // CHECK: llvm.fmul %{{.*}}, %{{.*}} : !llvm<"<2 x float>">
+  // CHECK: llvm.fmul %{{.*}}, %{{.*}} : !llvm.vec<2 x float>
   %0 = spv.FMul %arg0, %arg1 : vector<2xf32>
   return
 }
@@ -139,7 +139,7 @@ func @frem_scalar(%arg0: f32, %arg1: f32) {
 // CHECK-LABEL: @frem_vector
 func @frem_vector(%arg0: vector<3xf64>, %arg1: vector<3xf64>) {
-  // CHECK: llvm.frem %{{.*}}, %{{.*}} : !llvm<"<3 x double>">
+  // CHECK: llvm.frem %{{.*}}, %{{.*}} : !llvm.vec<3 x double>
   %0 = spv.FRem %arg0, %arg1 : vector<3xf64>
   return
 }
@@ -157,7 +157,7 @@ func @fneg_scalar(%arg: f64) {
 // CHECK-LABEL: @fneg_vector
 func @fneg_vector(%arg: vector<2xf32>) {
-  // CHECK: llvm.fneg %{{.*}} : !llvm<"<2 x float>">
+  // CHECK: llvm.fneg %{{.*}} : !llvm.vec<2 x float>
   %0 = spv.FNegate %arg : vector<2xf32>
   return
 }
@@ -175,7 +175,7 @@ func @udiv_scalar(%arg0: i32, %arg1: i32) {
 // CHECK-LABEL: @udiv_vector
 func @udiv_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) {
-  // CHECK: llvm.udiv %{{.*}}, %{{.*}} : !llvm<"<3 x i64>">
+  // CHECK: llvm.udiv %{{.*}}, %{{.*}} : !llvm.vec<3 x i64>
   %0 = spv.UDiv %arg0, %arg1 : vector<3xi64>
   return
 }
@@ -193,7 +193,7 @@ func @umod_scalar(%arg0: i32, %arg1: i32) {
 // CHECK-LABEL: @umod_vector
 func @umod_vector(%arg0: vector<3xi64>, %arg1: vector<3xi64>) {
-  // CHECK: llvm.urem %{{.*}}, %{{.*}} : !llvm<"<3 x i64>">
+  // CHECK: llvm.urem %{{.*}}, %{{.*}} : !llvm.vec<3 x i64>
   %0 = spv.UMod %arg0, %arg1 : vector<3xi64>
   return
 }
@@ -211,7 +211,7 @@ func @sdiv_scalar(%arg0: i16, %arg1: i16) {
 // CHECK-LABEL: @sdiv_vector
 func @sdiv_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) {
-  // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : !llvm<"<2 x i64>">
+  // CHECK: llvm.sdiv %{{.*}}, %{{.*}} : !llvm.vec<2 x i64>
   %0 = spv.SDiv %arg0, %arg1 : vector<2xi64>
   return
 }
@@ -229,7 +229,7 @@ func @srem_scalar(%arg0: i32, %arg1: i32) {
 // CHECK-LABEL: @srem_vector
 func @srem_vector(%arg0: vector<4xi32>, %arg1: vector<4xi32>) {
-  // CHECK: llvm.srem %{{.*}}, %{{.*}} : !llvm<"<4 x i32>">
+  // CHECK: llvm.srem %{{.*}}, %{{.*}} : !llvm.vec<4 x i32>
   %0 = spv.SRem %arg0, %arg1 : vector<4xi32>
   return
 }
diff --git a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir
index aed82d218db21..e37072c7282da 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir
@@ -13,7 +13,7 @@ func @bitcount_scalar(%arg0: i16) {
 // CHECK-LABEL: @bitcount_vector
 func @bitcount_vector(%arg0: vector<3xi32>) {
-  // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm<"<3 x i32>">) -> !llvm<"<3 x i32>">
+  // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm.vec<3 x i32>) -> !llvm.vec<3 x i32>
   %0 = spv.BitCount %arg0: vector<3xi32>
   return
 }
@@ -31,7 +31,7 @@ func @bitreverse_scalar(%arg0: i64) {
 // CHECK-LABEL: @bitreverse_vector
 func @bitreverse_vector(%arg0: vector<4xi32>) {
-  // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (!llvm<"<4 x i32>">) -> !llvm<"<4 x i32>">
+  // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (!llvm.vec<4 x i32>) -> !llvm.vec<4 x i32>
   %0 = spv.BitReverse %arg0: vector<4xi32>
   return
 }
@@ -89,27 +89,27 @@ func @bitfield_insert_scalar_greater_bit_width(%base: i16, %insert: i16, %offset
   return
 }
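The CHECK sequences in the bitfield tests that follow encode the usual bitfield-insert mask arithmetic. As a hand-written scalar reference (not from the patch; assumes 0 <= offset, 0 < count, and offset + count <= 32), the lowering computes:

  uint32_t bitfieldInsert(uint32_t base, uint32_t insert,
                          uint32_t offset, uint32_t count) {
    uint32_t t0 = ~0u << count;          // llvm.shl of all-ones by count
    uint32_t t1 = t0 ^ ~0u;              // low `count` bits set
    uint32_t mask = ~(t1 << offset);     // zeros exactly at the destination field
    return (base & mask) | (insert << offset);
  }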
i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 +// CHECK-LABEL: func @bitfield_insert_vector +// CHECK-SAME: %[[BASE:.*]]: !llvm.vec<2 x i32>, %[[INSERT:.*]]: !llvm.vec<2 x i32>, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_insert_vector(%base: vector<2xi32>, %insert: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm.vec<2 x i32> + // CHECK: %[[T1:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> + // CHECK: %[[T2:.*]] = llvm.shl %[[T1]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T2]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> + // CHECK: %[[NEW_BASE:.*]] = llvm.and %[[BASE]], %[[MASK]] : !llvm.vec<2 x i32> + // CHECK: %[[SHIFTED_INSERT:.*]] = llvm.shl %[[INSERT]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> + // CHECK: llvm.or %[[NEW_BASE]], %[[SHIFTED_INSERT]] : !llvm.vec<2 x i32> %0 = spv.BitFieldInsert %base, %insert, %offset, %count : vector<2xi32>, i32, i32 return } @@ -161,25 +161,25 @@ func @bitfield_sextract_scalar_greater_bit_width(%base: i32, %offset: i64, %coun return } -// CHECK-LABEL: @bitfield_sextract_vector -// CHECK-SAME: %[[BASE:.*]]: !llvm<"<2 x i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 +// 
CHECK-LABEL: func @bitfield_sextract_vector +// CHECK-SAME: %[[BASE:.*]]: !llvm.vec<2 x i32>, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_sextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm<"<2 x i32>"> - // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : !llvm<"<2 x i32>"> - // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(dense<32> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: %[[T0:.*]] = llvm.add %[[COUNT_V2]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> + // CHECK: %[[T1:.*]] = llvm.sub %[[SIZE]], %[[T0]] : !llvm.vec<2 x i32> + // CHECK: %[[SHIFTED_LEFT:.*]] = llvm.shl %[[BASE]], %[[T1]] : !llvm.vec<2 x i32> + // CHECK: %[[T2:.*]] = llvm.add %[[OFFSET_V2]], %[[T1]] : !llvm.vec<2 x i32> + // CHECK: llvm.ashr %[[SHIFTED_LEFT]], %[[T2]] : !llvm.vec<2 x i32> %0 = spv.BitFieldSExtract %base, %offset, %count : vector<2xi32>, i32, i32 return } @@ -227,24 +227,24 @@ func @bitfield_uextract_scalar_greater_bit_width(%base: i8, %offset: i16, %count return } -// CHECK-LABEL: @bitfield_uextract_vector -// CHECK-SAME: %[[BASE:.*]]: !llvm<"<2 x i32>">, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 +// CHECK-LABEL: func @bitfield_uextract_vector +// CHECK-SAME: %[[BASE:.*]]: !llvm.vec<2 x i32>, %[[OFFSET:.*]]: !llvm.i32, %[[COUNT:.*]]: !llvm.i32 func @bitfield_uextract_vector(%base: vector<2xi32>, %offset: i32, %count: i32) { - // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], 
%[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V1:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %[[OFFSET_V2:.*]] = llvm.insertelement %[[OFFSET]], %[[OFFSET_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[COUNT_V0:.*]] = llvm.mlir.undef : !llvm.vec<2 x i32> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V1:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V0]][%[[ZERO]] : !llvm.i32] : !llvm.vec<2 x i32> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm<"<2 x i32>"> - // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm<"<2 x i32>"> - // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm<"<2 x i32>"> - // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm<"<2 x i32>"> - // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : !llvm<"<2 x i32>"> - // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm<"<2 x i32>"> + // CHECK: %[[COUNT_V2:.*]] = llvm.insertelement %[[COUNT]], %[[COUNT_V1]][%[[ONE]] : !llvm.i32] : !llvm.vec<2 x i32> + // CHECK: %[[MINUS_ONE:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: %[[T0:.*]] = llvm.shl %[[MINUS_ONE]], %[[COUNT_V2]] : !llvm.vec<2 x i32> + // CHECK: %[[MASK:.*]] = llvm.xor %[[T0]], %[[MINUS_ONE]] : !llvm.vec<2 x i32> + // CHECK: %[[SHIFTED_BASE:.*]] = llvm.lshr %[[BASE]], %[[OFFSET_V2]] : !llvm.vec<2 x i32> + // CHECK: llvm.and %[[SHIFTED_BASE]], %[[MASK]] : !llvm.vec<2 x i32> %0 = spv.BitFieldUExtract %base, %offset, %count : vector<2xi32>, i32, i32 return } @@ -262,7 +262,7 @@ func @bitwise_and_scalar(%arg0: i32, %arg1: i32) { // CHECK-LABEL: @bitwise_and_vector func @bitwise_and_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) { - // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %0 = spv.BitwiseAnd %arg0, %arg1 : vector<4xi64> return } @@ -280,7 +280,7 @@ func @bitwise_or_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @bitwise_or_vector func @bitwise_or_vector(%arg0: vector<3xi8>, %arg1: vector<3xi8>) { - // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm<"<3 x i8>"> + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.vec<3 x i8> %0 = spv.BitwiseOr %arg0, %arg1 : vector<3xi8> return } @@ -298,7 +298,7 @@ func @bitwise_xor_scalar(%arg0: i32, %arg1: i32) { // CHECK-LABEL: @bitwise_xor_vector func @bitwise_xor_vector(%arg0: vector<2xi16>, %arg1: vector<2xi16>) { - // CHECK: llvm.xor %{{.*}}, %{{.*}} : !llvm<"<2 x i16>"> + // CHECK: llvm.xor %{{.*}}, %{{.*}} : !llvm.vec<2 x i16> %0 = spv.BitwiseXor %arg0, %arg1 : vector<2xi16> return } @@ -317,8 +317,8 @@ func @not_scalar(%arg0: i32) { // CHECK-LABEL: @not_vector func @not_vector(%arg0: vector<2xi16>) { - // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense<-1> : vector<2xi16>) : !llvm<"<2 x i16>"> - // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<2 x i16>"> + // CHECK: %[[CONST:.*]] = 
llvm.mlir.constant(dense<-1> : vector<2xi16>) : !llvm.vec<2 x i16> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.vec<2 x i16> %0 = spv.Not %arg0 : vector<2xi16> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir index a0434aad3bea7..5071783073c27 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/cast-ops-to-llvm.mlir @@ -13,35 +13,35 @@ func @bitcast_float_to_integer_scalar(%arg0 : f32) { // CHECK-LABEL: @bitcast_float_to_integer_vector func @bitcast_float_to_integer_vector(%arg0 : vector<3xf32>) { - // CHECK: llvm.bitcast {{.*}} : !llvm<"<3 x float>"> to !llvm<"<3 x i32>"> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<3 x float> to !llvm.vec<3 x i32> %0 = spv.Bitcast %arg0: vector<3xf32> to vector<3xi32> return } // CHECK-LABEL: @bitcast_vector_to_scalar func @bitcast_vector_to_scalar(%arg0 : vector<2xf32>) { - // CHECK: llvm.bitcast {{.*}} : !llvm<"<2 x float>"> to !llvm.i64 + // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<2 x float> to !llvm.i64 %0 = spv.Bitcast %arg0: vector<2xf32> to i64 return } // CHECK-LABEL: @bitcast_scalar_to_vector func @bitcast_scalar_to_vector(%arg0 : f64) { - // CHECK: llvm.bitcast {{.*}} : !llvm.double to !llvm<"<2 x i32>"> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.double to !llvm.vec<2 x i32> %0 = spv.Bitcast %arg0: f64 to vector<2xi32> return } // CHECK-LABEL: @bitcast_vector_to_vector func @bitcast_vector_to_vector(%arg0 : vector<4xf32>) { - // CHECK: llvm.bitcast {{.*}} : !llvm<"<4 x float>"> to !llvm<"<2 x i64>"> + // CHECK: {{.*}} = llvm.bitcast {{.*}} : !llvm.vec<4 x float> to !llvm.vec<2 x i64> %0 = spv.Bitcast %arg0: vector<4xf32> to vector<2xi64> return } // CHECK-LABEL: @bitcast_pointer func @bitcast_pointer(%arg0: !spv.ptr<f32, Function>) { - // CHECK: llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"i32*"> + // CHECK: llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<i32> %0 = spv.Bitcast %arg0 : !spv.ptr<f32, Function> to !spv.ptr<i32, Function> return } @@ -59,8 +59,8 @@ func @convert_float_to_signed_scalar(%arg0: f32) { // CHECK-LABEL: @convert_float_to_signed_vector func @convert_float_to_signed_vector(%arg0: vector<2xf32>) { - // CHECK: llvm.fptosi %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> - %0 = spv.ConvertFToS %arg0: vector<2xf32> to vector<2xi32> + // CHECK: llvm.fptosi %{{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i32> + %0 = spv.ConvertFToS %arg0: vector<2xf32> to vector<2xi32> return } @@ -77,8 +77,8 @@ func @convert_float_to_unsigned_scalar(%arg0: f32) { // CHECK-LABEL: @convert_float_to_unsigned_vector func @convert_float_to_unsigned_vector(%arg0: vector<2xf32>) { - // CHECK: llvm.fptoui %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x i32>"> - %0 = spv.ConvertFToU %arg0: vector<2xf32> to vector<2xi32> + // CHECK: llvm.fptoui %{{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x i32> + %0 = spv.ConvertFToU %arg0: vector<2xf32> to vector<2xi32> return } @@ -95,8 +95,8 @@ func @convert_signed_to_float_scalar(%arg0: i32) { // CHECK-LABEL: @convert_signed_to_float_vector func @convert_signed_to_float_vector(%arg0: vector<3xi32>) { - // CHECK: llvm.sitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> - %0 = spv.ConvertSToF %arg0: vector<3xi32> to vector<3xf32> + // CHECK: llvm.sitofp %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x float> + %0 = spv.ConvertSToF %arg0: vector<3xi32> to vector<3xf32> return } @@ -113,8 +113,8 @@ func @convert_unsigned_to_float_scalar(%arg0: i32) { // CHECK-LABEL:
@convert_unsigned_to_float_vector func @convert_unsigned_to_float_vector(%arg0: vector<3xi32>) { - // CHECK: llvm.uitofp %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x float>"> - %0 = spv.ConvertUToF %arg0: vector<3xi32> to vector<3xf32> + // CHECK: llvm.uitofp %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x float> + %0 = spv.ConvertUToF %arg0: vector<3xi32> to vector<3xf32> return } @@ -134,10 +134,10 @@ func @fconvert_scalar(%arg0: f32, %arg1: f64) { // CHECK-LABEL: @fconvert_vector func @fconvert_vector(%arg0: vector<2xf32>, %arg1: vector<2xf64>) { - // CHECK: llvm.fpext %{{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x double>"> + // CHECK: llvm.fpext %{{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x double> %0 = spv.FConvert %arg0: vector<2xf32> to vector<2xf64> - // CHECK: llvm.fptrunc %{{.*}} : !llvm<"<2 x double>"> to !llvm<"<2 x float>"> + // CHECK: llvm.fptrunc %{{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x float> %1 = spv.FConvert %arg1: vector<2xf64> to vector<2xf32> return } @@ -158,10 +158,10 @@ func @sconvert_scalar(%arg0: i32, %arg1: i64) { // CHECK-LABEL: @sconvert_vector func @sconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) { - // CHECK: llvm.sext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> + // CHECK: llvm.sext %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x i64> %0 = spv.SConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> + // CHECK: llvm.trunc %{{.*}} : !llvm.vec<3 x i64> to !llvm.vec<3 x i32> %1 = spv.SConvert %arg1: vector<3xi64> to vector<3xi32> return } @@ -182,10 +182,10 @@ func @uconvert_scalar(%arg0: i32, %arg1: i64) { // CHECK-LABEL: @uconvert_vector func @uconvert_vector(%arg0: vector<3xi32>, %arg1: vector<3xi64>) { - // CHECK: llvm.zext %{{.*}} : !llvm<"<3 x i32>"> to !llvm<"<3 x i64>"> + // CHECK: llvm.zext %{{.*}} : !llvm.vec<3 x i32> to !llvm.vec<3 x i64> %0 = spv.UConvert %arg0: vector<3xi32> to vector<3xi64> - // CHECK: llvm.trunc %{{.*}} : !llvm<"<3 x i64>"> to !llvm<"<3 x i32>"> + // CHECK: llvm.trunc %{{.*}} : !llvm.vec<3 x i64> to !llvm.vec<3 x i32> %1 = spv.UConvert %arg1: vector<3xi64> to vector<3xi32> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir index bcfe234f7080d..0592d59ba6eef 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/comparison-ops-to-llvm.mlir @@ -13,7 +13,7 @@ func @i_equal_scalar(%arg0: i32, %arg1: i32) { // CHECK-LABEL: @i_equal_vector func @i_equal_vector(%arg0: vector<4xi64>, %arg1: vector<4xi64>) { - // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %0 = spv.IEqual %arg0, %arg1 : vector<4xi64> return } @@ -31,7 +31,7 @@ func @i_not_equal_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @i_not_equal_vector func @i_not_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.INotEqual %arg0, %arg1 : vector<2xi64> return } @@ -49,7 +49,7 @@ func @s_greater_than_equal_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @s_greater_than_equal_vector func @s_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "sge" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.SGreaterThanEqual 
%arg0, %arg1 : vector<2xi64> return } @@ -67,7 +67,7 @@ func @s_greater_than_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @s_greater_than_vector func @s_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "sgt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.SGreaterThan %arg0, %arg1 : vector<2xi64> return } @@ -85,7 +85,7 @@ func @s_less_than_equal_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @s_less_than_equal_vector func @s_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "sle" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.SLessThanEqual %arg0, %arg1 : vector<2xi64> return } @@ -103,7 +103,7 @@ func @s_less_than_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @s_less_than_vector func @s_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.SLessThan %arg0, %arg1 : vector<2xi64> return } @@ -121,7 +121,7 @@ func @u_greater_than_equal_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @u_greater_than_equal_vector func @u_greater_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "uge" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.UGreaterThanEqual %arg0, %arg1 : vector<2xi64> return } @@ -139,7 +139,7 @@ func @u_greater_than_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @u_greater_than_vector func @u_greater_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "ugt" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.UGreaterThan %arg0, %arg1 : vector<2xi64> return } @@ -157,7 +157,7 @@ func @u_less_than_equal_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @u_less_than_equal_vector func @u_less_than_equal_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "ule" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.ULessThanEqual %arg0, %arg1 : vector<2xi64> return } @@ -175,7 +175,7 @@ func @u_less_than_scalar(%arg0: i64, %arg1: i64) { // CHECK-LABEL: @u_less_than_vector func @u_less_than_vector(%arg0: vector<2xi64>, %arg1: vector<2xi64>) { - // CHECK: llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x i64>"> + // CHECK: llvm.icmp "ult" %{{.*}}, %{{.*}} : !llvm.vec<2 x i64> %0 = spv.ULessThan %arg0, %arg1 : vector<2xi64> return } @@ -193,7 +193,7 @@ func @f_ord_equal_scalar(%arg0: f32, %arg1: f32) { // CHECK-LABEL: @f_ord_equal_vector func @f_ord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + // CHECK: llvm.fcmp "oeq" %{{.*}}, %{{.*}} : !llvm.vec<4 x double> %0 = spv.FOrdEqual %arg0, %arg1 : vector<4xf64> return } @@ -211,9 +211,9 @@ func @f_ord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_ord_greater_than_equal_vector func @f_ord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> - %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : vector<2xf64> - return + // CHECK: llvm.fcmp "oge" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> + %0 = spv.FOrdGreaterThanEqual %arg0, %arg1 : 
vector<2xf64> + return } //===----------------------------------------------------------------------===// @@ -229,7 +229,7 @@ func @f_ord_greater_than_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_ord_greater_than_vector func @f_ord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "ogt" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FOrdGreaterThan %arg0, %arg1 : vector<2xf64> return } @@ -247,7 +247,7 @@ func @f_ord_less_than_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_ord_less_than_vector func @f_ord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "olt" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FOrdLessThan %arg0, %arg1 : vector<2xf64> return } @@ -265,7 +265,7 @@ func @f_ord_less_than_equal_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_ord_less_than_equal_vector func @f_ord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "ole" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FOrdLessThanEqual %arg0, %arg1 : vector<2xf64> return } @@ -283,7 +283,7 @@ func @f_ord_not_equal_scalar(%arg0: f32, %arg1: f32) { // CHECK-LABEL: @f_ord_not_equal_vector func @f_ord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + // CHECK: llvm.fcmp "one" %{{.*}}, %{{.*}} : !llvm.vec<4 x double> %0 = spv.FOrdNotEqual %arg0, %arg1 : vector<4xf64> return } @@ -301,7 +301,7 @@ func @f_unord_equal_scalar(%arg0: f32, %arg1: f32) { // CHECK-LABEL: @f_unord_equal_vector func @f_unord_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + // CHECK: llvm.fcmp "ueq" %{{.*}}, %{{.*}} : !llvm.vec<4 x double> %0 = spv.FUnordEqual %arg0, %arg1 : vector<4xf64> return } @@ -319,7 +319,7 @@ func @f_unord_greater_than_equal_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_unord_greater_than_equal_vector func @f_unord_greater_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "uge" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FUnordGreaterThanEqual %arg0, %arg1 : vector<2xf64> return } @@ -337,7 +337,7 @@ func @f_unord_greater_than_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_unord_greater_than_vector func @f_unord_greater_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "ugt" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FUnordGreaterThan %arg0, %arg1 : vector<2xf64> return } @@ -355,7 +355,7 @@ func @f_unord_less_than_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_unord_less_than_vector func @f_unord_less_than_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "ult" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FUnordLessThan %arg0, %arg1 : vector<2xf64> return } @@ -373,7 +373,7 @@ func @f_unord_less_than_equal_scalar(%arg0: f64, %arg1: f64) { // CHECK-LABEL: @f_unord_less_than_equal_vector func @f_unord_less_than_equal_vector(%arg0: vector<2xf64>, %arg1: vector<2xf64>) { - // CHECK: llvm.fcmp "ule" 
%{{.*}}, %{{.*}} : !llvm<"<2 x double>"> + // CHECK: llvm.fcmp "ule" %{{.*}}, %{{.*}} : !llvm.vec<2 x double> %0 = spv.FUnordLessThanEqual %arg0, %arg1 : vector<2xf64> return } @@ -391,7 +391,7 @@ func @f_unord_not_equal_scalar(%arg0: f32, %arg1: f32) { // CHECK-LABEL: @f_unord_not_equal_vector func @f_unord_not_equal_vector(%arg0: vector<4xf64>, %arg1: vector<4xf64>) { - // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm<"<4 x double>"> + // CHECK: llvm.fcmp "une" %{{.*}}, %{{.*}} : !llvm.vec<4 x double> %0 = spv.FUnordNotEqual %arg0, %arg1 : vector<4xf64> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir index b9605e7918577..dc84f404906c7 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir @@ -13,9 +13,9 @@ func @bool_constant_scalar() { } func @bool_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : !llvm<"<2 x i1>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : !llvm.vec<2 x i1> %0 = constant dense<[true, false]> : vector<2xi1> - // CHECK: {{.*}} = llvm.mlir.constant(dense<false> : vector<3xi1>) : !llvm<"<3 x i1>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<false> : vector<3xi1>) : !llvm.vec<3 x i1> %1 = constant dense<false> : vector<3xi1> return } @@ -31,11 +31,11 @@ func @integer_constant_scalar() { } func @integer_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : !llvm<"<2 x i32>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : !llvm.vec<2 x i32> %0 = spv.constant dense<[2, 3]> : vector<2xi32> - // CHECK: {{.*}} = llvm.mlir.constant(dense<-4> : vector<2xi32>) : !llvm<"<2 x i32>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<-4> : vector<2xi32>) : !llvm.vec<2 x i32> %1 = spv.constant dense<-4> : vector<2xsi32> - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm<"<3 x i32>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> %2 = spv.constant dense<[2, 3, 4]> : vector<3xui32> return } @@ -49,7 +49,7 @@ func @float_constant_scalar() { } func @float_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : !llvm<"<2 x float>"> + // CHECK: {{.*}} = llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : !llvm.vec<2 x float> %0 = spv.constant dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir index 23174f34fcecf..7e7820dee6bef 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir @@ -52,7 +52,7 @@ spv.func @scalar_types(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: f32) -> () "Non spv.Return } -// CHECK-LABEL: llvm.func @vector_types(%arg0: !llvm<"<2 x i64>">, %arg1: !llvm<"<2 x i64>">) -> !llvm<"<2 x i64>"> +// CHECK-LABEL: llvm.func @vector_types(%arg0: !llvm.vec<2 x i64>, %arg1: !llvm.vec<2 x i64>) -> !llvm.vec<2 x i64> spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi64> "None" { %0 = spv.IAdd %arg0, %arg1 : vector<2xi64> spv.ReturnValue %0 : vector<2xi64> @@ -63,15 +63,15 @@ spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi
//===----------------------------------------------------------------------===// // CHECK-LABEL: llvm.func @function_calls -// CHECK-SAME: %[[ARG0:.*]]: !llvm.i32, %[[ARG1:.*]]: !llvm.i1, %[[ARG2:.*]]: !llvm.double, %[[ARG3:.*]]: !llvm<"<2 x i64>">, %[[ARG4:.*]]: !llvm<"<2 x float>"> +// CHECK-SAME: %[[ARG0:.*]]: !llvm.i32, %[[ARG1:.*]]: !llvm.i1, %[[ARG2:.*]]: !llvm.double, %[[ARG3:.*]]: !llvm.vec<2 x i64>, %[[ARG4:.*]]: !llvm.vec<2 x float> spv.func @function_calls(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: vector<2xi64>, %arg4: vector<2xf32>) -> () "None" { // CHECK: llvm.call @void_1() : () -> () spv.FunctionCall @void_1() : () -> () - // CHECK: llvm.call @void_2(%[[ARG3]]) : (!llvm<"<2 x i64>">) -> () + // CHECK: llvm.call @void_2(%[[ARG3]]) : (!llvm.vec<2 x i64>) -> () spv.FunctionCall @void_2(%arg3) : (vector<2xi64>) -> () // CHECK: %{{.*}} = llvm.call @value_scalar(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!llvm.i32, !llvm.i1, !llvm.double) -> !llvm.i32 %0 = spv.FunctionCall @value_scalar(%arg0, %arg1, %arg2) : (i32, i1, f64) -> i32 - // CHECK: %{{.*}} = llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (!llvm<"<2 x i64>">, !llvm<"<2 x float>">) -> !llvm<"<2 x float>"> + // CHECK: %{{.*}} = llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (!llvm.vec<2 x i64>, !llvm.vec<2 x float>) -> !llvm.vec<2 x float> %1 = spv.FunctionCall @value_vector(%arg3, %arg4) : (vector<2xi64>, vector<2xf32>) -> vector<2xf32> spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir index ecb9843d64fdd..0cb975da56532 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/glsl-ops-to-llvm.mlir @@ -6,11 +6,11 @@ // CHECK-LABEL: @ceil func @ceil(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Ceil %arg0 : f32 - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Ceil %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.ceil"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Ceil %arg0 : f32 + // CHECK: "llvm.intr.ceil"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Ceil %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -19,11 +19,11 @@ func @ceil(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @cos func @cos(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.cos"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Cos %arg0 : f32 - // CHECK: "llvm.intr.cos"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Cos %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.cos"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Cos %arg0 : f32 + // CHECK: "llvm.intr.cos"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Cos %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -32,11 +32,11 @@ func @cos(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @exp func @exp(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.exp"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Exp %arg0 : f32 - // CHECK: "llvm.intr.exp"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Exp %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.exp"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Exp %arg0 : f32 + // CHECK: 
"llvm.intr.exp"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Exp %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -45,11 +45,11 @@ func @exp(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @fabs func @fabs(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.FAbs %arg0 : f32 - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.FAbs %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.fabs"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.FAbs %arg0 : f32 + // CHECK: "llvm.intr.fabs"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.FAbs %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -58,11 +58,11 @@ func @fabs(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @floor func @floor(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.floor"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Floor %arg0 : f32 - // CHECK: "llvm.intr.floor"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Floor %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.floor"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Floor %arg0 : f32 + // CHECK: "llvm.intr.floor"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Floor %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -71,11 +71,11 @@ func @floor(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @fmax func @fmax(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (!llvm.float, !llvm.float) -> !llvm.float - %0 = spv.GLSL.FMax %arg0, %arg0 : f32 - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (!llvm<"<3 x half>">, !llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.FMax %arg1, %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (!llvm.float, !llvm.float) -> !llvm.float + %0 = spv.GLSL.FMax %arg0, %arg0 : f32 + // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x half>, !llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.FMax %arg1, %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -84,11 +84,11 @@ func @fmax(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @fmin func @fmin(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (!llvm.float, !llvm.float) -> !llvm.float - %0 = spv.GLSL.FMin %arg0, %arg0 : f32 - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (!llvm<"<3 x half>">, !llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.FMin %arg1, %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (!llvm.float, !llvm.float) -> !llvm.float + %0 = spv.GLSL.FMin %arg0, %arg0 : f32 + // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x half>, !llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.FMin %arg1, %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -97,11 +97,11 @@ func @fmin(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @log func @log(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.log"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Log %arg0 : f32 - // CHECK: "llvm.intr.log"(%{{.*}}) 
: (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Log %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.log"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Log %arg0 : f32 + // CHECK: "llvm.intr.log"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Log %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -110,11 +110,11 @@ func @log(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @sin func @sin(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.sin"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Sin %arg0 : f32 - // CHECK: "llvm.intr.sin"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Sin %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.sin"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Sin %arg0 : f32 + // CHECK: "llvm.intr.sin"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Sin %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -123,11 +123,11 @@ func @sin(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @smax func @smax(%arg0: i16, %arg1: vector<3xi32>) { - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (!llvm.i16, !llvm.i16) -> !llvm.i16 - %0 = spv.GLSL.SMax %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (!llvm<"<3 x i32>">, !llvm<"<3 x i32>">) -> !llvm<"<3 x i32>"> - %1 = spv.GLSL.SMax %arg1, %arg1 : vector<3xi32> - return + // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (!llvm.i16, !llvm.i16) -> !llvm.i16 + %0 = spv.GLSL.SMax %arg0, %arg0 : i16 + // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x i32>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x i32> + %1 = spv.GLSL.SMax %arg1, %arg1 : vector<3xi32> + return } //===----------------------------------------------------------------------===// @@ -136,11 +136,11 @@ func @smax(%arg0: i16, %arg1: vector<3xi32>) { // CHECK-LABEL: @smin func @smin(%arg0: i16, %arg1: vector<3xi32>) { - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (!llvm.i16, !llvm.i16) -> !llvm.i16 - %0 = spv.GLSL.SMin %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (!llvm<"<3 x i32>">, !llvm<"<3 x i32>">) -> !llvm<"<3 x i32>"> - %1 = spv.GLSL.SMin %arg1, %arg1 : vector<3xi32> - return + // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (!llvm.i16, !llvm.i16) -> !llvm.i16 + %0 = spv.GLSL.SMin %arg0, %arg0 : i16 + // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (!llvm.vec<3 x i32>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x i32> + %1 = spv.GLSL.SMin %arg1, %arg1 : vector<3xi32> + return } //===----------------------------------------------------------------------===// @@ -149,11 +149,11 @@ func @smin(%arg0: i16, %arg1: vector<3xi32>) { // CHECK-LABEL: @sqrt func @sqrt(%arg0: f32, %arg1: vector<3xf16>) { - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (!llvm.float) -> !llvm.float - %0 = spv.GLSL.Sqrt %arg0 : f32 - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (!llvm<"<3 x half>">) -> !llvm<"<3 x half>"> - %1 = spv.GLSL.Sqrt %arg1 : vector<3xf16> - return + // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (!llvm.float) -> !llvm.float + %0 = spv.GLSL.Sqrt %arg0 : f32 + // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (!llvm.vec<3 x half>) -> !llvm.vec<3 x half> + %1 = spv.GLSL.Sqrt %arg1 : vector<3xf16> + return } //===----------------------------------------------------------------------===// @@ -162,11 +162,11 @@ func @sqrt(%arg0: f32, %arg1: vector<3xf16>) { // CHECK-LABEL: @tan func @tan(%arg0: f32) { - // CHECK: 
%[[SIN:.*]] = "llvm.intr.sin"(%{{.*}}) : (!llvm.float) -> !llvm.float + // CHECK: %[[SIN:.*]] = "llvm.intr.sin"(%{{.*}}) : (!llvm.float) -> !llvm.float // CHECK: %[[COS:.*]] = "llvm.intr.cos"(%{{.*}}) : (!llvm.float) -> !llvm.float - // CHECK: llvm.fdiv %[[SIN]], %[[COS]] : !llvm.float - %0 = spv.GLSL.Tan %arg0 : f32 - return + // CHECK: llvm.fdiv %[[SIN]], %[[COS]] : !llvm.float + %0 = spv.GLSL.Tan %arg0 : f32 + return } //===----------------------------------------------------------------------===// @@ -175,15 +175,15 @@ func @tan(%arg0: f32) { // CHECK-LABEL: @tanh func @tanh(%arg0: f32) { - // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float + // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float // CHECK: %[[X2:.*]] = llvm.fmul %[[TWO]], %{{.*}} : !llvm.float // CHECK: %[[EXP:.*]] = "llvm.intr.exp"(%[[X2]]) : (!llvm.float) -> !llvm.float // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float // CHECK: %[[T0:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : !llvm.float // CHECK: %[[T1:.*]] = llvm.fadd %[[EXP]], %[[ONE]] : !llvm.float // CHECK: llvm.fdiv %[[T0]], %[[T1]] : !llvm.float - %0 = spv.GLSL.Tanh %arg0 : f32 - return + %0 = spv.GLSL.Tanh %arg0 : f32 + return } //===----------------------------------------------------------------------===// @@ -192,9 +192,9 @@ func @tanh(%arg0: f32) { // CHECK-LABEL: @inverse_sqrt func @inverse_sqrt(%arg0: f32) { - // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%{{.*}}) : (!llvm.float) -> !llvm.float - // CHECK: llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.float - %0 = spv.GLSL.InverseSqrt %arg0 : f32 - return + // CHECK: llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.float + %0 = spv.GLSL.InverseSqrt %arg0 : f32 + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir index a816424d5b1c3..9c9035ae89339 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/logical-ops-to-llvm.mlir @@ -13,7 +13,7 @@ func @logical_equal_scalar(%arg0: i1, %arg1: i1) { // CHECK-LABEL: @logical_equal_vector func @logical_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + // CHECK: llvm.icmp "eq" %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> %0 = spv.LogicalEqual %arg0, %arg0 : vector<4xi1> return } @@ -31,7 +31,7 @@ func @logical_not_equal_scalar(%arg0: i1, %arg1: i1) { // CHECK-LABEL: @logical_not_equal_vector func @logical_not_equal_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + // CHECK: llvm.icmp "ne" %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> %0 = spv.LogicalNotEqual %arg0, %arg0 : vector<4xi1> return } @@ -50,8 +50,8 @@ func @logical_not_scalar(%arg0: i1) { // CHECK-LABEL: @logical_not_vector func @logical_not_vector(%arg0: vector<4xi1>) { - // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense<true> : vector<4xi1>) : !llvm<"<4 x i1>"> - // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm<"<4 x i1>"> + // CHECK: %[[CONST:.*]] = llvm.mlir.constant(dense<true> : vector<4xi1>) : !llvm.vec<4 x i1> + // CHECK: llvm.xor %{{.*}}, %[[CONST]] : !llvm.vec<4 x i1> %0 = spv.LogicalNot %arg0 : vector<4xi1> return } @@ -69,7 +69,7 @@ func @logical_and_scalar(%arg0: i1, %arg1: i1) { // CHECK-LABEL: @logical_and_vector func
@logical_and_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + // CHECK: llvm.and %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> %0 = spv.LogicalAnd %arg0, %arg0 : vector<4xi1> return } @@ -87,7 +87,7 @@ func @logical_or_scalar(%arg0: i1, %arg1: i1) { // CHECK-LABEL: @logical_or_vector func @logical_or_vector(%arg0: vector<4xi1>, %arg1: vector<4xi1>) { - // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm<"<4 x i1>"> + // CHECK: llvm.or %{{.*}}, %{{.*}} : !llvm.vec<4 x i1> %0 = spv.LogicalOr %arg0, %arg0 : vector<4xi1> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir index f717948f8069d..6dafab982e35b 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir @@ -6,35 +6,35 @@ func @load() { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm<"float*"> + // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr<float> %1 = spv.Load "Function" %0 : f32 return } func @load_none() { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm<"float*"> + // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr<float> %1 = spv.Load "Function" %0 ["None"] : f32 return } func @load_with_alignment() { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %{{.*}} = llvm.load %{{.*}} {alignment = 4 : i64} : !llvm<"float*"> + // CHECK: %{{.*}} = llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<float> %1 = spv.Load "Function" %0 ["Aligned", 4] : f32 return } func @load_volatile() { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %{{.*}} = llvm.load volatile %{{.*}} : !llvm<"float*"> + // CHECK: %{{.*}} = llvm.load volatile %{{.*}} : !llvm.ptr<float> %1 = spv.Load "Function" %0 ["Volatile"] : f32 return } func @load_nontemporal() { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %{{.*}} = llvm.load %{{.*}} {nontemporal} : !llvm<"float*"> + // CHECK: %{{.*}} = llvm.load %{{.*}} {nontemporal} : !llvm.ptr<float> %1 = spv.Load "Function" %0 ["Nontemporal"] : f32 return } @@ -45,35 +45,35 @@ func @load_nontemporal() { func @store(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: llvm.store %{{.*}}, %{{.*}} : !llvm<"float*"> + // CHECK: llvm.store %{{.*}}, %{{.*}} : !llvm.ptr<float> spv.Store "Function" %0, %arg0 : f32 return } func @store_composite(%arg0 : !spv.struct<f64>) -> () { %0 = spv.Variable : !spv.ptr<!spv.struct<f64>, Function> - // CHECK: llvm.store %{{.*}}, %{{.*}} : !llvm<"<{ double }>*"> + // CHECK: llvm.store %{{.*}}, %{{.*}} : !llvm.ptr<struct<packed (double)>> spv.Store "Function" %0, %arg0 : !spv.struct<f64> return } func @store_with_alignment(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 4 : i64} : !llvm<"float*"> + // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 4 : i64} : !llvm.ptr<float> spv.Store "Function" %0, %arg0 ["Aligned", 4] : f32 return } func @store_volatile(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: llvm.store volatile %{{.*}}, %{{.*}} : !llvm<"float*"> + // CHECK: llvm.store volatile %{{.*}}, %{{.*}} : !llvm.ptr<float> spv.Store "Function" %0, %arg0 ["Volatile"] : f32 return } func @store_nontemporal(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: llvm.store %{{.*}}, %{{.*}} {nontemporal} : !llvm<"float*"> + // CHECK: llvm.store %{{.*}}, %{{.*}} {nontemporal} : !llvm.ptr<float> spv.Store "Function" %0, %arg0 ["Nontemporal"] : f32 return } @@ -83,45 +83,45 @@ func @store_nontemporal(%arg0 : f32) -> () { //===----------------------------------------------------------------------===// func @variable_scalar() { - // CHECK: %[[SIZE1:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE1]] x !llvm.float : (!llvm.i32) -> !llvm<"float*"> - %0 = spv.Variable : !spv.ptr<f32, Function> - // CHECK: %[[SIZE2:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE2]] x !llvm.i8 : (!llvm.i32) -> !llvm<"i8*"> - %1 = spv.Variable : !spv.ptr<i8, Function> + // CHECK: %[[SIZE1:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %{{.*}} = llvm.alloca %[[SIZE1]] x !llvm.float : (!llvm.i32) -> !llvm.ptr<float> + %0 = spv.Variable : !spv.ptr<f32, Function> + // CHECK: %[[SIZE2:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %{{.*}} = llvm.alloca %[[SIZE2]] x !llvm.i8 : (!llvm.i32) -> !llvm.ptr<i8> + %1 = spv.Variable : !spv.ptr<i8, Function> return } func @variable_scalar_with_initialization() { - // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x !llvm.i64 : (!llvm.i32) -> !llvm<"i64*"> - // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : !llvm<"i64*"> - %c = spv.constant 0 : i64 - %0 = spv.Variable init(%c) : !spv.ptr<i64, Function> + // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x !llvm.i64 : (!llvm.i32) -> !llvm.ptr<i64> + // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : !llvm.ptr<i64> + %c = spv.constant 0 : i64 + %0 = spv.Variable init(%c) : !spv.ptr<i64, Function> return } func @variable_vector() { - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm<"<3 x float>"> : (!llvm.i32) -> !llvm<"<3 x float>*"> - %0 = spv.Variable : !spv.ptr<vector<3xf32>, Function> - return + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm.vec<3 x float> : (!llvm.i32) -> !llvm.ptr<vec<3 x float>> + %0 = spv.Variable : !spv.ptr<vector<3xf32>, Function> + return } func @variable_vector_with_initialization() { - // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(dense<false> : vector<3xi1>) : !llvm<"<3 x i1>"> - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x !llvm<"<3 x i1>"> : (!llvm.i32) -> !llvm<"<3 x i1>*"> - // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : !llvm<"<3 x i1>*"> - %c = spv.constant dense<false> : vector<3xi1> - %0 = spv.Variable init(%c) : !spv.ptr<vector<3xi1>, Function> + // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(dense<false> : vector<3xi1>) : !llvm.vec<3 x i1> + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %[[ALLOCATED:.*]] = llvm.alloca %[[SIZE]] x !llvm.vec<3 x i1> : (!llvm.i32) -> !llvm.ptr<vec<3 x i1>> + // CHECK: llvm.store %[[VALUE]], %[[ALLOCATED]] : !llvm.ptr<vec<3 x i1>> + %c = spv.constant dense<false> : vector<3xi1> + %0 = spv.Variable init(%c) : !spv.ptr<vector<3xi1>, Function> return } func @variable_array() { - // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm<"[10 x i32]"> : (!llvm.i32) -> !llvm<"[10 x i32]*"> - %0 = spv.Variable : !spv.ptr<!spv.array<10 x i32>, Function> - return + // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 + // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm.array<10 x i32> : (!llvm.i32) -> !llvm.ptr<array<10 x i32>> + %0 = spv.Variable : !spv.ptr<!spv.array<10 x i32>, Function> + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir
b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index 8f407ba8f596c..60fe79c0007e8 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// func @select_scalar(%arg0: i1, %arg1: vector<3xi32>, %arg2: f32) { - // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm<"<3 x i32>"> + // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.vec<3 x i32> %0 = spv.Select %arg0, %arg1, %arg1 : i1, vector<3xi32> // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.float %1 = spv.Select %arg0, %arg2, %arg2 : i1, f32 @@ -13,7 +13,7 @@ func @select_scalar(%arg0: i1, %arg1: vector<3xi32>, %arg2: f32) { } func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) { - // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm<"<2 x i1>">, !llvm<"<2 x i32>"> + // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.vec<2 x i1>, !llvm.vec<2 x i32> %0 = spv.Select %arg0, %arg1, %arg1 : vector<2xi1>, vector<2xi32> return } @@ -29,7 +29,7 @@ func @undef_scalar() { } func @undef_vector() { - // CHECK: %{{.*}} = llvm.mlir.undef : !llvm<"<2 x i32>"> + // CHECK: %{{.*}} = llvm.mlir.undef : !llvm.vec<2 x i32> %0 = spv.undef : vector<2xi32> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir index 09e396d6400fd..41431ea3b3b20 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir @@ -22,18 +22,18 @@ func @shift_right_arithmetic_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 } func @shift_right_arithmetic_vector(%arg0: vector<4xi64>, %arg1: vector<4xui64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %0 = spv.ShiftRightArithmetic %arg0, %arg0 : vector<4xi64>, vector<4xi64> - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %1 = spv.ShiftRightArithmetic %arg0, %arg1 : vector<4xi64>, vector<4xui64> - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT1:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> %2 = spv.ShiftRightArithmetic %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT2:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> %3 = spv.ShiftRightArithmetic %arg0, %arg3 : vector<4xi64>, vector<4xui32> return } @@ -60,18 +60,18 @@ func @shift_right_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : si16, %arg3 : } func @shift_right_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %0 = spv.ShiftRightLogical %arg0, %arg0 
: vector<4xi64>, vector<4xi64> - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %1 = spv.ShiftRightLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT1:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> %2 = spv.ShiftRightLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT2:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> %3 = spv.ShiftRightLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> return } @@ -98,18 +98,18 @@ func @shift_left_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 : ui } func @shift_left_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %0 = spv.ShiftLeftLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm<"<4 x i64>"> + // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> %1 = spv.ShiftLeftLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT1:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> %2 = spv.ShiftLeftLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT2:.*]]: !llvm<"<4 x i64>"> + // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> %3 = spv.ShiftLeftLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir index 466f8ad4a9eac..d6618a7de7fed 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir @@ -4,35 +4,35 @@ // Array type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @array(!llvm<"[16 x float]">, !llvm<"[32 x <4 x float>]">) +// CHECK-LABEL: @array(!llvm.array<16 x float>, !llvm.array<32 x vec<4 x float>>) func @array(!spv.array<16xf32>, !spv.array< 32 x vector<4xf32> >) -> () //===----------------------------------------------------------------------===// // Pointer type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @pointer_scalar(!llvm<"i1*">, !llvm<"float*">) +// CHECK-LABEL: @pointer_scalar(!llvm.ptr<i1>, !llvm.ptr<float>) func @pointer_scalar(!spv.ptr<i1, Uniform>, !spv.ptr<f32, Private>) -> () -// CHECK-LABEL: @pointer_vector(!llvm<"<4 x i32>*">) +// CHECK-LABEL:
@pointer_vector(!llvm.ptr>) func @pointer_vector(!spv.ptr, Function>) -> () //===----------------------------------------------------------------------===// // Runtime array type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @runtime_array_vector(!llvm<"[0 x <4 x float>]">) +// CHECK-LABEL: @runtime_array_vector(!llvm.array<0 x vec<4 x float>>) func @runtime_array_vector(!spv.rtarray< vector<4xf32> >) -> () -// CHECK-LABEL: @runtime_array_scalar(!llvm<"[0 x float]">) +// CHECK-LABEL: @runtime_array_scalar(!llvm.array<0 x float>) func @runtime_array_scalar(!spv.rtarray) -> () //===----------------------------------------------------------------------===// // Struct type //===----------------------------------------------------------------------===// -// CHECK-LABEL: @struct(!llvm<"<{ double }>">) +// CHECK-LABEL: @struct(!llvm.struct) func @struct(!spv.struct) -> () -// CHECK-LABEL: @struct_nested(!llvm<"<{ i32, <{ i64, i32 }> }>">) +// CHECK-LABEL: @struct_nested(!llvm.struct)>) func @struct_nested(!spv.struct>) diff --git a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir index e17bf3e24225a..4cdf9d6bace87 100644 --- a/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir +++ b/mlir/test/Conversion/StandardToLLVM/calling-convention.mlir @@ -8,11 +8,11 @@ // An external function is transformed into the glue around calling an interface function. // CHECK-LABEL: @external -// CHECK: %[[ALLOC0:.*]]: !llvm<"float*">, %[[ALIGN0:.*]]: !llvm<"float*">, %[[OFFSET0:.*]]: !llvm.i64, %[[SIZE00:.*]]: !llvm.i64, %[[SIZE01:.*]]: !llvm.i64, %[[STRIDE00:.*]]: !llvm.i64, %[[STRIDE01:.*]]: !llvm.i64, -// CHECK: %[[ALLOC1:.*]]: !llvm<"float*">, %[[ALIGN1:.*]]: !llvm<"float*">, %[[OFFSET1:.*]]: !llvm.i64) +// CHECK: %[[ALLOC0:.*]]: !llvm.ptr, %[[ALIGN0:.*]]: !llvm.ptr, %[[OFFSET0:.*]]: !llvm.i64, %[[SIZE00:.*]]: !llvm.i64, %[[SIZE01:.*]]: !llvm.i64, %[[STRIDE00:.*]]: !llvm.i64, %[[STRIDE01:.*]]: !llvm.i64, +// CHECK: %[[ALLOC1:.*]]: !llvm.ptr, %[[ALIGN1:.*]]: !llvm.ptr, %[[OFFSET1:.*]]: !llvm.i64) func @external(%arg0: memref, %arg1: memref) // Populate the descriptor for arg0. - // CHECK: %[[DESC00:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESC00:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC01:.*]] = llvm.insertvalue %arg0, %[[DESC00]][0] // CHECK: %[[DESC02:.*]] = llvm.insertvalue %arg1, %[[DESC01]][1] // CHECK: %[[DESC03:.*]] = llvm.insertvalue %arg2, %[[DESC02]][2] @@ -23,18 +23,18 @@ func @external(%arg0: memref, %arg1: memref) // Allocate on stack and store to comply with C calling convention. // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) - // CHECK: %[[DESC0_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[DESC0_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.store %[[DESC07]], %[[DESC0_ALLOCA]] // Populate the descriptor for arg1. 
  // Populate the descriptor for arg1.
-  // CHECK: %[[DESC10:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64 }">
-  // CHECK: %[[DESC11:.*]] = llvm.insertvalue %arg7, %[[DESC10]][0] : !llvm<"{ float*, float*, i64 }">
-  // CHECK: %[[DESC12:.*]] = llvm.insertvalue %arg8, %[[DESC11]][1] : !llvm<"{ float*, float*, i64 }">
-  // CHECK: %[[DESC13:.*]] = llvm.insertvalue %arg9, %[[DESC12]][2] : !llvm<"{ float*, float*, i64 }">
+  // CHECK: %[[DESC10:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64)>
+  // CHECK: %[[DESC11:.*]] = llvm.insertvalue %arg7, %[[DESC10]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64)>
+  // CHECK: %[[DESC12:.*]] = llvm.insertvalue %arg8, %[[DESC11]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64)>
+  // CHECK: %[[DESC13:.*]] = llvm.insertvalue %arg9, %[[DESC12]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64)>

  // Allocate on stack and store to comply with C calling convention.
  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index)
-  // CHECK: %[[DESC1_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm<"{ float*, float*, i64 }">
+  // CHECK: %[[DESC1_ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr<float>, ptr<float>, i64)>
  // CHECK: llvm.store %[[DESC13]], %[[DESC1_ALLOCA]]

  // Call the interface function.
@@ -42,18 +42,18 @@ func @external(%arg0: memref<?x?xf32>, %arg1: memref<f32>)

 // Verify that an interface function is emitted.
 // CHECK-LABEL: llvm.func @_mlir_ciface_external
-// CHECK: (!llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*">, !llvm<"{ float*, float*, i64 }*">)
+// CHECK: (!llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>>, !llvm.ptr<struct<(ptr<float>, ptr<float>, i64)>>)

 // Verify that the return value is not affected.
 // CHECK-LABEL: @returner
-// CHECK: -> !llvm<"{ { float*, float*, i64, [2 x i64], [2 x i64] }, { float*, float*, i64 } }">
+// CHECK: -> !llvm.struct<(struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>, struct<(ptr<float>, ptr<float>, i64)>)>
 func @returner() -> (memref<?x?xf32>, memref<f32>)

 // CHECK-LABEL: @caller
 func @caller() {
  %0:2 = call @returner() : () -> (memref<?x?xf32>, memref<f32>)
  // Extract individual values from the descriptor for the first memref.
-  // CHECK: %[[ALLOC0:.*]] = llvm.extractvalue %[[DESC0:.*]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+  // CHECK: %[[ALLOC0:.*]] = llvm.extractvalue %[[DESC0:.*]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[ALIGN0:.*]] = llvm.extractvalue %[[DESC0]][1]
  // CHECK: %[[OFFSET0:.*]] = llvm.extractvalue %[[DESC0]][2]
  // CHECK: %[[SIZE00:.*]] = llvm.extractvalue %[[DESC0]][3, 0]
@@ -62,12 +62,12 @@ func @caller() {
  // CHECK: %[[STRIDE01:.*]] = llvm.extractvalue %[[DESC0]][4, 1]

  // Extract individual values from the descriptor for the second memref.
-  // CHECK: %[[ALLOC1:.*]] = llvm.extractvalue %[[DESC1:.*]][0] : !llvm<"{ float*, float*, i64 }">
+  // CHECK: %[[ALLOC1:.*]] = llvm.extractvalue %[[DESC1:.*]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64)>
  // CHECK: %[[ALIGN1:.*]] = llvm.extractvalue %[[DESC1]][1]
  // CHECK: %[[OFFSET1:.*]] = llvm.extractvalue %[[DESC1]][2]
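The second result needs only three extractions because memref<f32> is rank 0: its descriptor carries no size or stride arrays, just the two pointers and the offset. As a stand-alone sketch, with %d a hypothetical descriptor value of that type:

    %alloc  = llvm.extractvalue %d[0] : !llvm.struct<(ptr<float>, ptr<float>, i64)>
    %align  = llvm.extractvalue %d[1] : !llvm.struct<(ptr<float>, ptr<float>, i64)>
    %offset = llvm.extractvalue %d[2] : !llvm.struct<(ptr<float>, ptr<float>, i64)>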
  // Forward the values to the call.
-  // CHECK: llvm.call @external(%[[ALLOC0]], %[[ALIGN0]], %[[OFFSET0]], %[[SIZE00]], %[[SIZE01]], %[[STRIDE00]], %[[STRIDE01]], %[[ALLOC1]], %[[ALIGN1]], %[[OFFSET1]]) : (!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm<"float*">, !llvm<"float*">, !llvm.i64) -> ()
+  // CHECK: llvm.call @external(%[[ALLOC0]], %[[ALIGN0]], %[[OFFSET0]], %[[SIZE00]], %[[SIZE01]], %[[STRIDE00]], %[[STRIDE01]], %[[ALLOC1]], %[[ALIGN1]], %[[OFFSET1]]) : (!llvm.ptr<float>, !llvm.ptr<float>, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.ptr<float>, !llvm.ptr<float>, !llvm.i64) -> ()
  call @external(%0#0, %0#1) : (memref<?x?xf32>, memref<f32>) -> ()
  return
}
@@ -81,9 +81,9 @@ func @callee(%arg0: memref<?xf32>, %arg1: index) {

 // Verify that an interface function is emitted.
 // CHECK-LABEL: @_mlir_ciface_callee
-// CHECK: %[[ARG0:.*]]: !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">
+// CHECK: %[[ARG0:.*]]: !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>

  // Load the memref descriptor pointer.
-  // CHECK: %[[DESC:.*]] = llvm.load %[[ARG0]] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }*">
+  // CHECK: %[[DESC:.*]] = llvm.load %[[ARG0]] : !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>>

  // Extract individual components of the descriptor.
  // CHECK: %[[ALLOC:.*]] = llvm.extractvalue %[[DESC]][0]
@@ -93,7 +93,7 @@ func @callee(%arg0: memref<?xf32>, %arg1: index) {
  // CHECK: %[[STRIDE:.*]] = llvm.extractvalue %[[DESC]][4, 0]

  // Forward the descriptor components to the call.
-  // CHECK: llvm.call @callee(%[[ALLOC]], %[[ALIGN]], %[[OFFSET]], %[[SIZE]], %[[STRIDE]], %{{.*}}) : (!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64) -> ()
+  // CHECK: llvm.call @callee(%[[ALLOC]], %[[ALIGN]], %[[OFFSET]], %[[SIZE]], %[[STRIDE]], %{{.*}}) : (!llvm.ptr<float>, !llvm.ptr<float>, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64) -> ()

 // EMIT_C_ATTRIBUTE-NOT: @mlir_ciface_callee
@@ -126,7 +126,7 @@ func @return_var_memref_caller(%arg0: memref<4x3xf32>) {
  // CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant
  // CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]]
-  // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm<"{ i64, i8* }">
+  // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr<i8>)>
  // CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]]
  // CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]]
  // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]]
@@ -136,8 +136,8 @@ func @return_var_memref_caller(%arg0: memref<4x3xf32>) {
  // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[CALL_RES]][1]
  // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]])
  // CHECK: llvm.call @free(%[[SOURCE]])
-  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
-  // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm<"{ i64, i8* }">
+  // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+  // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr<i8>)>
  // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[DESC]][0]
  // CHECK: llvm.insertvalue %[[ALLOCA]], %[[DESC_1]][1]
  return
@@ -148,7 +148,7 @@ func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> {
  // Match the construction of the unranked descriptor.
// CHECK: %[[ALLOCA:.*]] = llvm.alloca // CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]] - // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }"> + // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0] // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1] %0 = memref_cast %arg0: memref<4x3xf32> to memref<*xf32> @@ -160,7 +160,7 @@ func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> { // CHECK: %[[IDX_SIZE:.*]] = llvm.mlir.constant // CHECK: %[[DOUBLE_PTR_SIZE:.*]] = llvm.mul %[[TWO]], %[[PTR_SIZE]] - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm<"{ i64, i8* }"> + // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm.struct<(i64, ptr)> // CHECK: %[[DOUBLE_RANK:.*]] = llvm.mul %[[TWO]], %[[RANK]] // CHECK: %[[DOUBLE_RANK_INC:.*]] = llvm.add %[[DOUBLE_RANK]], %[[ONE]] // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]] @@ -169,8 +169,8 @@ func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> { // CHECK: %[[ALLOCATED:.*]] = llvm.call @malloc(%[[ALLOC_SIZE]]) // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[DESC_2]][1] // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]]) - // CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }"> - // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm<"{ i64, i8* }"> + // CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr)> + // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[DESC_2]][0] : !llvm.struct<(i64, ptr)> // CHECK: %[[NEW_DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[NEW_DESC]][0] // CHECK: %[[NEW_DESC_2:.*]] = llvm.insertvalue %[[ALLOCATED]], %[[NEW_DESC_1]][1] // CHECL: llvm.return %[[NEW_DESC_2]] @@ -210,7 +210,7 @@ func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*x // Match the construction of the unranked descriptor. 
// CHECK: %[[ALLOCA:.*]] = llvm.alloca
  // CHECK: %[[MEMORY:.*]] = llvm.bitcast %[[ALLOCA]]
-  // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm<"{ i64, i8* }">
+  // CHECK: %[[DESC_0:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
  // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_0]][0]
  // CHECK: %[[DESC_2:.*]] = llvm.insertvalue %[[MEMORY]], %[[DESC_1]][1]
  %0 = memref_cast %arg0 : memref<4x3xf32> to memref<*xf32>
@@ -233,7 +233,7 @@ func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memref<*x
  // CHECK: %[[RES_21:.*]] = llvm.insertvalue %{{.*}}, %[[RES_2]][0]
  // CHECK: %[[RES_22:.*]] = llvm.insertvalue %[[ALLOCATED_2]], %[[RES_21]][1]

-  // CHECK: %[[RESULTS:.*]] = llvm.mlir.undef : !llvm<"{ { i64, i8* }, { i64, i8* } }">
+  // CHECK: %[[RESULTS:.*]] = llvm.mlir.undef : !llvm.struct<(struct<(i64, ptr<i8>)>, struct<(i64, ptr<i8>)>)>
  // CHECK: %[[RESULTS_1:.*]] = llvm.insertvalue %[[RES_12]], %[[RESULTS]]
  // CHECK: %[[RESULTS_2:.*]] = llvm.insertvalue %[[RES_22]], %[[RESULTS_1]]
  // CHECK: llvm.return %[[RESULTS_2]]
diff --git a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
index 32365f362d047..e5d8ca72d9ad6 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-dynamic-memref-ops.mlir
@@ -2,11 +2,11 @@
 // RUN: mlir-opt -convert-std-to-llvm='use-aligned-alloc=1' %s | FileCheck %s --check-prefix=ALIGNED-ALLOC

 // CHECK-LABEL: func @check_strided_memref_arguments(
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
 func @check_strided_memref_arguments(%static: memref<10x20xf32, affine_map<(i,j)->(20 * i + j + 1)>>,
                                     %dynamic : memref<?x?xf32, affine_map<(i,j)[M]->(M * i + j + 1)>>,
@@ -15,148 +15,148 @@ func @check_strided_memref_arguments(%static: memref<10x20xf32, affine_map<(i,j)
 }

 // CHECK-LABEL: func @check_arguments
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
-// CHECK-COUNT-2: !llvm<"float*">
+// CHECK-COUNT-2: !llvm.ptr<float>
 // CHECK-COUNT-5: !llvm.i64
 func @check_arguments(%static: memref<10x20xf32>, %dynamic : memref<?x?xf32>, %mixed : memref<10x?xf32>) {
  return
}

// CHECK-LABEL: func @mixed_alloc(
-// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> {
+// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm.struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)> {
func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
// CHECK: %[[c42:.*]] = llvm.mlir.constant(42 : index) : !llvm.i64
// CHECK-NEXT: llvm.mul %[[M]], %[[c42]] : !llvm.i64
// CHECK-NEXT: %[[sz:.*]] = llvm.mul %{{.*}}, %[[N]] : !llvm.i64
-// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*">
+// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<float>
// CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
-// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
-// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64
+// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+// CHECK-NEXT:
%[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK-NEXT: %[[st2:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mul %{{.*}}, %[[N]] : !llvm.i64 // CHECK-NEXT: %[[st0:.*]] = llvm.mul %{{.*}}, %[[c42]] : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[c42]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st2]], %{{.*}}[4, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[c42]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st2]], %{{.*}}[4, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> %0 = alloc(%arg0, %arg1) : memref -// CHECK-NEXT: llvm.return %{{.*}} : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK-NEXT: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> return %0 : memref } // CHECK-LABEL: func @mixed_dealloc func @mixed_dealloc(%arg0: memref) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to 
!llvm<"i8*"> -// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm<"i8*">) -> () +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () dealloc %arg0 : memref // CHECK-NEXT: llvm.return return } // CHECK-LABEL: func @dynamic_alloc( -// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @dynamic_alloc(%arg0: index, %arg1: index) -> memref { // CHECK: %[[sz:.*]] = llvm.mul %[[M]], %[[N]] : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[st0:.*]] = llvm.mul %{{.*}}, %[[N]] : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, 
array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = alloc(%arg0, %arg1) : memref -// CHECK-NEXT: llvm.return %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> return %0 : memref } // ----- // CHECK-LABEL: func @dynamic_alloca -// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// CHECK: %[[M:.*]]: !llvm.i64, %[[N:.*]]: !llvm.i64) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @dynamic_alloca(%arg0: index, %arg1: index) -> memref { // CHECK: %[[num_elems:.*]] = llvm.mul %[[M]], %[[N]] : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x !llvm.float : (!llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[off]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[st0:.*]] = llvm.mul %{{.*}}, %[[N]] : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[M]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x 
i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st0]], %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[N]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[st1]], %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = alloca(%arg0, %arg1) : memref // Test with explicitly specified alignment. llvm.alloca takes care of the // alignment. The same pointer is thus used for allocation and aligned // accesses. -// CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm<"float*"> -// CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm.ptr +// CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> alloca(%arg0, %arg1) {alignment = 32} : memref return %0 : memref } // CHECK-LABEL: func @dynamic_dealloc func @dynamic_dealloc(%arg0: memref) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to !llvm<"i8*"> -// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm<"i8*">) -> () +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.call @free(%[[ptri8]]) : (!llvm.ptr) -> () dealloc %arg0 : memref return } -// CHECK-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { -// ALIGNED-ALLOC-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// CHECK-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { +// ALIGNED-ALLOC-LABEL: func @stdlib_aligned_alloc({{.*}}) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> { // ALIGNED-ALLOC-NEXT: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[num_elems:.*]] = llvm.mul %0, %1 : !llvm.i64 -// ALIGNED-ALLOC-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// ALIGNED-ALLOC-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // ALIGNED-ALLOC-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// ALIGNED-ALLOC-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// ALIGNED-ALLOC-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// ALIGNED-ALLOC-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) 
-> !llvm.ptr +// ALIGNED-ALLOC-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64 -// ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (!llvm.i64, !llvm.i64) -> !llvm<"i8*"> -// ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm<"i8*"> to !llvm<"float*"> +// ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (!llvm.i64, !llvm.i64) -> !llvm.ptr +// ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr %0 = alloc() {alignment = 32} : memref<32x18xf32> // Do another alloc just to test that we have a unique declaration for // aligned_alloc. @@ -190,28 +190,28 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> { } // CHECK-LABEL: func @mixed_load( -// CHECK-COUNT-2: !llvm<"float*">, +// CHECK-COUNT-2: !llvm.ptr, // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64 // CHECK: %[[I:.*]]: !llvm.i64, // CHECK: %[[J:.*]]: !llvm.i64) func @mixed_load(%mixed : memref<42x?xf32>, %i : index, %j : index) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 // CHECK-NEXT: %[[off0:.*]] = llvm.add %[[off]], %[[offI]] : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.load %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.load %[[addr]] : !llvm.ptr %0 = load %mixed[%i, %j] : memref<42x?xf32> return } // CHECK-LABEL: func @dynamic_load( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*"> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*"> +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64 @@ -220,23 +220,23 @@ func @mixed_load(%mixed : memref<42x?xf32>, %i : index, %j : index) { // CHECK-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 func @dynamic_load(%dynamic : memref, %i : index, %j : index) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: 
%[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 // CHECK-NEXT: %[[off0:.*]] = llvm.add %[[off]], %[[offI]] : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.load %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.load %[[addr]] : !llvm.ptr %0 = load %dynamic[%i, %j] : memref return } // CHECK-LABEL: func @prefetch -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*"> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*"> +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64 @@ -245,36 +245,36 @@ func @dynamic_load(%dynamic : memref, %i : index, %j : index) { // CHECK-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 func @prefetch(%A : memref, %i : index, %j : index) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 // CHECK-NEXT: %[[off0:.*]] = llvm.add %[[off]], %[[offI]] : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr // CHECK-NEXT: [[C1:%.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 // CHECK-NEXT: [[C3:%.*]] = llvm.mlir.constant(3 : i32) : !llvm.i32 // CHECK-NEXT: [[C1_1:%.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 -// CHECK-NEXT: "llvm.intr.prefetch"(%[[addr]], [[C1]], [[C3]], [[C1_1]]) : (!llvm<"float*">, !llvm.i32, !llvm.i32, !llvm.i32) -> () +// CHECK-NEXT: "llvm.intr.prefetch"(%[[addr]], [[C1]], [[C3]], [[C1_1]]) : (!llvm.ptr, !llvm.i32, !llvm.i32, !llvm.i32) -> () prefetch %A[%i, %j], write, locality<3>, data : memref // CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: [[C0_1:%.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: [[C1_2:%.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 -// CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0]], [[C0_1]], [[C1_2]]) : (!llvm<"float*">, !llvm.i32, !llvm.i32, !llvm.i32) -> () +// 
CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0]], [[C0_1]], [[C1_2]]) : (!llvm.ptr, !llvm.i32, !llvm.i32, !llvm.i32) -> () prefetch %A[%i, %j], read, locality<0>, data : memref // CHECK: [[C0_2:%.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : i32) : !llvm.i32 // CHECK: [[C0_3:%.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0_2]], [[C2]], [[C0_3]]) : (!llvm<"float*">, !llvm.i32, !llvm.i32, !llvm.i32) -> () +// CHECK: "llvm.intr.prefetch"(%{{.*}}, [[C0_2]], [[C2]], [[C0_3]]) : (!llvm.ptr, !llvm.i32, !llvm.i32, !llvm.i32) -> () prefetch %A[%i, %j], read, locality<2>, instr : memref return } // CHECK-LABEL: func @dynamic_store -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*"> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*"> +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64 @@ -283,23 +283,23 @@ func @prefetch(%A : memref, %i : index, %j : index) { // CHECK-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 func @dynamic_store(%dynamic : memref, %i : index, %j : index, %val : f32) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 // CHECK-NEXT: %[[off0:.*]] = llvm.add %[[off]], %[[offI]] : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr store %val, %dynamic[%i, %j] : memref return } // CHECK-LABEL: func @mixed_store -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*"> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*"> +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64 @@ -308,65 +308,65 @@ func @dynamic_store(%dynamic : memref, %i : index, %j : index, %val : f // CHECK-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 func @mixed_store(%mixed : memref<42x?xf32>, %i : index, %j : index, %val : f32) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = 
llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-NEXT: %[[st0:.*]] = llvm.extractvalue %[[ld]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 // CHECK-NEXT: %[[off0:.*]] = llvm.add %[[off]], %[[offI]] : !llvm.i64 // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr store %val, %mixed[%i, %j] : memref<42x?xf32> return } // CHECK-LABEL: func @memref_cast_static_to_dynamic func @memref_cast_static_to_dynamic(%static : memref<10x42xf32>) { -// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = memref_cast %static : memref<10x42xf32> to memref return } // CHECK-LABEL: func @memref_cast_static_to_mixed func @memref_cast_static_to_mixed(%static : memref<10x42xf32>) { -// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = memref_cast %static : memref<10x42xf32> to memref return } // CHECK-LABEL: func @memref_cast_dynamic_to_static func @memref_cast_dynamic_to_static(%dynamic : memref) { -// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = memref_cast %dynamic : memref to memref<10x12xf32> return } // CHECK-LABEL: func @memref_cast_dynamic_to_mixed func @memref_cast_dynamic_to_mixed(%dynamic : memref) { -// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = memref_cast %dynamic : memref to memref return } // CHECK-LABEL: func @memref_cast_mixed_to_dynamic func @memref_cast_mixed_to_dynamic(%mixed : memref<42x?xf32>) { -// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %0 = memref_cast %mixed : memref<42x?xf32> to memref return } // CHECK-LABEL: func @memref_cast_mixed_to_static func @memref_cast_mixed_to_static(%mixed : 
memref<42x?xf32>) {
-// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  %0 = memref_cast %mixed : memref<42x?xf32> to memref<42x1xf32>
  return
}

// CHECK-LABEL: func @memref_cast_mixed_to_mixed
func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) {
-// CHECK: llvm.bitcast %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> to !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+// CHECK: llvm.bitcast %{{.*}} : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)> to !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  %0 = memref_cast %mixed : memref<42x?xf32> to memref<?x1xf32>
  return
}
@@ -374,21 +374,21 @@ func @memref_cast_mixed_to_mixed(%mixed : memref<42x?xf32>) {

// CHECK-LABEL: func @memref_cast_ranked_to_unranked
func @memref_cast_ranked_to_unranked(%arg : memref<42x2x?xf32>) {
// CHECK-DAG: %[[c:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
-// CHECK-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> : (!llvm.i64) -> !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }*">
-// CHECK-DAG: llvm.store %{{.*}}, %[[p]] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }*">
-// CHECK-DAG: %[[p2:.*]] = llvm.bitcast %[[p]] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }*"> to !llvm<"i8*">
+// CHECK-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm.struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)> : (!llvm.i64) -> !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>>
+// CHECK-DAG: llvm.store %{{.*}}, %[[p]] : !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>>
+// CHECK-DAG: %[[p2:.*]] = llvm.bitcast %[[p]] : !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>> to !llvm.ptr<i8>
// CHECK-DAG: %[[r:.*]] = llvm.mlir.constant(3 : i64) : !llvm.i64
-// CHECK    : llvm.mlir.undef : !llvm<"{ i64, i8* }">
-// CHECK-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm<"{ i64, i8* }">
-// CHECK-DAG: llvm.insertvalue %[[p2]], %{{.*}}[1] : !llvm<"{ i64, i8* }">
+// CHECK    : llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
+// CHECK-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i64, ptr<i8>)>
+// CHECK-DAG: llvm.insertvalue %[[p2]], %{{.*}}[1] : !llvm.struct<(i64, ptr<i8>)>
  %0 = memref_cast %arg : memref<42x2x?xf32> to memref<*xf32>
  return
}

// CHECK-LABEL: func @memref_cast_unranked_to_ranked
func @memref_cast_unranked_to_ranked(%arg : memref<*xf32>) {
-// CHECK: %[[p:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ i64, i8* }">
-// CHECK-NEXT: llvm.bitcast %[[p]] : !llvm<"i8*"> to !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }*">
+// CHECK: %[[p:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, ptr<i8>)>
+// CHECK-NEXT: llvm.bitcast %[[p]] : !llvm.ptr<i8> to !llvm.ptr<struct<(ptr<float>, ptr<float>, i64, array<4 x i64>, array<4 x i64>)>>
  %0 = memref_cast %arg : memref<*xf32> to memref<?x?x10x2xf32>
  return
}
@@ -398,25 +398,25 @@ func @mixed_memref_dim(%mixed : memref<42x?x?x13x?xf32>) {
// CHECK: llvm.mlir.constant(42 : index) : !llvm.i64
  %c0 = constant 0 : index
  %0 = dim %mixed, %c0 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld:.*]][3, 1] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }">
+// CHECK: llvm.extractvalue %[[ld:.*]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<5 x i64>, array<5 x i64>)>
  %c1 = constant 1 : index
  %1 = dim %mixed, %c1 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld]][3, 2] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }">
+// CHECK: llvm.extractvalue %[[ld]][3, 2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<5 x i64>, array<5 x i64>)>
  %c2 = constant 2 : index
  %2 = dim %mixed, %c2 : memref<42x?x?x13x?xf32>
// CHECK: llvm.mlir.constant(13 : index) : !llvm.i64
  %c3 = constant 3 : index
  %3 = dim %mixed, %c3 : memref<42x?x?x13x?xf32>
-// CHECK: llvm.extractvalue %[[ld]][3, 4] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }">
+// CHECK: llvm.extractvalue %[[ld]][3, 4] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<5 x i64>, array<5 x i64>)>
  %c4 = constant 4 : index
  %4 = dim %mixed, %c4 : memref<42x?x?x13x?xf32>
  return
}

// CHECK-LABEL: @memref_dim_with_dyn_index
-// CHECK-SAME: %[[ALLOC_PTR:.*]]: !llvm<"float*">, %[[ALIGN_PTR:.*]]: !llvm<"float*">, %[[OFFSET:.*]]: !llvm.i64, %[[SIZE0:.*]]: !llvm.i64, %[[SIZE1:.*]]: !llvm.i64, %[[STRIDE0:.*]]: !llvm.i64, %[[STRIDE1:.*]]: !llvm.i64, %[[IDX:.*]]: !llvm.i64) -> !llvm.i64
+// CHECK-SAME: %[[ALLOC_PTR:.*]]: !llvm.ptr<float>, %[[ALIGN_PTR:.*]]: !llvm.ptr<float>, %[[OFFSET:.*]]: !llvm.i64, %[[SIZE0:.*]]: !llvm.i64, %[[SIZE1:.*]]: !llvm.i64, %[[STRIDE0:.*]]: !llvm.i64, %[[STRIDE1:.*]]: !llvm.i64, %[[IDX:.*]]: !llvm.i64) -> !llvm.i64
func @memref_dim_with_dyn_index(%arg : memref<3x?xf32>, %idx : index) -> index {
-  // CHECK-NEXT: %[[DESCR0:.*]] = llvm.mlir.undef : [[DESCR_TY:!llvm<"{ float\*, float\*, i64, \[2 x i64\], \[2 x i64\] }">]]
+  // CHECK-NEXT: %[[DESCR0:.*]] = llvm.mlir.undef : [[DESCR_TY:!llvm.struct<\(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>\)>]]
  // CHECK-NEXT: %[[DESCR1:.*]] = llvm.insertvalue %[[ALLOC_PTR]], %[[DESCR0]][0] : [[DESCR_TY]]
  // CHECK-NEXT: %[[DESCR2:.*]] = llvm.insertvalue %[[ALIGN_PTR]], %[[DESCR1]][1] : [[DESCR_TY]]
  // CHECK-NEXT: %[[DESCR3:.*]] = llvm.insertvalue %[[OFFSET]], %[[DESCR2]][2] : [[DESCR_TY]]
@@ -427,10 +427,10 @@ func @memref_dim_with_dyn_index(%arg : memref<3x?xf32>, %idx : index) -> index {
  // CHECK-DAG: %[[C0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
  // CHECK-DAG: %[[C1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
  // CHECK-DAG: %[[SIZES:.*]] = llvm.extractvalue %[[DESCR7]][3] : [[DESCR_TY]]
-  // CHECK-DAG: %[[SIZES_PTR:.*]] = llvm.alloca %[[C1]] x !llvm<"[2 x i64]"> : (!llvm.i64) -> !llvm<"[2 x i64]*">
-  // CHECK-DAG: llvm.store %[[SIZES]], %[[SIZES_PTR]] : !llvm<"[2 x i64]*">
-  // CHECK-DAG: %[[RESULT_PTR:.*]] = llvm.getelementptr %[[SIZES_PTR]][%[[C0]], %[[IDX]]] : (!llvm<"[2 x i64]*">, !llvm.i64, !llvm.i64) -> !llvm<"i64*">
-  // CHECK-DAG: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] : !llvm<"i64*">
+  // CHECK-DAG: %[[SIZES_PTR:.*]] = llvm.alloca %[[C1]] x !llvm.array<2 x i64> : (!llvm.i64) -> !llvm.ptr<array<2 x i64>>
+  // CHECK-DAG: llvm.store %[[SIZES]], %[[SIZES_PTR]] : !llvm.ptr<array<2 x i64>>
+  // CHECK-DAG: %[[RESULT_PTR:.*]] = llvm.getelementptr %[[SIZES_PTR]][%[[C0]], %[[IDX]]] : (!llvm.ptr<array<2 x i64>>, !llvm.i64, !llvm.i64) -> !llvm.ptr<i64>
+  // CHECK-DAG: %[[RESULT:.*]] = llvm.load %[[RESULT_PTR]] : !llvm.ptr<i64>
  // CHECK-DAG: llvm.return %[[RESULT]] : !llvm.i64
  %result = dim %arg, %idx : memref<3x?xf32>
  return %result : index
diff --git a/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir b/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
index 562918b71ac83..d2a88ed952f7c 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-funcs.mlir
@@ -1,39 +1,39 @@
 // RUN: mlir-opt -convert-std-to-llvm %s | FileCheck %s

-//CHECK: llvm.func @second_order_arg(!llvm<"void ()*">)
+//CHECK: llvm.func @second_order_arg(!llvm.ptr<func<void ()>>)
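This file exercises the same syntax change for higher-order functions: a function type that occurs as an argument or result is converted to a pointer-to-function type, adding one pointer level per order. A minimal hypothetical example in the same spirit (@takes_callback is invented, not part of the test):

    func @takes_callback(%f : (i32) -> i32)
    // would be declared after conversion roughly as:
    // llvm.func @takes_callback(!llvm.ptr<func<i32 (i32)>>)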
 func @second_order_arg(%arg0 : () -> ())

-//CHECK: llvm.func @second_order_result() -> !llvm<"void ()*">
+//CHECK: llvm.func @second_order_result() -> !llvm.ptr<func<void ()>>
 func @second_order_result() -> (() -> ())

-//CHECK: llvm.func @second_order_multi_result() -> !llvm<"{ i32 ()*, i64 ()*, float ()* }">
+//CHECK: llvm.func @second_order_multi_result() -> !llvm.struct<(ptr<func<i32 ()>>, ptr<func<i64 ()>>, ptr<func<float ()>>)>
 func @second_order_multi_result() -> (() -> (i32), () -> (i64), () -> (f32))

-//CHECK: llvm.func @third_order(!llvm<"void ()* (void ()*)*">) -> !llvm<"void ()* (void ()*)*">
+//CHECK: llvm.func @third_order(!llvm.ptr<func<ptr<func<void ()>> (ptr<func<void ()>>)>>) -> !llvm.ptr<func<ptr<func<void ()>> (ptr<func<void ()>>)>>
 func @third_order(%arg0 : (() -> ()) -> (() -> ())) -> ((() -> ()) -> (() -> ()))

-//CHECK: llvm.func @fifth_order_left(!llvm<"void (void (void (void ()*)*)*)*">)
+//CHECK: llvm.func @fifth_order_left(!llvm.ptr<func<void (ptr<func<void (ptr<func<void (ptr<func<void ()>>)>>)>>)>>)
 func @fifth_order_left(%arg0: (((() -> ()) -> ()) -> ()) -> ())

-//CHECK: llvm.func @fifth_order_right(!llvm<"void ()* ()* ()* ()*">)
+//CHECK: llvm.func @fifth_order_right(!llvm.ptr<func<ptr<func<ptr<func<ptr<func<void ()>> ()>> ()>> ()>>)
 func @fifth_order_right(%arg0: () -> (() -> (() -> (() -> ()))))

 // Check that memrefs are converted to argument packs if appear as function arguments.
-// CHECK: llvm.func @memref_call_conv(!llvm<"float*">, !llvm<"float*">, !llvm.i64, !llvm.i64, !llvm.i64)
+// CHECK: llvm.func @memref_call_conv(!llvm.ptr<float>, !llvm.ptr<float>, !llvm.i64, !llvm.i64, !llvm.i64)
 func @memref_call_conv(%arg0: memref<?xf32>)

 // Same in nested functions.
-// CHECK: llvm.func @memref_call_conv_nested(!llvm<"void (float*, float*, i64, i64, i64)*">)
+// CHECK: llvm.func @memref_call_conv_nested(!llvm.ptr<func<void (ptr<float>, ptr<float>, i64, i64, i64)>>)
 func @memref_call_conv_nested(%arg0: (memref<?xf32>) -> ())

-//CHECK-LABEL: llvm.func @pass_through(%arg0: !llvm<"void ()*">) -> !llvm<"void ()*"> {
+//CHECK-LABEL: llvm.func @pass_through(%arg0: !llvm.ptr<func<void ()>>) -> !llvm.ptr<func<void ()>> {
 func @pass_through(%arg0: () -> ()) -> (() -> ()) {
-// CHECK-NEXT: llvm.br ^bb1(%arg0 : !llvm<"void ()*">)
+// CHECK-NEXT: llvm.br ^bb1(%arg0 : !llvm.ptr<func<void ()>>)
  br ^bb1(%arg0 : () -> ())

-//CHECK-NEXT: ^bb1(%0: !llvm<"void ()*">):
+//CHECK-NEXT: ^bb1(%0: !llvm.ptr<func<void ()>>):
^bb1(%bbarg: () -> ()):
-// CHECK-NEXT: llvm.return %0 : !llvm<"void ()*">
+// CHECK-NEXT: llvm.return %0 : !llvm.ptr<func<void ()>>
  return %bbarg : () -> ()
}

@@ -43,7 +43,7 @@ func @body(i32)

// CHECK-LABEL: llvm.func @indirect_const_call
// CHECK-SAME: (%[[ARG0:.*]]: !llvm.i32) {
func @indirect_const_call(%arg0: i32) {
-// CHECK-NEXT: %[[ADDR:.*]] = llvm.mlir.addressof @body : !llvm<"void (i32)*">
+// CHECK-NEXT: %[[ADDR:.*]] = llvm.mlir.addressof @body : !llvm.ptr<func<void (i32)>>
  %0 = constant @body : (i32) -> ()
// CHECK-NEXT: llvm.call %[[ADDR]](%[[ARG0:.*]]) : (!llvm.i32) -> ()
  call_indirect %0(%arg0) : (i32) -> ()
@@ -51,7 +51,7 @@ func @indirect_const_call(%arg0: i32) {
  return
}

-// CHECK-LABEL: llvm.func @indirect_call(%arg0: !llvm<"i32 (float)*">, %arg1: !llvm.float) -> !llvm.i32 {
+// CHECK-LABEL: llvm.func @indirect_call(%arg0: !llvm.ptr<func<i32 (float)>>, %arg1: !llvm.float) -> !llvm.i32 {
 func @indirect_call(%arg0: (f32) -> i32, %arg1: f32) -> i32 {
// CHECK-NEXT: %0 = llvm.call %arg0(%arg1) : (!llvm.float) -> !llvm.i32
  %0 = call_indirect %arg0(%arg1) : (f32) -> i32
diff --git a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
index dbf8fe6884289..b428d37a36167 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-static-memref-ops.mlir
@@
-2,7 +2,7 @@ // RUN: mlir-opt -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -split-input-file %s | FileCheck %s --check-prefix=BAREPTR // BAREPTR-LABEL: func @check_noalias -// BAREPTR-SAME: %{{.*}}: !llvm<"float*"> {llvm.noalias = true}, %{{.*}}: !llvm<"float*"> {llvm.noalias = true} +// BAREPTR-SAME: %{{.*}}: !llvm.ptr {llvm.noalias = true}, %{{.*}}: !llvm.ptr {llvm.noalias = true} func @check_noalias(%static : memref<2xf32> {llvm.noalias = true}, %other : memref<2xf32> {llvm.noalias = true}) { return } @@ -10,93 +10,93 @@ func @check_noalias(%static : memref<2xf32> {llvm.noalias = true}, %other : memr // ----- // CHECK-LABEL: func @check_static_return -// CHECK-COUNT-2: !llvm<"float*"> +// CHECK-COUNT-2: !llvm.ptr // CHECK-COUNT-5: !llvm.i64 -// CHECK-SAME: -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-LABEL: func @check_static_return -// BAREPTR-SAME: (%[[arg:.*]]: !llvm<"float*">) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @check_static_return(%static : memref<32x18xf32>) -> memref<32x18xf32> { -// CHECK: llvm.return %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr, ptr, i64, 
array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> return %static : memref<32x18xf32> } // ----- // CHECK-LABEL: func @check_static_return_with_offset -// CHECK-COUNT-2: !llvm<"float*"> +// CHECK-COUNT-2: !llvm.ptr // CHECK-COUNT-5: !llvm.i64 -// CHECK-SAME: -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK-SAME: -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-LABEL: func @check_static_return_with_offset -// BAREPTR-SAME: (%[[arg:.*]]: !llvm<"float*">) -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// BAREPTR-SAME: (%[[arg:.*]]: !llvm.ptr) -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @check_static_return_with_offset(%static : memref<32x18xf32, offset:7, strides:[22,1]>) -> memref<32x18xf32, offset:7, strides:[22,1]> { -// CHECK: llvm.return %{{.*}} : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.return %{{.*}} : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR: %[[udf:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[base:.*]] = llvm.insertvalue %[[arg]], %[[udf]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.insertvalue %[[arg]], %[[base]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val0:.*]] = llvm.mlir.constant(7 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins0:.*]] = llvm.insertvalue %[[val0]], %[[aligned]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins1:.*]] = llvm.insertvalue %[[val1]], %[[ins0]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val2:.*]] = llvm.mlir.constant(22 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins2:.*]] = llvm.insertvalue %[[val2]], %[[ins1]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val3:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: 
%[[ins3:.*]] = llvm.insertvalue %[[val3]], %[[ins2]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[val4:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR-NEXT: %[[ins4:.*]] = llvm.insertvalue %[[val4]], %[[ins3]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: llvm.return %[[ins4]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> return %static : memref<32x18xf32, offset:7, strides:[22,1]> } // ----- -// CHECK-LABEL: func @zero_d_alloc() -> !llvm<"{ float*, float*, i64 }"> { -// BAREPTR-LABEL: func @zero_d_alloc() -> !llvm<"{ float*, float*, i64 }"> { +// CHECK-LABEL: func @zero_d_alloc() -> !llvm.struct<(ptr, ptr, i64)> { +// BAREPTR-LABEL: func @zero_d_alloc() -> !llvm.struct<(ptr, ptr, i64)> { func @zero_d_alloc() -> memref { // CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64 }"> -// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> +// CHECK-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> +// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // CHECK-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm<"{ float*, float*, i64 }"> +// CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // BAREPTR-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // 
BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm<"i8*"> -// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// BAREPTR-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64 }"> -// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> +// BAREPTR-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> +// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm<"{ float*, float*, i64 }"> +// BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> %0 = alloc() : memref return %0 : memref } @@ -104,15 +104,15 @@ func @zero_d_alloc() -> memref { // ----- // CHECK-LABEL: func @zero_d_dealloc -// BAREPTR-LABEL: func @zero_d_dealloc(%{{.*}}: !llvm<"float*">) { +// BAREPTR-LABEL: func @zero_d_dealloc(%{{.*}}: !llvm.ptr) { func @zero_d_dealloc(%arg0: memref) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// CHECK-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to !llvm<"i8*"> -// CHECK-NEXT: llvm.call @free(%[[bc]]) : (!llvm<"i8*">) -> () +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to !llvm<"i8*"> -// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm<"i8*">) -> () +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () dealloc %arg0 : memref return } @@ -123,111 +123,111 @@ func @zero_d_dealloc(%arg0: memref) { // BAREPTR-LABEL: func @aligned_1d_alloc( func @aligned_1d_alloc() -> memref<42xf32> { // CHECK: llvm.mlir.constant(42 : index) : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 // CHECK-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // 
CHECK-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 -// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm<"i8*"> to !llvm.i64 +// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64 // CHECK-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64 // CHECK-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64 -// CHECK-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm<"i8*"> to !llvm<"float*"> -// CHECK-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// CHECK-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // BAREPTR-NEXT: llvm.mlir.constant(42 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // BAREPTR-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64 // BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64 // BAREPTR-NEXT: %[[alignmentMinus1:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64 // BAREPTR-NEXT: %[[allocsize:.*]] = llvm.sub %[[alignmentMinus1]], %[[one_1]] : !llvm.i64 -// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm<"i8*"> -// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> -// 
BAREPTR-NEXT: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm<"i8*"> to !llvm.i64 +// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64 // BAREPTR-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64 // BAREPTR-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64 -// BAREPTR-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> -// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm<"i8*"> to !llvm<"float*"> -// BAREPTR-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// BAREPTR-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %0 = alloc() {alignment = 8} : memref<42xf32> return %0 : memref<42xf32> } // ----- -// CHECK-LABEL: func @static_alloc() -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { -// BAREPTR-LABEL: func @static_alloc() -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// CHECK-LABEL: func @static_alloc() -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { +// BAREPTR-LABEL: func @static_alloc() -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @static_alloc() -> memref<32x18xf32> { // CHECK: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // CHECK-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 // CHECK-NEXT: %[[num_elems:.*]] = llvm.mul %0, %1 : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 
-// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm<"i8*"> -// CHECK-NEXT: llvm.bitcast %[[allocated]] : !llvm<"i8*"> to !llvm<"float*"> +// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr // BAREPTR-NEXT: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // BAREPTR-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 // BAREPTR-NEXT: %[[num_elems:.*]] = llvm.mul %[[sz1]], %[[sz2]] : !llvm.i64 -// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// BAREPTR-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // BAREPTR-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // BAREPTR-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 // BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm<"i8*"> -// BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm<"i8*"> to !llvm<"float*"> +// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr to !llvm.ptr %0 = alloc() : memref<32x18xf32> return %0 : memref<32x18xf32> } // ----- -// CHECK-LABEL: func @static_alloca() -> !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> { +// CHECK-LABEL: func @static_alloca() -> !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> { func @static_alloca() -> memref<32x18xf32> { // CHECK-NEXT: %[[sz1:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64 // CHECK-NEXT: %[[sz2:.*]] = llvm.mlir.constant(18 : index) : !llvm.i64 // CHECK-NEXT: %[[num_elems:.*]] = llvm.mul %0, %1 : !llvm.i64 -// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK-NEXT: %[[null:.*]] = llvm.mlir.null : !llvm.ptr // CHECK-NEXT: %[[one:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm<"float*"> to !llvm.i64 +// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr to !llvm.i64 // CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64 // CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[bytes]] x !llvm.float : (!llvm.i64) -> !llvm<"float*"> +// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr %0 = alloca() : memref<32x18xf32> // Test with explicitly specified alignment. llvm.alloca takes care of the // alignment. The same pointer is thus used for allocation and aligned // accesses. 
- // CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm<"float*"> - // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: %[[alloca_aligned:.*]] = llvm.alloca %{{.*}} x !llvm.float {alignment = 32 : i64} : (!llvm.i64) -> !llvm.ptr + // CHECK: %[[desc:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[desc1:.*]] = llvm.insertvalue %[[alloca_aligned]], %[[desc]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.insertvalue %[[alloca_aligned]], %[[desc1]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> alloca() {alignment = 32} : memref<32x18xf32> return %0 : memref<32x18xf32> } @@ -235,15 +235,15 @@ func @static_alloca() -> memref<32x18xf32> { // ----- // CHECK-LABEL: func @static_dealloc -// BAREPTR-LABEL: func @static_dealloc(%{{.*}}: !llvm<"float*">) { +// BAREPTR-LABEL: func @static_dealloc(%{{.*}}: !llvm.ptr) { func @static_dealloc(%static: memref<10x8xf32>) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to !llvm<"i8*"> -// CHECK-NEXT: llvm.call @free(%[[bc]]) : (!llvm<"i8*">) -> () +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm<"float*"> to !llvm<"i8*"> -// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm<"i8*">) -> () +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// BAREPTR-NEXT: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr to !llvm.ptr +// BAREPTR-NEXT: llvm.call @free(%[[bc]]) : (!llvm.ptr) -> () dealloc %static : memref<10x8xf32> return } @@ -251,17 +251,17 @@ func @static_dealloc(%static: memref<10x8xf32>) { // ----- // CHECK-LABEL: func @zero_d_load -// BAREPTR-LABEL: func @zero_d_load(%{{.*}}: !llvm<"float*">) -> !llvm.float +// BAREPTR-LABEL: func @zero_d_load(%{{.*}}: !llvm.ptr) -> !llvm.float func @zero_d_load(%arg0: memref) -> f32 { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // CHECK-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[c0]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: %{{.*}} = llvm.load %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[c0]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: %{{.*}} = llvm.load %[[addr]] : !llvm.ptr -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: %[[c0:.*]] = 
llvm.mlir.constant(0 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[c0]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: llvm.load %[[addr:.*]] : !llvm<"float*"> +// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[c0]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: llvm.load %[[addr:.*]] : !llvm.ptr %0 = load %arg0[] : memref return %0 : f32 } @@ -269,14 +269,14 @@ func @zero_d_load(%arg0: memref) -> f32 { // ----- // CHECK-LABEL: func @static_load( -// CHECK-COUNT-2: !llvm<"float*">, +// CHECK-COUNT-2: !llvm.ptr, // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64 // CHECK: %[[I:.*]]: !llvm.i64, // CHECK: %[[J:.*]]: !llvm.i64) // BAREPTR-LABEL: func @static_load -// BAREPTR-SAME: (%[[A:.*]]: !llvm<"float*">, %[[I:.*]]: !llvm.i64, %[[J:.*]]: !llvm.i64) { +// BAREPTR-SAME: (%[[A:.*]]: !llvm.ptr, %[[I:.*]]: !llvm.i64, %[[J:.*]]: !llvm.i64) { func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 @@ -284,10 +284,10 @@ func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) { // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.load %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.load %[[addr]] : !llvm.ptr -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // BAREPTR-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // BAREPTR-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : !llvm.i64 // BAREPTR-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 @@ -295,8 +295,8 @@ func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) { // BAREPTR-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: llvm.load %[[addr]] : !llvm<"float*"> +// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: llvm.load %[[addr]] : !llvm.ptr %0 = load %static[%i, %j] : memref<10x42xf32> return } @@ -305,17 +305,17 @@ func @static_load(%static : memref<10x42xf32>, %i : index, %j : index) { // CHECK-LABEL: func @zero_d_store // BAREPTR-LABEL: func @zero_d_store -// BAREPTR-SAME: (%[[A:.*]]: !llvm<"float*">, %[[val:.*]]: !llvm.float) +// BAREPTR-SAME: (%[[A:.*]]: !llvm.ptr, %[[val:.*]]: !llvm.float) func 
@zero_d_store(%arg0: memref, %arg1: f32) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm<"{ float*, float*, i64 }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %[[ld:.*]][1] : !llvm.struct<(ptr, ptr, i64)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> // BAREPTR-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: llvm.store %[[val]], %[[addr]] : !llvm<"float*"> +// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: llvm.store %[[val]], %[[addr]] : !llvm.ptr store %arg1, %arg0[] : memref return } @@ -323,8 +323,8 @@ func @zero_d_store(%arg0: memref, %arg1: f32) { // ----- // CHECK-LABEL: func @static_store -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*"> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*"> +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64 @@ -333,11 +333,11 @@ func @zero_d_store(%arg0: memref, %arg1: f32) { // CHECK-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // CHECK-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 // BAREPTR-LABEL: func @static_store -// BAREPTR-SAME: %[[A:.*]]: !llvm<"float*"> +// BAREPTR-SAME: %[[A:.*]]: !llvm.ptr // BAREPTR-SAME: %[[I:[a-zA-Z0-9]*]]: !llvm.i64 // BAREPTR-SAME: %[[J:[a-zA-Z0-9]*]]: !llvm.i64 func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f32) { -// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 @@ -345,10 +345,10 @@ func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f // CHECK-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // CHECK-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm<"float*"> +// CHECK-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// CHECK-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr -// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// BAREPTR: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, 
array<2 x i64>)> // BAREPTR-NEXT: %[[off:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // BAREPTR-NEXT: %[[st0:.*]] = llvm.mlir.constant(42 : index) : !llvm.i64 // BAREPTR-NEXT: %[[offI:.*]] = llvm.mul %[[I]], %[[st0]] : !llvm.i64 @@ -356,8 +356,8 @@ func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f // BAREPTR-NEXT: %[[st1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // BAREPTR-NEXT: %[[offJ:.*]] = llvm.mul %[[J]], %[[st1]] : !llvm.i64 // BAREPTR-NEXT: %[[off1:.*]] = llvm.add %[[off0]], %[[offJ]] : !llvm.i64 -// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// BAREPTR-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm<"float*"> +// BAREPTR-NEXT: %[[addr:.*]] = llvm.getelementptr %[[ptr]][%[[off1]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr +// BAREPTR-NEXT: llvm.store %{{.*}}, %[[addr]] : !llvm.ptr store %val, %static[%i, %j] : memref<10x42xf32> return } @@ -365,10 +365,10 @@ func @static_store(%static : memref<10x42xf32>, %i : index, %j : index, %val : f // ----- // CHECK-LABEL: func @static_memref_dim -// BAREPTR-LABEL: func @static_memref_dim(%{{.*}}: !llvm<"float*">) { +// BAREPTR-LABEL: func @static_memref_dim(%{{.*}}: !llvm.ptr) { func @static_memref_dim(%static : memref<42x32x15x13x27xf32>) { // CHECK: llvm.mlir.constant(42 : index) : !llvm.i64 -// BAREPTR: llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// BAREPTR: llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // BAREPTR: llvm.mlir.constant(42 : index) : !llvm.i64 %c0 = constant 0 : index %0 = dim %static, %c0 : memref<42x32x15x13x27xf32> diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir index c1ec558da86f1..d0e883a10bee3 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -68,11 +68,11 @@ func @simple_loop() { // CHECK-LABEL: llvm.func @complex_numbers() // CHECK-NEXT: %[[REAL0:.*]] = llvm.mlir.constant(1.200000e+00 : f32) : !llvm.float // CHECK-NEXT: %[[IMAG0:.*]] = llvm.mlir.constant(3.400000e+00 : f32) : !llvm.float -// CHECK-NEXT: %[[CPLX0:.*]] = llvm.mlir.undef : !llvm<"{ float, float }"> -// CHECK-NEXT: %[[CPLX1:.*]] = llvm.insertvalue %[[REAL0]], %[[CPLX0]][0] : !llvm<"{ float, float }"> -// CHECK-NEXT: %[[CPLX2:.*]] = llvm.insertvalue %[[IMAG0]], %[[CPLX1]][1] : !llvm<"{ float, float }"> -// CHECK-NEXT: %[[REAL1:.*]] = llvm.extractvalue %[[CPLX2:.*]][0] : !llvm<"{ float, float }"> -// CHECK-NEXT: %[[IMAG1:.*]] = llvm.extractvalue %[[CPLX2:.*]][1] : !llvm<"{ float, float }"> +// CHECK-NEXT: %[[CPLX0:.*]] = llvm.mlir.undef : !llvm.struct<(float, float)> +// CHECK-NEXT: %[[CPLX1:.*]] = llvm.insertvalue %[[REAL0]], %[[CPLX0]][0] : !llvm.struct<(float, float)> +// CHECK-NEXT: %[[CPLX2:.*]] = llvm.insertvalue %[[IMAG0]], %[[CPLX1]][1] : !llvm.struct<(float, float)> +// CHECK-NEXT: %[[REAL1:.*]] = llvm.extractvalue %[[CPLX2:.*]][0] : !llvm.struct<(float, float)> +// CHECK-NEXT: %[[IMAG1:.*]] = llvm.extractvalue %[[CPLX2:.*]][1] : !llvm.struct<(float, float)> // CHECK-NEXT: llvm.return func @complex_numbers() { %real0 = constant 1.2 : f32 @@ -84,15 +84,15 @@ func @complex_numbers() { } // CHECK-LABEL: llvm.func @complex_addition() -// CHECK-DAG: %[[A_REAL:.*]] = llvm.extractvalue %[[A:.*]][0] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[B_REAL:.*]] = 
llvm.extractvalue %[[B:.*]][0] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[A_IMAG:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[B_IMAG:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"{ double, double }"> -// CHECK: %[[C0:.*]] = llvm.mlir.undef : !llvm<"{ double, double }"> +// CHECK-DAG: %[[A_REAL:.*]] = llvm.extractvalue %[[A:.*]][0] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[B_REAL:.*]] = llvm.extractvalue %[[B:.*]][0] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[A_IMAG:.*]] = llvm.extractvalue %[[A]][1] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[B_IMAG:.*]] = llvm.extractvalue %[[B]][1] : !llvm.struct<(double, double)> +// CHECK: %[[C0:.*]] = llvm.mlir.undef : !llvm.struct<(double, double)> // CHECK-DAG: %[[C_REAL:.*]] = llvm.fadd %[[A_REAL]], %[[B_REAL]] : !llvm.double // CHECK-DAG: %[[C_IMAG:.*]] = llvm.fadd %[[A_IMAG]], %[[B_IMAG]] : !llvm.double -// CHECK: %[[C1:.*]] = llvm.insertvalue %[[C_REAL]], %[[C0]][0] : !llvm<"{ double, double }"> -// CHECK: %[[C2:.*]] = llvm.insertvalue %[[C_IMAG]], %[[C1]][1] : !llvm<"{ double, double }"> +// CHECK: %[[C1:.*]] = llvm.insertvalue %[[C_REAL]], %[[C0]][0] : !llvm.struct<(double, double)> +// CHECK: %[[C2:.*]] = llvm.insertvalue %[[C_IMAG]], %[[C1]][1] : !llvm.struct<(double, double)> func @complex_addition() { %a_re = constant 1.2 : f64 %a_im = constant 3.4 : f64 @@ -105,15 +105,15 @@ func @complex_addition() { } // CHECK-LABEL: llvm.func @complex_substraction() -// CHECK-DAG: %[[A_REAL:.*]] = llvm.extractvalue %[[A:.*]][0] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[B_REAL:.*]] = llvm.extractvalue %[[B:.*]][0] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[A_IMAG:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"{ double, double }"> -// CHECK-DAG: %[[B_IMAG:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"{ double, double }"> -// CHECK: %[[C0:.*]] = llvm.mlir.undef : !llvm<"{ double, double }"> +// CHECK-DAG: %[[A_REAL:.*]] = llvm.extractvalue %[[A:.*]][0] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[B_REAL:.*]] = llvm.extractvalue %[[B:.*]][0] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[A_IMAG:.*]] = llvm.extractvalue %[[A]][1] : !llvm.struct<(double, double)> +// CHECK-DAG: %[[B_IMAG:.*]] = llvm.extractvalue %[[B]][1] : !llvm.struct<(double, double)> +// CHECK: %[[C0:.*]] = llvm.mlir.undef : !llvm.struct<(double, double)> // CHECK-DAG: %[[C_REAL:.*]] = llvm.fsub %[[A_REAL]], %[[B_REAL]] : !llvm.double // CHECK-DAG: %[[C_IMAG:.*]] = llvm.fsub %[[A_IMAG]], %[[B_IMAG]] : !llvm.double -// CHECK: %[[C1:.*]] = llvm.insertvalue %[[C_REAL]], %[[C0]][0] : !llvm<"{ double, double }"> -// CHECK: %[[C2:.*]] = llvm.insertvalue %[[C_IMAG]], %[[C1]][1] : !llvm<"{ double, double }"> +// CHECK: %[[C1:.*]] = llvm.insertvalue %[[C_REAL]], %[[C0]][0] : !llvm.struct<(double, double)> +// CHECK: %[[C2:.*]] = llvm.insertvalue %[[C_IMAG]], %[[C1]][1] : !llvm.struct<(double, double)> func @complex_substraction() { %a_re = constant 1.2 : f64 %a_im = constant 3.4 : f64 @@ -427,39 +427,39 @@ func @more_imperfectly_nested_loops() { func @get_i64() -> (i64) // CHECK-LABEL: func @get_f32() -> !llvm.float func @get_f32() -> (f32) -// CHECK-LABEL: func @get_c16() -> !llvm<"{ half, half }"> +// CHECK-LABEL: func @get_c16() -> !llvm.struct<(half, half)> func @get_c16() -> (complex) -// CHECK-LABEL: func @get_c32() -> !llvm<"{ float, float }"> +// CHECK-LABEL: func @get_c32() -> !llvm.struct<(float, float)> func @get_c32() -> (complex) -// CHECK-LABEL: func @get_c64() -> !llvm<"{ double, double }"> +// 
CHECK-LABEL: func @get_c64() -> !llvm.struct<(double, double)> func @get_c64() -> (complex) -// CHECK-LABEL: func @get_memref() -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }"> -// CHECK32-LABEL: func @get_memref() -> !llvm<"{ float*, float*, i32, [4 x i32], [4 x i32] }"> +// CHECK-LABEL: func @get_memref() -> !llvm.struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)> +// CHECK32-LABEL: func @get_memref() -> !llvm.struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)> func @get_memref() -> (memref<42x?x10x?xf32>) -// CHECK-LABEL: func @multireturn() -> !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> { -// CHECK32-LABEL: func @multireturn() -> !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> { +// CHECK-LABEL: func @multireturn() -> !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> { +// CHECK32-LABEL: func @multireturn() -> !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> { func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) { ^bb0: // CHECK-NEXT: {{.*}} = llvm.call @get_i64() : () -> !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.call @get_f32() : () -> !llvm.float -// CHECK-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }"> +// CHECK-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)> // CHECK32-NEXT: {{.*}} = llvm.call @get_i64() : () -> !llvm.i64 // CHECK32-NEXT: {{.*}} = llvm.call @get_f32() : () -> !llvm.float -// CHECK32-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm<"{ float*, float*, i32, [4 x i32], [4 x i32] }"> +// CHECK32-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm.struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)> %0 = call @get_i64() : () -> (i64) %1 = call @get_f32() : () -> (f32) %2 = call @get_memref() : () -> (memref<42x?x10x?xf32>) -// CHECK-NEXT: {{.*}} = llvm.mlir.undef : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: llvm.return {{.*}} : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK32-NEXT: {{.*}} = llvm.mlir.undef : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: llvm.return {{.*}} : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> +// CHECK-NEXT: {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, float, 
struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: llvm.return {{.*}} : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK32-NEXT: {{.*}} = llvm.mlir.undef : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: llvm.return {{.*}} : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> return %0, %1, %2 : i64, f32, memref<42x?x10x?xf32> } @@ -468,14 +468,14 @@ func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) { // CHECK32-LABEL: func @multireturn_caller() { func @multireturn_caller() { ^bb0: -// CHECK-NEXT: {{.*}} = llvm.call @multireturn() : () -> !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> -// CHECK32-NEXT: {{.*}} = llvm.call @multireturn() : () -> !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> -// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i32, [4 x i32], [4 x i32] } }"> +// CHECK-NEXT: {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, float, struct<(ptr, ptr, i64, array<4 x i64>, array<4 x i64>)>)> +// CHECK32-NEXT: {{.*}} = llvm.call @multireturn() : () -> !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> +// CHECK32-NEXT: {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm.struct<(i64, float, struct<(ptr, ptr, i32, array<4 x i32>, array<4 x i32>)>)> %0:3 = call @multireturn() : () -> (i64, f32, memref<42x?x10x?xf32>) %1 = constant 42 : i64 // CHECK: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 @@ -487,35 +487,35 @@ 
func @multireturn_caller() { return } -// CHECK-LABEL: func @vector_ops(%arg0: !llvm<"<4 x float>">, %arg1: !llvm<"<4 x i1>">, %arg2: !llvm<"<4 x i64>">, %arg3: !llvm<"<4 x i64>">) -> !llvm<"<4 x float>"> { +// CHECK-LABEL: func @vector_ops(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.vec<4 x i1>, %arg2: !llvm.vec<4 x i64>, %arg3: !llvm.vec<4 x i64>) -> !llvm.vec<4 x float> { func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { -// CHECK-NEXT: %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm<"<4 x float>"> +// CHECK-NEXT: %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm.vec<4 x float> %0 = constant dense<42.> : vector<4xf32> -// CHECK-NEXT: %1 = llvm.fadd %arg0, %0 : !llvm<"<4 x float>"> +// CHECK-NEXT: %1 = llvm.fadd %arg0, %0 : !llvm.vec<4 x float> %1 = addf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %2 = llvm.sdiv %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %2 = llvm.sdiv %arg2, %arg2 : !llvm.vec<4 x i64> %3 = divi_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %3 = llvm.udiv %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %3 = llvm.udiv %arg2, %arg2 : !llvm.vec<4 x i64> %4 = divi_unsigned %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %4 = llvm.srem %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %4 = llvm.srem %arg2, %arg2 : !llvm.vec<4 x i64> %5 = remi_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %5 = llvm.urem %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %5 = llvm.urem %arg2, %arg2 : !llvm.vec<4 x i64> %6 = remi_unsigned %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %6 = llvm.fdiv %arg0, %0 : !llvm<"<4 x float>"> +// CHECK-NEXT: %6 = llvm.fdiv %arg0, %0 : !llvm.vec<4 x float> %7 = divf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %7 = llvm.frem %arg0, %0 : !llvm<"<4 x float>"> +// CHECK-NEXT: %7 = llvm.frem %arg0, %0 : !llvm.vec<4 x float> %8 = remf %arg0, %0 : vector<4xf32> -// CHECK-NEXT: %8 = llvm.and %arg2, %arg3 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %8 = llvm.and %arg2, %arg3 : !llvm.vec<4 x i64> %9 = and %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %9 = llvm.or %arg2, %arg3 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %9 = llvm.or %arg2, %arg3 : !llvm.vec<4 x i64> %10 = or %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %10 = llvm.xor %arg2, %arg3 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %10 = llvm.xor %arg2, %arg3 : !llvm.vec<4 x i64> %11 = xor %arg2, %arg3 : vector<4xi64> -// CHECK-NEXT: %11 = llvm.shl %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %11 = llvm.shl %arg2, %arg2 : !llvm.vec<4 x i64> %12 = shift_left %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %12 = llvm.ashr %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %12 = llvm.ashr %arg2, %arg2 : !llvm.vec<4 x i64> %13 = shift_right_signed %arg2, %arg2 : vector<4xi64> -// CHECK-NEXT: %13 = llvm.lshr %arg2, %arg2 : !llvm<"<4 x i64>"> +// CHECK-NEXT: %13 = llvm.lshr %arg2, %arg2 : !llvm.vec<4 x i64> %14 = shift_right_unsigned %arg2, %arg2 : vector<4xi64> return %1 : vector<4xf32> } @@ -583,13 +583,13 @@ func @index_cast(%arg0: index, %arg1: i1) { // Checking conversion of integer types to floating point. 
// CHECK-LABEL: @sitofp func @sitofp(%arg0 : i32, %arg1 : i64) { -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.float +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i32 to !llvm.float %0 = sitofp %arg0: i32 to f32 -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.double +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i32 to !llvm.double %1 = sitofp %arg0: i32 to f64 -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.float +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i64 to !llvm.float %2 = sitofp %arg1: i64 to f32 -// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.double +// CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i64 to !llvm.double %3 = sitofp %arg1: i64 to f64 return } @@ -609,11 +609,11 @@ func @fpext(%arg0 : f16, %arg1 : f32) { // Checking conversion of integer types to floating point. // CHECK-LABEL: @fpext func @fpext_vector(%arg0 : vector<2xf16>, %arg1 : vector<2xf32>) { -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm<"<2 x half>"> to !llvm<"<2 x float>"> +// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x float> %0 = fpext %arg0: vector<2xf16> to vector<2xf32> -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm<"<2 x half>"> to !llvm<"<2 x double>"> +// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x half> to !llvm.vec<2 x double> %1 = fpext %arg0: vector<2xf16> to vector<2xf64> -// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x double>"> +// CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x double> %2 = fpext %arg1: vector<2xf32> to vector<2xf64> return } @@ -647,11 +647,11 @@ func @fptrunc(%arg0 : f32, %arg1 : f64) { // Checking conversion of integer types to floating point. // CHECK-LABEL: @fptrunc func @fptrunc_vector(%arg0 : vector<2xf32>, %arg1 : vector<2xf64>) { -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm<"<2 x float>"> to !llvm<"<2 x half>"> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x float> to !llvm.vec<2 x half> %0 = fptrunc %arg0: vector<2xf32> to vector<2xf16> -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm<"<2 x double>"> to !llvm<"<2 x half>"> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x half> %1 = fptrunc %arg1: vector<2xf64> to vector<2xf16> -// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm<"<2 x double>"> to !llvm<"<2 x float>"> +// CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.vec<2 x double> to !llvm.vec<2 x float> %2 = fptrunc %arg1: vector<2xf64> to vector<2xf32> return } @@ -731,110 +731,110 @@ func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> { %0 = addf %arg0, %arg0 : vector<2x2x2xf32> return %0 : vector<2x2x2xf32> -// CHECK-NEXT: llvm.mlir.undef : !llvm<"[2 x [2 x <2 x float>]]"> +// CHECK-NEXT: llvm.mlir.undef : !llvm.array<2 x array<2 x vec<2 x float>>> // This block appears 2x2 times -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK-NEXT: llvm.fadd %{{.*}} : !llvm<"<2 x float>"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK-NEXT: llvm.fadd %{{.*}} : !llvm.vec<2 x float> +// CHECK-NEXT: llvm.insertvalue %{{.*}}[0, 0] : !llvm.array<2 x array<2 x vec<2 x float>>> // We check the proper indexing of extract/insert in the remaining 3 positions. 
-// CHECK: llvm.extractvalue %{{.*}}[0, 1] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK: llvm.insertvalue %{{.*}}[0, 1] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK: llvm.extractvalue %{{.*}}[1, 0] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK: llvm.insertvalue %{{.*}}[1, 0] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK: llvm.extractvalue %{{.*}}[1, 1] : !llvm<"[2 x [2 x <2 x float>]]"> -// CHECK: llvm.insertvalue %{{.*}}[1, 1] : !llvm<"[2 x [2 x <2 x float>]]"> +// CHECK: llvm.extractvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK: llvm.insertvalue %{{.*}}[0, 1] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK: llvm.extractvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK: llvm.insertvalue %{{.*}}[1, 0] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK: llvm.extractvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vec<2 x float>>> +// CHECK: llvm.insertvalue %{{.*}}[1, 1] : !llvm.array<2 x array<2 x vec<2 x float>>> // And we're done // CHECK-NEXT: return } // CHECK-LABEL: @splat -// CHECK-SAME: %[[A:arg[0-9]+]]: !llvm<"<4 x float>"> +// CHECK-SAME: %[[A:arg[0-9]+]]: !llvm.vec<4 x float> // CHECK-SAME: %[[ELT:arg[0-9]+]]: !llvm.float func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> { %vb = splat %b : vector<4xf32> %r = mulf %a, %vb : vector<4xf32> return %r : vector<4xf32> } -// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : !llvm<"<4 x float>"> +// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : !llvm.vec<4 x float> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : !llvm.i32] : !llvm<"<4 x float>"> +// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : !llvm.i32] : !llvm.vec<4 x float> // CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] -// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : !llvm<"<4 x float>"> -// CHECK-NEXT: llvm.return %[[SCALE]] : !llvm<"<4 x float>"> +// CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : !llvm.vec<4 x float> +// CHECK-NEXT: llvm.return %[[SCALE]] : !llvm.vec<4 x float> // CHECK-LABEL: func @view( // CHECK: %[[ARG0:.*]]: !llvm.i64, %[[ARG1:.*]]: !llvm.i64, %[[ARG2:.*]]: !llvm.i64 func @view(%arg0 : index, %arg1 : index, %arg2 : index) { // CHECK: llvm.mlir.constant(2048 : index) : !llvm.i64 - // CHECK: llvm.mlir.undef : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %0 = alloc() : memref<2048xi8> // Test two dynamic sizes. 
- // CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> - // CHECK: %[[SHIFTED_BASE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]][%[[ARG2]]] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> - // CHECK: %[[CAST_SHIFTED_BASE_PTR:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR]] : !llvm<"i8*"> to !llvm<"float*"> - // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[BASE_PTR:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: %[[SHIFTED_BASE_PTR:.*]] = llvm.getelementptr %[[BASE_PTR]][%[[ARG2]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + // CHECK: %[[CAST_SHIFTED_BASE_PTR:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR]] : !llvm.ptr to !llvm.ptr + // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 - // CHECK: llvm.insertvalue %[[C0]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %[[C0]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref // Test one dynamic size. 
- // CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: %[[BASE_PTR_2:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> - // CHECK: %[[SHIFTED_BASE_PTR_2:.*]] = llvm.getelementptr %[[BASE_PTR_2]][%[[ARG2]]] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> - // CHECK: %[[CAST_SHIFTED_BASE_PTR_2:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_2]] : !llvm<"i8*"> to !llvm<"float*"> - // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_2]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[BASE_PTR_2:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: %[[SHIFTED_BASE_PTR_2:.*]] = llvm.getelementptr %[[BASE_PTR_2]][%[[ARG2]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + // CHECK: %[[CAST_SHIFTED_BASE_PTR_2:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_2]] : !llvm.ptr to !llvm.ptr + // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_2]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[C0_2:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 - // CHECK: llvm.insertvalue %[[C0_2]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> - // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %[[C0_2]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] - // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %3 = view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32> // Test static sizes. 
- // CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BASE_PTR_3:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }">
- // CHECK: %[[SHIFTED_BASE_PTR_3:.*]] = llvm.getelementptr %[[BASE_PTR_3]][%[[ARG2]]] : (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*">
- // CHECK: %[[CAST_SHIFTED_BASE_PTR_3:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_3]] : !llvm<"i8*"> to !llvm<"float*">
- // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_3]], %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BASE_PTR_3:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i8>, ptr<i8>, i64, array<1 x i64>, array<1 x i64>)>
+ // CHECK: %[[SHIFTED_BASE_PTR_3:.*]] = llvm.getelementptr %[[BASE_PTR_3]][%[[ARG2]]] : (!llvm.ptr<i8>, !llvm.i64) -> !llvm.ptr<i8>
+ // CHECK: %[[CAST_SHIFTED_BASE_PTR_3:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_3]] : !llvm.ptr<i8> to !llvm.ptr<float>
+ // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_3]], %{{.*}}[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[C0_3:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
- // CHECK: llvm.insertvalue %[[C0_3]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.insertvalue %[[C0_3]], %{{.*}}[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64
- // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64
- // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: llvm.mlir.constant(64 : index) : !llvm.i64
- // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64
- // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  %5 = view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32>
  return
 }

 // CHECK-LABEL: func @subview(
-// CHECK-COUNT-2: !llvm<"float*">,
+// CHECK-COUNT-2: !llvm.ptr<float>,
 // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64,
 // CHECK: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG2:.*]]: !llvm.i64)
 // CHECK32-LABEL: func @subview(
-// CHECK32-COUNT-2: !llvm<"float*">,
+// CHECK32-COUNT-2: !llvm.ptr<float>,
 // CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i32,
 // CHECK32: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i32,
 // CHECK32: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i32,
@@ -844,42 +844,42 @@ func @subview(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index,
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64
  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64
  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64
  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64
- // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64
- // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64
- // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32
  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i32
  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i32
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32

  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
@@ -889,13 +889,13 @@ func @subview(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg0 : index,
 }

 // CHECK-LABEL: func @subview_non_zero_addrspace(
-// CHECK-COUNT-2: !llvm<"float addrspace(3)*">,
+// CHECK-COUNT-2: !llvm.ptr<float, 3>,
 // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64,
 // CHECK: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG2:.*]]: !llvm.i64)
 // CHECK32-LABEL: func @subview_non_zero_addrspace(
-// CHECK32-COUNT-2: !llvm<"float addrspace(3)*">,
+// CHECK32-COUNT-2: !llvm.ptr<float, 3>,
 // CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i32,
 // CHECK32: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i32,
 // CHECK32: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i32,
@@ -905,42 +905,42 @@ func @subview_non_zero_addrspace(%0 : memref<64x4xf32, offset: 0, strides: [4, 1
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*">
- // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*">
- // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float, 3> to !llvm.ptr<float, 3>
+ // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float, 3> to !llvm.ptr<float, 3>
+ // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64
  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64
  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64
  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64
- // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64
- // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64
- // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [2 x i64], [2 x i64] }">
- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float addrspace(3)*"> to !llvm<"float addrspace(3)*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+ // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float, 3> to !llvm.ptr<float, 3>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float, 3> to !llvm.ptr<float, 3>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32
  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i32
  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i32
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i32
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32

  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
@@ -950,8 +950,8 @@ func @subview_non_zero_addrspace(%0 : memref<64x4xf32, offset: 0, strides: [4, 1
 }

 // CHECK-LABEL: func @subview_const_size(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*">,
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*">,
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64
@@ -961,8 +961,8 @@ func @subview_non_zero_addrspace(%0 : memref<64x4xf32, offset: 0, strides: [4, 1
 // CHECK-SAME: %[[ARG8:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG9:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK32-LABEL: func @subview_const_size(
-// CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*">,
-// CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*">,
+// CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
+// CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i32
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i32
 // CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i32
@@ -976,48 +976,48 @@ func @subview_const_size(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i64
  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64
  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i64
  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64
- // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
- // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i64
- // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
- // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i64
- // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i32
  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i32
  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i32
  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i32
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i32
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i32
- // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  %1 = subview %0[%arg0, %arg1][4, 2][%arg0, %arg1] :
  memref<64x4xf32, offset: 0, strides: [4, 1]> to
  memref<4x2xf32, offset: ?, strides: [?, ?]>
@@ -1025,8 +1025,8 @@ func @subview_const_size(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg
 }

 // CHECK-LABEL: func @subview_const_stride(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*">,
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*">,
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
 // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i64
@@ -1036,8 +1036,8 @@ func @subview_const_size(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %arg
 // CHECK-SAME: %[[ARG8:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK-SAME: %[[ARG9:[a-zA-Z0-9]*]]: !llvm.i64
 // CHECK32-LABEL: func @subview_const_stride(
-// CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm<"float*">,
-// CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm<"float*">,
+// CHECK32-SAME: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
+// CHECK32-SAME: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.ptr<float>,
 // CHECK32-SAME: %[[ARG2:[a-zA-Z0-9]*]]: !llvm.i32
 // CHECK32-SAME: %[[ARG3:[a-zA-Z0-9]*]]: !llvm.i32
 // CHECK32-SAME: %[[ARG4:[a-zA-Z0-9]*]]: !llvm.i32
@@ -1051,44 +1051,44 @@ func @subview_const_stride(%0 : memref<64x4xf32, offset: 0, strides: [4, 1]>, %a
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i64
  // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64
  // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i64
  // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64
- // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
- // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
  // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
- // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }">
- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[OFFINC:.*]] = llvm.mul %[[ARG7]], %[[STRIDE0]] : !llvm.i32
  // CHECK32: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i32
  // CHECK32: %[[OFFINC1:.*]] = llvm.mul %[[ARG8]], %[[STRIDE1]] : !llvm.i32
  // CHECK32: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i32
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG8]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST2:.*]] = llvm.mlir.constant(2 : i64)
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[ARG7]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
- // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  %1 = subview %0[%arg0, %arg1][%arg0, %arg1][1, 2] :
  memref<64x4xf32, offset: 0, strides: [4, 1]> to
  memref
@@ -1102,23 +1102,23 @@ func @subview_const_stride_and_offset(%0 : memref<64x4xf32, offset: 0, strides:
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : index)
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[CST8]], %[[DESC1]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[CST8]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST3:.*]] = llvm.mlir.constant(3 : i64)
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST3]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[CST3]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64)
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64)
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST4:.*]] = llvm.mlir.constant(4 : i64)
- // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  %1 = subview %0[0, 8][62, 3][1, 1] :
  memref<64x4xf32, offset: 0, strides: [4, 1]> to
  memref<62x3xf32, offset: 8, strides: [4, 1]>
@@ -1126,13 +1126,13 @@ func @subview_const_stride_and_offset(%0 : memref<64x4xf32, offset: 0, strides:
 }

 // CHECK-LABEL: func @subview_mixed_static_dynamic(
-// CHECK-COUNT-2: !llvm<"float*">,
+// CHECK-COUNT-2: !llvm.ptr<float>,
 // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64,
 // CHECK: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i64,
 // CHECK: %[[ARG2:.*]]: !llvm.i64)
 // CHECK32-LABEL: func @subview_mixed_static_dynamic(
-// CHECK32-COUNT-2: !llvm<"float*">,
+// CHECK32-COUNT-2: !llvm.ptr<float>,
 // CHECK32-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i32,
 // CHECK32: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i32,
 // CHECK32: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i32,
@@ -1142,28 +1142,28 @@ func @subview_mixed_static_dynamic(%0 : memref<64x4xf32, offset: 0, strides: [4,
  // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]
  // CHECK32: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1]

- // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm<"float*"> to !llvm<"float*">
- // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
- // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST0:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC0:.*]] = llvm.insertvalue %[[BITCAST0]], %[[DESC]][0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[BITCAST1:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<float> to !llvm.ptr<float>
+ // CHECK32: %[[DESC1:.*]] = llvm.insertvalue %[[BITCAST1]], %[[DESC0]][1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
+ // CHECK32: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[OFFM1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE0]] : !llvm.i32
  // CHECK32: %[[OFFA1:.*]] = llvm.add %[[OFF]], %[[OFFM1]] : !llvm.i32
  // CHECK32: %[[CST8:.*]] = llvm.mlir.constant(8 : i64) : !llvm.i32
  // CHECK32: %[[OFFM2:.*]] = llvm.mul %[[CST8]], %[[STRIDE1]] : !llvm.i32
  // CHECK32: %[[OFFA2:.*]] = llvm.add %[[OFFA1]], %[[OFFM2]] : !llvm.i32
- // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFFA2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC2:.*]] = llvm.insertvalue %[[OFFA2]], %[[DESC1]][2] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
- // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG2]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC3:.*]] = llvm.insertvalue %[[ARG2]], %[[DESC2]][3, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST1:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i32
- // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) : !llvm.i32
- // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  // CHECK32: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i32
- // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i32, [2 x i32], [2 x i32] }">
+ // CHECK32: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i32, array<2 x i32>, array<2 x i32>)>
  %1 = subview %0[%arg1, 8][62, %arg2][%arg0, 1] :
  memref<64x4xf32, offset: 0, strides: [4, 1]> to
  memref<62x?xf32, offset: ?, strides: [?, 1]>
@@ -1200,7 +1200,7 @@ func @generic_atomic_rmw(%I : memref<10xf32>, %i : index) -> f32 {
  %c1 = constant 1.0 : f32
  atomic_yield %c1 : f32
  }
- // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm<"float*">
+ // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<float>
 // CHECK-NEXT: llvm.br ^bb1([[init]] : !llvm.float)
 // CHECK-NEXT: ^bb1([[loaded:%.*]]: !llvm.float):
 // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1.000000e+00 : f32)
@@ -1222,10 +1222,10 @@ func @generic_atomic_rmw(%I : memref<10xf32>, %i : index) -> f32 {

 // CHECK-LABEL: func @assume_alignment
 func @assume_alignment(%0 : memref<4x4xf16>) {
- // CHECK: %[[PTR:.*]] = llvm.extractvalue %[[MEMREF:.*]][1] : !llvm<"{ half*, half*, i64, [2 x i64], [2 x i64] }">
+ // CHECK: %[[PTR:.*]] = llvm.extractvalue %[[MEMREF:.*]][1] : !llvm.struct<(ptr<half>, ptr<half>, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
 // CHECK-NEXT: %[[MASK:.*]] = llvm.mlir.constant(15 : index) : !llvm.i64
- // CHECK-NEXT: %[[INT:.*]] = llvm.ptrtoint %[[PTR]] : !llvm<"half*"> to !llvm.i64
+ // CHECK-NEXT: %[[INT:.*]] = llvm.ptrtoint %[[PTR]] : !llvm.ptr<half> to !llvm.i64
 // CHECK-NEXT: %[[MASKED_PTR:.*]] = llvm.and %[[INT]], %[[MASK:.*]] : !llvm.i64
 // CHECK-NEXT: %[[CONDITION:.*]] = llvm.icmp "eq" %[[MASKED_PTR]], %[[ZERO]] : !llvm.i64
 // CHECK-NEXT: "llvm.intr.assume"(%[[CONDITION]]) : (!llvm.i1) -> ()
@@ -1237,16 +1237,16 @@ func @assume_alignment(%0 : memref<4x4xf16>) {

 // CHECK-LABEL: func @mlir_cast_to_llvm
 // CHECK-SAME: %[[ARG:.*]]:
-func @mlir_cast_to_llvm(%0 : vector<2xf16>) -> !llvm<"<2 x half>"> {
-  %1 = llvm.mlir.cast %0 : vector<2xf16> to !llvm<"<2 x half>">
+func @mlir_cast_to_llvm(%0 : vector<2xf16>) -> !llvm.vec<2 x half> {
+  %1 = llvm.mlir.cast %0 : vector<2xf16> to !llvm.vec<2 x half>
   // CHECK-NEXT: llvm.return %[[ARG]]
-  return %1 : !llvm<"<2 x half>">
+  return %1 : !llvm.vec<2 x half>
 }

 // CHECK-LABEL: func @mlir_cast_from_llvm
 // CHECK-SAME: %[[ARG:.*]]:
-func @mlir_cast_from_llvm(%0 : !llvm<"<2 x half>">) -> vector<2xf16> {
-  %1 = llvm.mlir.cast %0 : !llvm<"<2 x half>"> to vector<2xf16>
+func @mlir_cast_from_llvm(%0 : !llvm.vec<2 x half>) -> vector<2xf16> {
+  %1 = llvm.mlir.cast %0 : !llvm.vec<2 x half> to vector<2xf16>
   // CHECK-NEXT: llvm.return %[[ARG]]
   return %1 : vector<2xf16>
 }
@@ -1281,13 +1281,13 @@ func @bfloat(%arg0: bf16) -> bf16 {
 // -----

 // CHECK-LABEL: func @memref_index
-// CHECK-SAME: %arg0: !llvm<"i64*">, %arg1: !llvm<"i64*">,
+// CHECK-SAME: %arg0: !llvm.ptr<i64>, %arg1: !llvm.ptr<i64>,
 // CHECK-SAME: %arg2: !llvm.i64, %arg3: !llvm.i64, %arg4: !llvm.i64)
-// CHECK-SAME: -> !llvm<"{ i64*, i64*, i64, [1 x i64], [1 x i64] }">
+// CHECK-SAME: -> !llvm.struct<(ptr<i64>, ptr<i64>, i64, array<1 x i64>, array<1 x i64>)>
 // CHECK32-LABEL: func @memref_index
-// CHECK32-SAME: %arg0: !llvm<"i32*">, %arg1: !llvm<"i32*">,
+// CHECK32-SAME: %arg0: !llvm.ptr<i32>, %arg1: !llvm.ptr<i32>,
 // CHECK32-SAME: %arg2: !llvm.i32, %arg3: !llvm.i32, %arg4: !llvm.i32)
-// CHECK32-SAME: -> !llvm<"{ i32*, i32*, i32, [1 x i32], [1 x i32] }">
+// CHECK32-SAME: -> !llvm.struct<(ptr<i32>, ptr<i32>, i32, array<1 x i32>, array<1 x i32>)>
 func @memref_index(%arg0: memref<32xindex>) -> memref<32xindex> {
   return %arg0 : memref<32xindex>
 }
diff --git a/mlir/test/Conversion/StandardToLLVM/invalid.mlir b/mlir/test/Conversion/StandardToLLVM/invalid.mlir
index 56e6612423360..469bb9753ec49 100644
--- a/mlir/test/Conversion/StandardToLLVM/invalid.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/invalid.mlir
@@ -24,8 +24,8 @@ func @mlir_cast_to_llvm_int(%0 : i32) -> !llvm.i64 {

 // -----

-func @mlir_cast_to_llvm_vec(%0 : vector<1x1xf32>) -> !llvm<"<1 x float>"> {
+func @mlir_cast_to_llvm_vec(%0 : vector<1x1xf32>) -> !llvm.vec<1 x float> {
   // expected-error@+1 {{'llvm.mlir.cast' op only 1-d vector is allowed}}
-  %1 = llvm.mlir.cast %0 : vector<1x1xf32> to !llvm<"<1 x float>">
-  return %1 : !llvm<"<1 x float>">
+  %1 = llvm.mlir.cast %0 : vector<1x1xf32> to !llvm.vec<1 x float>
+  return %1 : !llvm.vec<1 x float>
 }
diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
index a6006023a5092..c55950a556344 100644
--- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
@@ -1,11 +1,11 @@
 // RUN: mlir-opt -allow-unregistered-dialect %s -convert-std-to-llvm -split-input-file -verify-diagnostics | FileCheck %s

 // CHECK-LABEL: func @address_space(
-// CHECK-SAME: !llvm<"float addrspace(7)*">
+// CHECK-SAME: !llvm.ptr<float, 7>
 func @address_space(%arg0 : memref<32xf32, affine_map<(d0) -> (d0)>, 7>) {
   %0 = alloc() : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
   %1 = constant 7 : index
-  // CHECK: llvm.load %{{.*}} : !llvm<"float addrspace(5)*">
+  // CHECK: llvm.load %{{.*}} : !llvm.ptr<float, 5>
   %2 = load %0[%1] : memref<32xf32, affine_map<(d0) -> (d0)>, 5>
   std.return
 }
@@ -53,11 +53,11 @@ func @rsqrt_double(%arg0 : f64) {
 // -----

 // CHECK-LABEL: func @rsqrt_vector(
-// CHECK-SAME: !llvm<"<4 x float>">
+// CHECK-SAME: !llvm.vec<4 x float>
 func @rsqrt_vector(%arg0 : vector<4xf32>) {
-  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm<"<4 x float>">
-  // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (!llvm<"<4 x float>">) -> !llvm<"<4 x float>">
-  // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm<"<4 x float>">
+  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x float>
+  // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (!llvm.vec<4 x float>) -> !llvm.vec<4 x float>
+  // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.vec<4 x float>
   %0 = rsqrt %arg0 : vector<4xf32>
   std.return
 }
@@ -65,13 +65,13 @@ func @rsqrt_vector(%arg0 : vector<4xf32>) {
 // -----

 // CHECK-LABEL: func @rsqrt_multidim_vector(
-// CHECK-SAME: !llvm<"[4 x <3 x float>]">
+// CHECK-SAME: !llvm.array<4 x vec<3 x float>>
 func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) {
-  // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %arg0[0] : !llvm<"[4 x <3 x float>]">
-  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : !llvm<"<3 x float>">
-  // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (!llvm<"<3 x float>">) -> !llvm<"<3 x float>">
-  // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm<"<3 x float>">
-  // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %0[0] : !llvm<"[4 x <3 x float>]">
+  // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %arg0[0] : !llvm.array<4 x vec<3 x float>>
+  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : !llvm.vec<3 x float>
+  // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (!llvm.vec<3 x float>) -> !llvm.vec<3 x float>
+  // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : !llvm.vec<3 x float>
+  // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %0[0] : !llvm.array<4 x vec<3 x float>>
   %0 = rsqrt %arg0 : vector<4x3xf32>
   std.return
 }
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
index eab211ffaaf4d..a2a47b0eff267 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-reduction-to-llvm.mlir
@@ -3,17 +3,17 @@
 //
 // CHECK-LABEL: llvm.func @reduce_add_f32(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x float>">)
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
 // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
-// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm<"<16 x float>">) -> !llvm.float
+// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // CHECK: llvm.return %[[V]] : !llvm.float
 //
 // REASSOC-LABEL: llvm.func @reduce_add_f32(
-// REASSOC-SAME: %[[A:.*]]: !llvm<"<16 x float>">)
+// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // REASSOC: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float
 // REASSOC: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]])
-// REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm<"<16 x float>">) -> !llvm.float
+// REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // REASSOC: llvm.return %[[V]] : !llvm.float
 //
 func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
@@ -23,17 +23,17 @@ func @reduce_add_f32(%arg0: vector<16xf32>) -> f32 {
 //
 // CHECK-LABEL: llvm.func @reduce_mul_f32(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x float>">)
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // CHECK: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float
 // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fmul"(%[[C]], %[[A]])
-// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm<"<16 x float>">) -> !llvm.float
+// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // CHECK: llvm.return %[[V]] : !llvm.float
 //
 // REASSOC-LABEL: llvm.func @reduce_mul_f32(
-// REASSOC-SAME: %[[A:.*]]: !llvm<"<16 x float>">)
+// REASSOC-SAME: %[[A:.*]]: !llvm.vec<16 x float>)
 // REASSOC: %[[C:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float
 // REASSOC: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fmul"(%[[C]], %[[A]])
-// REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm<"<16 x float>">) -> !llvm.float
+// REASSOC-SAME: {reassoc = true} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float
 // REASSOC: llvm.return %[[V]] : !llvm.float
 //
 func @reduce_mul_f32(%arg0: vector<16xf32>) -> f32 {
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index c5259a17fcef0..2e5aae886c380 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -6,11 +6,11 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> {
 }
 // CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar(
 // CHECK-SAME: %[[A:.*]]: !llvm.float)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"<2 x float>">
+// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.vec<2 x float>
 // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : !llvm.i32] : !llvm<"<2 x float>">
-// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>">
-// CHECK: llvm.return %[[T3]] : !llvm<"<2 x float>">
+// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : !llvm.i32] : !llvm.vec<2 x float>
+// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : !llvm.vec<2 x float>, !llvm.vec<2 x float>
+// CHECK: llvm.return %[[T3]] : !llvm.vec<2 x float>

 func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> {
   %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32>
@@ -18,14 +18,14 @@ func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> {
 }
 // CHECK-LABEL: llvm.func @broadcast_vec2d_from_scalar(
 // CHECK-SAME: %[[A:.*]]: !llvm.float)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"[2 x <3 x float>]">
-// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm<"<3 x float>">
+// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vec<3 x float>>
+// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<3 x float>
 // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm<"<3 x float>">
-// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>">
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm<"[2 x <3 x float>]">
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm<"[2 x <3 x float>]">
-// CHECK: llvm.return %[[T6]] : !llvm<"[2 x <3 x float>]">
+// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm.vec<3 x float>
+// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm.array<2 x vec<3 x float>>
+// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<2 x vec<3 x float>>
+// CHECK: llvm.return %[[T6]] : !llvm.array<2 x vec<3 x float>>

 func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> {
   %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32>
@@ -33,277 +33,277 @@ func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> {
 }
 // CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar(
 // CHECK-SAME: %[[A:.*]]: !llvm.float)
-// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm<"<4 x float>">
+// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm.vec<4 x float>
 // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm<"<4 x float>">
-// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>">
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm<"[2 x [3 x <4 x float>]]">
-// CHECK: llvm.return %[[T10]] : !llvm<"[2 x [3 x <4 x float>]]">
+// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm.vec<4 x float>
+// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<4 x float>, !llvm.vec<4 x float>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm.array<2 x array<3 x vec<4 x float>>>
+// CHECK: llvm.return %[[T10]] : !llvm.array<2 x array<3 x vec<4 x float>>>

 func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> {
   %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32>
   return %0 : vector<2xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">)
-// CHECK: llvm.return %[[A]] : !llvm<"<2 x float>">
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>)
+// CHECK: llvm.return %[[A]] : !llvm.vec<2 x float>

 func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> {
   %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32>
   return %0 : vector<3x2xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm<"[3 x <2 x float>]">
-// CHECK: llvm.return %[[T3]] : !llvm<"[3 x <2 x float>]">
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>)
+// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<3 x vec<2 x float>>
+// CHECK: llvm.return %[[T3]] : !llvm.array<3 x vec<2 x float>>

 func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> {
   %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32>
   return %0 : vector<4x3x2xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm<"[3 x <2 x float>]">
-// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: llvm.return %[[T8]] : !llvm<"[4 x [3 x <2 x float>]]">
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>)
+// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vec<2 x float>>
+// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: llvm.return %[[T8]] : !llvm.array<4 x array<3 x vec<2 x float>>>

 func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> {
   %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32>
   return %0 : vector<4x3x2xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d(
-// CHECK-SAME: %[[A:.*]]: !llvm<"[3 x <2 x float>]">)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm<"[4 x [3 x <2 x float>]]">
-// CHECK: llvm.return %[[T4]] : !llvm<"[4 x [3 x <2 x float>]]">
+// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<2 x float>>)
+// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm.array<4 x array<3 x vec<2 x float>>>
+// CHECK: llvm.return %[[T4]] : !llvm.array<4 x array<3 x vec<2 x float>>>

 func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> {
   %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32>
   return %0 : vector<4xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_stretch(
-// CHECK-SAME: %[[A:.*]]: !llvm<"<1 x float>">)
+// CHECK-SAME: %[[A:.*]]: !llvm.vec<1 x float>)
 // CHECK: %[[T0:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64
-// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : !llvm.i64] : !llvm<"<1 x float>">
-// CHECK: %[[T2:.*]] = llvm.mlir.undef : !llvm<"<4 x float>">
+// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : !llvm.i64] : !llvm.vec<1 x float>
+// CHECK: %[[T2:.*]] = llvm.mlir.undef : !llvm.vec<4 x float>
 // CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
-// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : !llvm.i32] : !llvm<"<4 x float>">
-// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>">
-// CHECK: llvm.return %[[T5]] : !llvm<"<4 x float>">
+// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : !llvm.i32] : !llvm.vec<4 x float>
+// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm.vec<4 x float>, !llvm.vec<4 x float>
+// CHECK: llvm.return %[[T5]] : !llvm.vec<4 x float>

 func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> {
   %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32>
   return %0 : vector<3x4xf32>
 }
 // CHECK-LABEL: llvm.func @broadcast_stretch_at_start(
-// CHECK-SAME: %[[A:.*]]: !llvm<"[1 x <4 x float>]">)
-// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm<"[3 x <4 x float>]">
-// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[1 x <4 x float>]">
-// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm<"[3 x <4 x float>]">
-// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm<"[3 x <4 x float>]">
-// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm<"[3 x <4 x float>]">
-// CHECK: llvm.return %[[T4]] : !llvm<"[3 x <4 x float>]"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vec<4 x float>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vec<4 x float>> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm.array<3 x vec<4 x float>> +// CHECK: llvm.return %[[T4]] : !llvm.array<3 x vec<4 x float>> func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> return %0 : vector<4x3xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch_at_end( -// CHECK-SAME: %[[A:.*]]: !llvm<"[4 x <1 x float>]">) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm<"[4 x <3 x float>]"> -// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[4 x <1 x float>]"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x vec<1 x float>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm.array<4 x vec<3 x float>> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<4 x vec<1 x float>> // CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: %[[T4:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : !llvm.i64] : !llvm.vec<1 x float> +// CHECK: %[[T4:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T5:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm<"[4 x <3 x float>]"> -// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<4 x vec<3 x float>> +// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<4 x vec<1 x float>> // CHECK: %[[T10:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : !llvm.i64] : !llvm.vec<1 x float> +// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm<"[4 x <3 x float>]"> -// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm<"[4 x <1 x float>]"> +// 
CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<4 x vec<3 x float>> +// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vec<1 x float>> // CHECK: %[[T18:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: %[[T20:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : !llvm.i64] : !llvm.vec<1 x float> +// CHECK: %[[T20:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T21:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm<"[4 x <3 x float>]"> -// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm.array<4 x vec<3 x float>> +// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vec<1 x float>> // CHECK: %[[T26:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: %[[T28:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : !llvm.i64] : !llvm.vec<1 x float> +// CHECK: %[[T28:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T29:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.return %[[T32]] : !llvm<"[4 x <3 x float>]"> +// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm.array<4 x vec<3 x float>> +// CHECK: llvm.return %[[T32]] : !llvm.array<4 x vec<3 x float>> func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } // CHECK-LABEL: llvm.func @broadcast_stretch_in_middle( -// CHECK-SAME: %[[A:.*]]: !llvm<"[4 x [1 x <2 x float>]]">) -// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T2:.*]] = 
llvm.extractvalue %[[A]][0, 0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], %[[T17]][2] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm<"[3 x <2 x float>]"> -// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return %[[T25]] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<4 x array<1 x vec<2 x float>>>) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm.array<4 x array<3 x vec<2 x float>>> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0, 0] : !llvm.array<4 x array<1 x vec<2 x float>>> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T2]], %[[T1]][0] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T2]], %[[T4]][1] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T2]], %[[T5]][2] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm.array<4 x array<3 x vec<2 x float>>> +// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1, 0] : !llvm.array<4 x array<1 x vec<2 x float>>> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T1]][0] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T8]], %[[T10]][1] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T8]], %[[T11]][2] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm.array<4 x array<3 x vec<2 x float>>> +// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2, 0] : !llvm.array<4 x array<1 x vec<2 x float>>> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T14]], %[[T1]][0] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T14]], %[[T16]][1] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T14]], 
%[[T17]][2] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm.array<4 x array<3 x vec<2 x float>>> +// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3, 0] : !llvm.array<4 x array<1 x vec<2 x float>>> +// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T20]], %[[T1]][0] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T20]], %[[T22]][1] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T20]], %[[T23]][2] : !llvm.array<3 x vec<2 x float>> +// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm.array<4 x array<3 x vec<2 x float>>> +// CHECK: llvm.return %[[T25]] : !llvm.array<4 x array<3 x vec<2 x float>>> func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } // CHECK-LABEL: llvm.func @outerproduct( -// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">, -// CHECK-SAME: %[[B:.*]]: !llvm<"<3 x float>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>, +// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x float>) // CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : !llvm<"<3 x float>"> -// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm<"[2 x <3 x float>]"> +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : !llvm.vec<3 x float> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm.array<2 x vec<3 x float>> // CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 -// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: %[[T11:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: %[[T11:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : !llvm<"<3 x float>"> -// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return %[[T16]] : !llvm<"[2 x <3 x float>]"> +// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 
: i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : !llvm.vec<3 x float> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm.array<2 x vec<3 x float>> +// CHECK: llvm.return %[[T16]] : !llvm.array<2 x vec<3 x float>> func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32> return %2 : vector<2x3xf32> } // CHECK-LABEL: llvm.func @outerproduct_add( -// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">, -// CHECK-SAME: %[[B:.*]]: !llvm<"<3 x float>">, -// CHECK-SAME: %[[C:.*]]: !llvm<"[2 x <3 x float>]">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>, +// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x float>, +// CHECK-SAME: %[[C:.*]]: !llvm.array<2 x vec<3 x float>>) // CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<2x3xf32>) // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm<"[2 x <3 x float>]"> -// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm<"[2 x <3 x float>]"> +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm.array<2 x vec<3 x float>> +// CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (!llvm.vec<3 x float>, !llvm.vec<3 x float>, !llvm.vec<3 x float>) +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm.array<2 x vec<3 x float>> // CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 -// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm.vec<3 x float> // CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm<"<3 x float>"> -// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm<"[2 x <3 x float>]"> -// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) -// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return %[[T18]] : !llvm<"[2 
x <3 x float>]"> +// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm.vec<3 x float> +// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm.vec<3 x float>, !llvm.vec<3 x float> +// CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm.array<2 x vec<3 x float>> +// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (!llvm.vec<3 x float>, !llvm.vec<3 x float>, !llvm.vec<3 x float>) +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm.array<2 x vec<3 x float>> +// CHECK: llvm.return %[[T18]] : !llvm.array<2 x vec<3 x float>> func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2xf32> { %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32> return %1 : vector<2xf32> } // CHECK-LABEL: llvm.func @shuffle_1D_direct( -// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">, -// CHECK-SAME: %[[B:.*]]: !llvm<"<2 x float>">) -// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.return %[[s]] : !llvm<"<2 x float>"> +// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>, +// CHECK-SAME: %[[B:.*]]: !llvm.vec<2 x float>) +// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, 1] : !llvm.vec<2 x float>, !llvm.vec<2 x float> +// CHECK: llvm.return %[[s]] : !llvm.vec<2 x float> func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> { %1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32> return %1 : vector<5xf32> } // CHECK-LABEL: llvm.func @shuffle_1D( -// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">, -// CHECK-SAME: %[[B:.*]]: !llvm<"<3 x float>">) -// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm<"<5 x float>"> +// CHECK-SAME: %[[A:.*]]: !llvm.vec<2 x float>, +// CHECK-SAME: %[[B:.*]]: !llvm.vec<3 x float>) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.vec<5 x float> // CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[e1:.*]] = llvm.extractelement %[[B]][%[[c2]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[e1:.*]] = llvm.extractelement %[[B]][%[[c2]] : !llvm.i64] : !llvm.vec<3 x float> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[i1:.*]] = llvm.insertelement %[[e1]], %[[u0]][%[[c0]] : !llvm.i64] : !llvm.vec<5 x float> // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[e2:.*]] = llvm.extractelement %[[B]][%[[c1]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[e2:.*]] = llvm.extractelement %[[B]][%[[c1]] : !llvm.i64] : !llvm.vec<3 x float> // CHECK: %[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[i2:.*]] = llvm.insertelement %[[e2]], %[[i1]][%[[c1]] : !llvm.i64] : !llvm.vec<5 x float> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[e3:.*]] = llvm.extractelement %[[B]][%[[c0]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[e3:.*]] = llvm.extractelement %[[B]][%[[c0]] : !llvm.i64] : !llvm.vec<3 x float> // CHECK: %[[c2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[i3:.*]] = llvm.insertelement %[[e3]], %[[i2]][%[[c2]] : !llvm.i64] : !llvm.vec<5 x float> // CHECK: 
%[[c1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[e4:.*]] = llvm.extractelement %[[A]][%[[c1]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[e4:.*]] = llvm.extractelement %[[A]][%[[c1]] : !llvm.i64] : !llvm.vec<2 x float> // CHECK: %[[c3:.*]] = llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : !llvm.i64] : !llvm<"<5 x float>"> +// CHECK: %[[i4:.*]] = llvm.insertelement %[[e4]], %[[i3]][%[[c3]] : !llvm.i64] : !llvm.vec<5 x float> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[e5:.*]] = llvm.extractelement %[[A]][%[[c0]] : !llvm.i64] : !llvm.vec<2 x float> // CHECK: %[[c4:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 -// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : !llvm.i64] : !llvm<"<5 x float>"> -// CHECK: llvm.return %[[i5]] : !llvm<"<5 x float>"> +// CHECK: %[[i5:.*]] = llvm.insertelement %[[e5]], %[[i4]][%[[c4]] : !llvm.i64] : !llvm.vec<5 x float> +// CHECK: llvm.return %[[i5]] : !llvm.vec<5 x float> func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> { %1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32> return %1 : vector<3x4xf32> } // CHECK-LABEL: llvm.func @shuffle_2D( -// CHECK-SAME: %[[A:.*]]: !llvm<"[1 x <4 x float>]">, -// CHECK-SAME: %[[B:.*]]: !llvm<"[2 x <4 x float>]">) -// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> -// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]"> -// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm<"[3 x <4 x float>]"> -// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[1 x <4 x float>]"> -// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm<"[3 x <4 x float>]"> -// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]"> -// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.return %[[i3]] : !llvm<"[3 x <4 x float>]"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<1 x vec<4 x float>>, +// CHECK-SAME: %[[B:.*]]: !llvm.array<2 x vec<4 x float>>) +// CHECK: %[[u0:.*]] = llvm.mlir.undef : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[e1:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x float>> +// CHECK: %[[i1:.*]] = llvm.insertvalue %[[e1]], %[[u0]][0] : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[e2:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<1 x vec<4 x float>> +// CHECK: %[[i2:.*]] = llvm.insertvalue %[[e2]], %[[i1]][1] : !llvm.array<3 x vec<4 x float>> +// CHECK: %[[e3:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x float>> +// CHECK: %[[i3:.*]] = llvm.insertvalue %[[e3]], %[[i2]][2] : !llvm.array<3 x vec<4 x float>> +// CHECK: llvm.return %[[i3]] : !llvm.array<3 x vec<4 x float>> func @extract_element(%arg0: vector<16xf32>) -> f32 { %0 = constant 15 : i32 @@ -311,9 +311,9 @@ func @extract_element(%arg0: vector<16xf32>) -> f32 { return %1 : f32 } // CHECK-LABEL: llvm.func @extract_element( -// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x float>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>) // CHECK: %[[c:.*]] = llvm.mlir.constant(15 : i32) : !llvm.i32 -// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : !llvm.i32] : !llvm<"<16 x float>"> +// CHECK: %[[x:.*]] = llvm.extractelement %[[A]][%[[c]] : !llvm.i32] : !llvm.vec<16 x float> // CHECK: llvm.return %[[x]] : !llvm.float func 
@extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { @@ -322,7 +322,7 @@ func @extract_element_from_vec_1d(%arg0: vector<16xf32>) -> f32 { } // CHECK-LABEL: llvm.func @extract_element_from_vec_1d // CHECK: llvm.mlir.constant(15 : i64) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<16 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<16 x float> // CHECK: llvm.return {{.*}} : !llvm.float func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> { @@ -330,25 +330,25 @@ func @extract_vec_2d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> return %0 : vector<3x16xf32> } // CHECK-LABEL: llvm.func @extract_vec_2d_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x [3 x <16 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[3 x <16 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vec<16 x float>>> +// CHECK: llvm.return {{.*}} : !llvm.array<3 x vec<16 x float>> func @extract_vec_1d_from_vec_3d(%arg0: vector<4x3x16xf32>) -> vector<16xf32> { %0 = vector.extract %arg0[0, 0]: vector<4x3x16xf32> return %0 : vector<16xf32> } // CHECK-LABEL: llvm.func @extract_vec_1d_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm<"[4 x [3 x <16 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"<16 x float>"> +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vec<16 x float>>> +// CHECK: llvm.return {{.*}} : !llvm.vec<16 x float> func @extract_element_from_vec_3d(%arg0: vector<4x3x16xf32>) -> f32 { %0 = vector.extract %arg0[0, 0, 0]: vector<4x3x16xf32> return %0 : f32 } // CHECK-LABEL: llvm.func @extract_element_from_vec_3d -// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm<"[4 x [3 x <16 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vec<16 x float>>> // CHECK: llvm.mlir.constant(0 : i64) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<16 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<16 x float> // CHECK: llvm.return {{.*}} : !llvm.float func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { @@ -358,10 +358,10 @@ func @insert_element(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { } // CHECK-LABEL: llvm.func @insert_element( // CHECK-SAME: %[[A:.*]]: !llvm.float, -// CHECK-SAME: %[[B:.*]]: !llvm<"<4 x float>">) +// CHECK-SAME: %[[B:.*]]: !llvm.vec<4 x float>) // CHECK: %[[c:.*]] = llvm.mlir.constant(3 : i32) : !llvm.i32 -// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : !llvm.i32] : !llvm<"<4 x float>"> -// CHECK: llvm.return %[[x]] : !llvm<"<4 x float>"> +// CHECK: %[[x:.*]] = llvm.insertelement %[[A]], %[[B]][%[[c]] : !llvm.i32] : !llvm.vec<4 x float> +// CHECK: llvm.return %[[x]] : !llvm.vec<4 x float> func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32> @@ -369,65 +369,65 @@ func @insert_element_into_vec_1d(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf } // CHECK-LABEL: llvm.func @insert_element_into_vec_1d // CHECK: llvm.mlir.constant(3 : i64) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK: llvm.return {{.*}} : !llvm<"<4 x float>"> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<4 x float> +// CHECK: llvm.return {{.*}} : !llvm.vec<4 x float> func @insert_vec_2d_into_vec_3d(%arg0: vector<8x16xf32>, %arg1: 
vector<4x8x16xf32>) -> vector<4x8x16xf32> {
   %0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32>
   return %0 : vector<4x8x16xf32>
 }
 // CHECK-LABEL: llvm.func @insert_vec_2d_into_vec_3d
-// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [8 x <16 x float>]]">
-// CHECK: llvm.return {{.*}} : !llvm<"[4 x [8 x <16 x float>]]">
+// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vec<16 x float>>>
+// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x float>>>
 
 func @insert_vec_1d_into_vec_3d(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
   %0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32>
   return %0 : vector<4x8x16xf32>
 }
 // CHECK-LABEL: llvm.func @insert_vec_1d_into_vec_3d
-// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm<"[4 x [8 x <16 x float>]]">
-// CHECK: llvm.return {{.*}} : !llvm<"[4 x [8 x <16 x float>]]">
+// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x float>>>
+// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x float>>>
 
 func @insert_element_into_vec_3d(%arg0: f32, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
   %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32>
   return %0 : vector<4x8x16xf32>
 }
 // CHECK-LABEL: llvm.func @insert_element_into_vec_3d
-// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm<"[4 x [8 x <16 x float>]]">
+// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x float>>>
 // CHECK: llvm.mlir.constant(15 : i64) : !llvm.i64
-// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<16 x float>">
-// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm<"[4 x [8 x <16 x float>]]">
-// CHECK: llvm.return {{.*}} : !llvm<"[4 x [8 x <16 x float>]]">
+// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<16 x float>
+// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vec<16 x float>>>
+// CHECK: llvm.return {{.*}} : !llvm.array<4 x array<8 x vec<16 x float>>>
 
 func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
   %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
   return %0 : memref<vector<8x8x8xf32>>
 }
 // CHECK-LABEL: llvm.func @vector_type_cast
-// CHECK: llvm.mlir.undef : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
-// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
-// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm<"float*"> to !llvm<"[8 x [8 x <8 x float>]]*">
-// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
-// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }">
-// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm<"float*"> to !llvm<"[8 x [8 x <8 x float>]]*">
-// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
+// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>>, ptr<array<8 x array<8 x vec<8 x float>>>>, i64)>
+// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>
+// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr<float> to !llvm.ptr<array<8 x array<8 x vec<8 x float>>>>
+// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>>, ptr<array<8 x array<8 x vec<8 x float>>>>, i64)>
+// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>
+// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr<float> to !llvm.ptr<array<8 x array<8 x vec<8 x float>>>>
+// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>>, ptr<array<8 x array<8 x vec<8 x float>>>>, i64)>
 // CHECK: llvm.mlir.constant(0 : index
-// CHECK: llvm.insertvalue {{.*}}[2] : !llvm<"{ [8 x [8 x <8 x float>]]*, [8 x [8 x <8 x float>]]*, i64 }">
+// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>>, ptr<array<8 x array<8 x vec<8 x float>>>>, i64)>
 
 func @vector_type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref<vector<8x8x8xf32>, 3> {
   %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref<vector<8x8x8xf32>, 3>
   return %0 : memref<vector<8x8x8xf32>, 3>
 }
 // CHECK-LABEL: llvm.func @vector_type_cast_non_zero_addrspace
-// CHECK: llvm.mlir.undef : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
-// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
-// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm<"float addrspace(3)*"> to !llvm<"[8 x [8 x <8 x float>]] addrspace(3)*">
-// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
-// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [3 x i64], [3 x i64] }">
-// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm<"float addrspace(3)*"> to !llvm<"[8 x [8 x <8 x float>]] addrspace(3)*">
-// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>, 3>, ptr<array<8 x array<8 x vec<8 x float>>>, 3>, i64)>
+// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<3 x i64>, array<3 x i64>)>
+// CHECK: %[[allocatedBit:.*]] = llvm.bitcast %[[allocated]] : !llvm.ptr<float, 3> to !llvm.ptr<array<8 x array<8 x vec<8 x float>>>, 3>
+// CHECK: llvm.insertvalue %[[allocatedBit]], {{.*}}[0] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>, 3>, ptr<array<8 x array<8 x vec<8 x float>>>, 3>, i64)>
+// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<3 x i64>, array<3 x i64>)>
+// CHECK: %[[alignedBit:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr<float, 3> to !llvm.ptr<array<8 x array<8 x vec<8 x float>>>, 3>
+// CHECK: llvm.insertvalue %[[alignedBit]], {{.*}}[1] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>, 3>, ptr<array<8 x array<8 x vec<8 x float>>>, 3>, i64)>
 // CHECK: llvm.mlir.constant(0 : index
-// CHECK: llvm.insertvalue {{.*}}[2] : !llvm<"{ [8 x [8 x <8 x float>]] addrspace(3)*, [8 x [8 x <8 x float>]] addrspace(3)*, i64 }">
+// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<array<8 x array<8 x vec<8 x float>>>, 3>, ptr<array<8 x array<8 x vec<8 x float>>>, 3>, i64)>
 
 func @vector_print_scalar_i1(%arg0: i1) {
   vector.print %arg0 : i1
@@ -482,27 +482,27 @@ func @vector_print_vector(%arg0: vector<2x2xf32>) {
   return
 }
 // CHECK-LABEL: llvm.func @vector_print_vector(
-// CHECK-SAME: %[[A:.*]]: !llvm<"[2 x <2 x float>]">)
+// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vec<2 x float>>)
 // CHECK: llvm.call @print_open() : () -> ()
-// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[2 x <2 x float>]">
+// CHECK: %[[x0:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vec<2 x float>>
 // CHECK: llvm.call @print_open() : () -> ()
 // CHECK: %[[x1:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
-// CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : !llvm.i64] : !llvm<"<2 x float>">
+// CHECK: %[[x2:.*]] = llvm.extractelement %[[x0]][%[[x1]] : !llvm.i64] : !llvm.vec<2 x float>
 // CHECK: llvm.call @print_f32(%[[x2]]) : (!llvm.float) -> ()
 // CHECK: llvm.call @print_comma() : () -> 
() // CHECK: %[[x3:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[x4:.*]] = llvm.extractelement %[[x0]][%[[x3]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[x4:.*]] = llvm.extractelement %[[x0]][%[[x3]] : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.call @print_f32(%[[x4]]) : (!llvm.float) -> () // CHECK: llvm.call @print_close() : () -> () // CHECK: llvm.call @print_comma() : () -> () -// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"[2 x <2 x float>]"> +// CHECK: %[[x5:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vec<2 x float>> // CHECK: llvm.call @print_open() : () -> () // CHECK: %[[x6:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[x7:.*]] = llvm.extractelement %[[x5]][%[[x6]] : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.call @print_f32(%[[x7]]) : (!llvm.float) -> () // CHECK: llvm.call @print_comma() : () -> () // CHECK: %[[x8:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[x9:.*]] = llvm.extractelement %[[x5]][%[[x8]] : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: %[[x9:.*]] = llvm.extractelement %[[x5]][%[[x8]] : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.call @print_f32(%[[x9]]) : (!llvm.float) -> () // CHECK: llvm.call @print_close() : () -> () // CHECK: llvm.call @print_close() : () -> () @@ -514,15 +514,15 @@ func @extract_strided_slice1(%arg0: vector<4xf32>) -> vector<2xf32> { } // CHECK-LABEL: llvm.func @extract_strided_slice1 // CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float -// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<4 x float> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: llvm.extractelement %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<4 x float> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertelement %{{.*}}, %{{.*}}[%{{.*}} : !llvm.i64] : !llvm.vec<2 x float> func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32> @@ -530,11 +530,11 @@ func @extract_strided_slice2(%arg0: vector<4x8xf32>) -> vector<2x8xf32> { } // CHECK-LABEL: llvm.func @extract_strided_slice2 // CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float -// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x8xf32>) : !llvm<"[2 x <8 x float>]"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"[4 x <8 x float>]"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"[2 x <8 x float>]"> -// CHECK: llvm.extractvalue %{{.*}}[3] : !llvm<"[4 x <8 x float>]"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"[2 x <8 x float>]"> +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : 
vector<2x8xf32>) : !llvm.array<2 x vec<8 x float>> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vec<8 x float>> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.array<2 x vec<8 x float>> +// CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vec<8 x float>> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.array<2 x vec<8 x float>> func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> { %0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32> @@ -542,43 +542,43 @@ func @extract_strided_slice3(%arg0: vector<4x8xf32>) -> vector<2x2xf32> { } // CHECK-LABEL: llvm.func @extract_strided_slice3 // CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float -// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm<"[2 x <2 x float>]"> +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2x2xf32>) : !llvm.array<2 x vec<2 x float>> // // Subvector vector<8xf32> @2 -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vec<8 x float>> // CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float -// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x <2 x float>]"> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm.array<2 x vec<2 x float>> // // Subvector vector<8xf32> @3 -// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <8 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vec<8 x float>> // CHECK: llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float -// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm<"<2 x float>"> +// CHECK: llvm.mlir.constant(dense<0.000000e+00> : vector<2xf32>) : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<8 x float> // CHECK: llvm.mlir.constant(1 : index) : 
!llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x <2 x float>]"> +// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> +// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm.array<2 x vec<2 x float>> func @insert_strided_slice1(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> { %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32> return %0 : vector<4x4x4xf32> } // CHECK-LABEL: llvm.func @insert_strided_slice1 -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x [4 x <4 x float>]]"> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [4 x <4 x float>]]"> +// CHECK: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vec<4 x float>>> func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector<4x4xf32> { %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32> @@ -587,34 +587,34 @@ func @insert_strided_slice2(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector< // CHECK-LABEL: llvm.func @insert_strided_slice2 // // Subvector vector<2xf32> @0 into vector<4xf32> @2 -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[2 x <2 x float>]"> -// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <4 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vec<2 x float>> +// CHECK-NEXT: llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vec<4 x float>> // Element @0 -> element @2 // CHECK-NEXT: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK-NEXT: llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<4 x float> // Element @1 -> element @3 // CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK-NEXT: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x <4 x float>]"> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<4 x float> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x vec<4 x float>> // // Subvector vector<2xf32> @1 into vector<4xf32> @3 -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[2 x <2 x float>]"> -// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <4 x float>]"> +// CHECK: llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vec<2 x float>> +// CHECK-NEXT: llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vec<4 x float>> // Element @0 -> element @2 // CHECK-NEXT: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK-NEXT: llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, 
{{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<4 x float> // Element @1 -> element @3 // CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> +// CHECK-NEXT: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<2 x float> // CHECK-NEXT: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x <4 x float>]"> +// CHECK-NEXT: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm.vec<4 x float> +// CHECK-NEXT: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x vec<4 x float>> func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) -> vector<16x4x8xf32> { %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 2], strides = [1, 1]}: @@ -622,49 +622,49 @@ func @insert_strided_slice3(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) - return %0 : vector<16x4x8xf32> } // CHECK-LABEL: llvm.func @insert_strided_slice3( -// CHECK-SAME: %[[A:.*]]: !llvm<"[2 x <4 x float>]">, -// CHECK-SAME: %[[B:.*]]: !llvm<"[16 x [4 x <8 x float>]]">) -// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[16 x [4 x <8 x float>]]"> -// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[2 x <4 x float>]"> -// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm<"[16 x [4 x <8 x float>]]"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<2 x vec<4 x float>>, +// CHECK-SAME: %[[B:.*]]: !llvm.array<16 x array<4 x vec<8 x float>>>) +// CHECK: %[[s0:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<16 x array<4 x vec<8 x float>>> +// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][0] : !llvm.array<2 x vec<4 x float>> +// CHECK: %[[s2:.*]] = llvm.extractvalue %[[B]][0, 0] : !llvm.array<16 x array<4 x vec<8 x float>>> // CHECK: %[[s3:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s4:.*]] = llvm.extractelement %[[s1]][%[[s3]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s5:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s6:.*]] = llvm.insertelement %[[s4]], %[[s2]][%[[s5]] : !llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s7:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s8:.*]] = llvm.extractelement %[[s1]][%[[s7]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s9:.*]] = llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s10:.*]] = llvm.insertelement %[[s8]], %[[s6]][%[[s9]] : !llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s11:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s12:.*]] = llvm.extractelement %[[s1]][%[[s11]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s13:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 -// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s14:.*]] = llvm.insertelement %[[s12]], %[[s10]][%[[s13]] : 
!llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s15:.*]] = llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s16:.*]] = llvm.extractelement %[[s1]][%[[s15]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s17:.*]] = llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : !llvm.i64] : !llvm<"<8 x float>"> -// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm<"[4 x <8 x float>]"> -// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"[2 x <4 x float>]"> -// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm<"[16 x [4 x <8 x float>]]"> +// CHECK: %[[s18:.*]] = llvm.insertelement %[[s16]], %[[s14]][%[[s17]] : !llvm.i64] : !llvm.vec<8 x float> +// CHECK: %[[s19:.*]] = llvm.insertvalue %[[s18]], %[[s0]][0] : !llvm.array<4 x vec<8 x float>> +// CHECK: %[[s20:.*]] = llvm.extractvalue %[[A]][1] : !llvm.array<2 x vec<4 x float>> +// CHECK: %[[s21:.*]] = llvm.extractvalue %[[B]][0, 1] : !llvm.array<16 x array<4 x vec<8 x float>>> // CHECK: %[[s22:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s23:.*]] = llvm.extractelement %[[s20]][%[[s22]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s24:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], %[[s21]][%[[s24]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s25:.*]] = llvm.insertelement %[[s23]], %[[s21]][%[[s24]] : !llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s26:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s27:.*]] = llvm.extractelement %[[s20]][%[[s26]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s28:.*]] = llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s29:.*]] = llvm.insertelement %[[s27]], %[[s25]][%[[s28]] : !llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s30:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s31:.*]] = llvm.extractelement %[[s20]][%[[s30]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s32:.*]] = llvm.mlir.constant(4 : index) : !llvm.i64 -// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : !llvm.i64] : !llvm<"<8 x float>"> +// CHECK: %[[s33:.*]] = llvm.insertelement %[[s31]], %[[s29]][%[[s32]] : !llvm.i64] : !llvm.vec<8 x float> // CHECK: %[[s34:.*]] = llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : !llvm.i64] : !llvm<"<4 x float>"> +// CHECK: %[[s35:.*]] = llvm.extractelement %[[s20]][%[[s34]] : !llvm.i64] : !llvm.vec<4 x float> // CHECK: %[[s36:.*]] = llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : !llvm.i64] : !llvm<"<8 x float>"> -// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm<"[4 x <8 x float>]"> -// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm<"[16 x [4 x <8 x float>]]"> -// CHECK: llvm.return %[[s39]] : !llvm<"[16 x [4 x <8 x float>]]"> +// CHECK: %[[s37:.*]] = llvm.insertelement %[[s35]], %[[s33]][%[[s36]] : !llvm.i64] : 
!llvm.vec<8 x float> +// CHECK: %[[s38:.*]] = llvm.insertvalue %[[s37]], %[[s19]][1] : !llvm.array<4 x vec<8 x float>> +// CHECK: %[[s39:.*]] = llvm.insertvalue %[[s38]], %[[B]][0] : !llvm.array<16 x array<4 x vec<8 x float>>> +// CHECK: llvm.return %[[s39]] : !llvm.array<16 x array<4 x vec<8 x float>>> func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> { %0 = vector.extract_slices %arg0, [2, 2], [1, 1] @@ -673,37 +673,37 @@ func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> { return %1 : vector<1x1xf32> } // CHECK-LABEL: llvm.func @extract_strides( -// CHECK-SAME: %[[A:.*]]: !llvm<"[3 x <3 x float>]">) -// CHECK: %[[s0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm<"[1 x <1 x float>]"> -// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][2] : !llvm<"[3 x <3 x float>]"> -// CHECK: %[[s3:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1xf32>) : !llvm<"<1 x float>"> +// CHECK-SAME: %[[A:.*]]: !llvm.array<3 x vec<3 x float>>) +// CHECK: %[[s0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1x1xf32>) : !llvm.array<1 x vec<1 x float>> +// CHECK: %[[s1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<3 x vec<3 x float>> +// CHECK: %[[s3:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<1xf32>) : !llvm.vec<1 x float> // CHECK: %[[s4:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 -// CHECK: %[[s5:.*]] = llvm.extractelement %[[s1]][%[[s4]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[s5:.*]] = llvm.extractelement %[[s1]][%[[s4]] : !llvm.i64] : !llvm.vec<3 x float> // CHECK: %[[s6:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[s7:.*]] = llvm.insertelement %[[s5]], %[[s3]][%[[s6]] : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: %[[s8:.*]] = llvm.insertvalue %[[s7]], %[[s0]][0] : !llvm<"[1 x <1 x float>]"> -// CHECK: llvm.return %[[s8]] : !llvm<"[1 x <1 x float>]"> +// CHECK: %[[s7:.*]] = llvm.insertelement %[[s5]], %[[s3]][%[[s6]] : !llvm.i64] : !llvm.vec<1 x float> +// CHECK: %[[s8:.*]] = llvm.insertvalue %[[s7]], %[[s0]][0] : !llvm.array<1 x vec<1 x float>> +// CHECK: llvm.return %[[s8]] : !llvm.array<1 x vec<1 x float>> // CHECK-LABEL: llvm.func @vector_fma( -// CHECK-SAME: %[[A:.*]]: !llvm<"<8 x float>">, %[[B:.*]]: !llvm<"[2 x <4 x float>]">) -// CHECK-SAME: -> !llvm<"{ <8 x float>, [2 x <4 x float>] }"> { +// CHECK-SAME: %[[A:.*]]: !llvm.vec<8 x float>, %[[B:.*]]: !llvm.array<2 x vec<4 x float>>) +// CHECK-SAME: -> !llvm.struct<(vec<8 x float>, array<2 x vec<4 x float>>)> { func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) { // CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) : - // CHECK-SAME: (!llvm<"<8 x float>">, !llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + // CHECK-SAME: (!llvm.vec<8 x float>, !llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> %0 = vector.fma %a, %a, %a : vector<8xf32> - // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]"> - // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]"> - // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]"> + // CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x float>> + // CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x float>> + // CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm.array<2 x vec<4 x float>> // CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) : - // CHECK-SAME: (!llvm<"<4 x float>">, !llvm<"<4 x 
float>">, !llvm<"<4 x float>">) -> !llvm<"<4 x float>"> - // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm<"[2 x <4 x float>]"> - // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]"> - // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]"> - // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]"> + // CHECK-SAME: (!llvm.vec<4 x float>, !llvm.vec<4 x float>, !llvm.vec<4 x float>) -> !llvm.vec<4 x float> + // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vec<4 x float>> + // CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x float>> + // CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x float>> + // CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm.array<2 x vec<4 x float>> // CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) : - // CHECK-SAME: (!llvm<"<4 x float>">, !llvm<"<4 x float>">, !llvm<"<4 x float>">) -> !llvm<"<4 x float>"> - // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm<"[2 x <4 x float>]"> + // CHECK-SAME: (!llvm.vec<4 x float>, !llvm.vec<4 x float>, !llvm.vec<4 x float>) -> !llvm.vec<4 x float> + // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vec<4 x float>> %1 = vector.fma %b, %b, %b : vector<2x4xf32> return %0, %1: vector<8xf32>, vector<2x4xf32> @@ -714,10 +714,10 @@ func @reduce_f32(%arg0: vector<16xf32>) -> f32 { return %0 : f32 } // CHECK-LABEL: llvm.func @reduce_f32( -// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x float>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : !llvm.float // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm<"<16 x float>">) -> !llvm.float +// CHECK-SAME: {reassoc = false} : (!llvm.float, !llvm.vec<16 x float>) -> !llvm.float // CHECK: llvm.return %[[V]] : !llvm.float func @reduce_f64(%arg0: vector<16xf64>) -> f64 { @@ -725,10 +725,10 @@ func @reduce_f64(%arg0: vector<16xf64>) -> f64 { return %0 : f64 } // CHECK-LABEL: llvm.func @reduce_f64( -// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x double>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x double>) // CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : !llvm.double // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.v2.fadd"(%[[C]], %[[A]]) -// CHECK-SAME: {reassoc = false} : (!llvm.double, !llvm<"<16 x double>">) -> !llvm.double +// CHECK-SAME: {reassoc = false} : (!llvm.double, !llvm.vec<16 x double>) -> !llvm.double // CHECK: llvm.return %[[V]] : !llvm.double func @reduce_i32(%arg0: vector<16xi32>) -> i32 { @@ -736,7 +736,7 @@ func @reduce_i32(%arg0: vector<16xi32>) -> i32 { return %0 : i32 } // CHECK-LABEL: llvm.func @reduce_i32( -// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x i32>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i32>) // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.add"(%[[A]]) // CHECK: llvm.return %[[V]] : !llvm.i32 @@ -745,7 +745,7 @@ func @reduce_i64(%arg0: vector<16xi64>) -> i64 { return %0 : i64 } // CHECK-LABEL: llvm.func @reduce_i64( -// CHECK-SAME: %[[A:.*]]: !llvm<"<16 x i64>">) +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x i64>) // CHECK: %[[V:.*]] = "llvm.intr.experimental.vector.reduce.add"(%[[A]]) // CHECK: llvm.return %[[V]] : !llvm.i64 @@ -760,7 +760,7 @@ func @matrix_ops(%A: vector<64xf64>, %B: vector<48xf64>) -> vector<12xf64> { // CHECK-LABEL: llvm.func @matrix_ops // CHECK: llvm.intr.matrix.multiply %{{.*}}, 
%{{.*}} { // CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32 -// CHECK-SAME: } : (!llvm<"<64 x double>">, !llvm<"<48 x double>">) -> !llvm<"<12 x double>"> +// CHECK-SAME: } : (!llvm.vec<64 x double>, !llvm.vec<48 x double>) -> !llvm.vec<12 x double> func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -773,72 +773,72 @@ func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> { return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : -// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK-SAME: (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float> // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : -// CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.ptr<float> to !llvm.ptr<vec<17 x float>> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. // CHECK: %[[linearIndex:.*]] = llvm.mlir.constant( // CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm<"<17 x i64>"> +// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>"> +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[BASE]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> // CHECK: %[[offsetVec4:.*]] = llvm.add %[[offsetVec3]], %[[linearIndex]] : -// CHECK-SAME: !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64> // // 4. Let dim be the memref dimension, compute the vector comparison mask: // [ offset + 0 .. offset + vector_length - 1 ] < [ dim ..
dim ] // CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> // CHECK: %[[mask:.*]] = llvm.icmp "slt" %[[offsetVec4]], %[[dimVec3]] : -// CHECK-SAME: !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64> // // 5. Rewrite as a masked read. // CHECK: %[[PASS_THROUGH:.*]] = llvm.mlir.constant(dense<7.000000e+00> : -// CHECK-SAME: vector<17xf32>) : !llvm<"<17 x float>"> +// CHECK-SAME: vector<17xf32>) : !llvm.vec<17 x float> // CHECK: %[[loaded:.*]] = llvm.intr.masked.load %[[vecPtr]], %[[mask]], // CHECK-SAME: %[[PASS_THROUGH]] {alignment = 4 : i32} : -// CHECK-SAME: (!llvm<"<17 x float>*">, !llvm<"<17 x i1>">, !llvm<"<17 x float>">) -> !llvm<"<17 x float>"> +// CHECK-SAME: (!llvm.ptr<vec<17 x float>>, !llvm.vec<17 x i1>, !llvm.vec<17 x float>) -> !llvm.vec<17 x float> // // 1. Bitcast to vector form. // CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : -// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK-SAME: (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float> // CHECK: %[[vecPtr_b:.*]] = llvm.bitcast %[[gep_b]] : -// CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.ptr<float> to !llvm.ptr<vec<17 x float>> // // 2. Create a vector with linear indices [ 0 .. vector_length - 1 ]. // CHECK: %[[linearIndex_b:.*]] = llvm.mlir.constant( // CHECK-SAME: dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]> : -// CHECK-SAME: vector<17xi64>) : !llvm<"<17 x i64>"> +// CHECK-SAME: vector<17xi64>) : !llvm.vec<17 x i64> // // 3. Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> // CHECK: llvm.add // // 4. Let dim be the memref dimension, compute the vector comparison mask: @@ -846,13 +846,13 @@ func @transfer_read_1d(%A : memref<?xf32>, %base: index) -> vector<17xf32> { // CHECK: llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>"> -// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> +// CHECK: %[[mask_b:.*]] = llvm.icmp "slt" {{.*}} : !llvm.vec<17 x i64> // // 5. Rewrite as a masked write.
// CHECK: llvm.intr.masked.store %[[loaded]], %[[vecPtr_b]], %[[mask_b]] // CHECK-SAME: {alignment = 4 : i32} : -// CHECK-SAME: !llvm<"<17 x float>">, !llvm<"<17 x i1>"> into !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.vec<17 x float>, !llvm.vec<17 x i1> into !llvm.ptr<vec<17 x float>> func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -862,14 +862,14 @@ func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index) return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_2d_to_1d -// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: !llvm.i64, %[[BASE_1:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// CHECK-SAME: %[[BASE_0:[a-zA-Z0-9]*]]: !llvm.i64, %[[BASE_1:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> // // Create offsetVector = [ offset + 0 .. offset + vector_length - 1 ]. -// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>"> +// CHECK: %[[offsetVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // Here we check we properly use %BASE_1 // CHECK: %[[offsetVec2:.*]] = llvm.insertelement %[[BASE_1]], %[[offsetVec]][%[[c0]] : -// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> // CHECK: %[[offsetVec3:.*]] = llvm.shufflevector %[[offsetVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, @@ -879,16 +879,16 @@ func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: index) // [ offset + 0 .. offset + vector_length - 1 ] < [ dim .. dim ] // Here we check we properly use %DIM[1] // CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 1] : -// CHECK-SAME: !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[dimVec:.*]] = llvm.mlir.undef : !llvm.vec<17 x i64> // CHECK: %[[c01:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %[[dimVec2:.*]] = llvm.insertelement %[[DIM]], %[[dimVec]][%[[c01]] : -// CHECK-SAME: !llvm.i32] : !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.i32] : !llvm.vec<17 x i64> // CHECK: %[[dimVec3:.*]] = llvm.shufflevector %[[dimVec2]], %{{.*}} [ // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32, // CHECK-SAME: 0 : i32, 0 : i32, 0 : i32] : -// CHECK-SAME: !llvm<"<17 x i64>">, !llvm<"<17 x i64>"> +// CHECK-SAME: !llvm.vec<17 x i64>, !llvm.vec<17 x i64> func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -901,23 +901,23 @@ func @transfer_read_1d_non_zero_addrspace(%A : memref<?xf32, 3>, %base: index) - return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_non_zero_addrspace -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> // // 1. Check address space for GEP is correct. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : -// CHECK-SAME: (!llvm<"float addrspace(3)*">, !llvm.i64) -> !llvm<"float addrspace(3)*"> +// CHECK-SAME: (!llvm.ptr<float, 3>, !llvm.i64) -> !llvm.ptr<float, 3> // CHECK: %[[vecPtr:.*]] = llvm.addrspacecast %[[gep]] : -// CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.ptr<float, 3> to !llvm.ptr<vec<17 x float>> // // 2.
Check address space of the memref is correct. // CHECK: %[[DIM:.*]] = llvm.extractvalue %{{.*}}[3, 0] : -// CHECK-SAME: !llvm<"{ float addrspace(3)*, float addrspace(3)*, i64, [1 x i64], [1 x i64] }"> +// CHECK-SAME: !llvm.struct<(ptr<float, 3>, ptr<float, 3>, i64, array<1 x i64>, array<1 x i64>)> // // 3. Check address space for GEP is correct. // CHECK: %[[gep_b:.*]] = llvm.getelementptr {{.*}} : -// CHECK-SAME: (!llvm<"float addrspace(3)*">, !llvm.i64) -> !llvm<"float addrspace(3)*"> +// CHECK-SAME: (!llvm.ptr<float, 3>, !llvm.i64) -> !llvm.ptr<float, 3> // CHECK: %[[vecPtr_b:.*]] = llvm.addrspacecast %[[gep_b]] : -// CHECK-SAME: !llvm<"float addrspace(3)*"> to !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.ptr<float, 3> to !llvm.ptr<vec<17 x float>> func @transfer_read_1d_not_masked(%A : memref<?xf32>, %base: index) -> vector<17xf32> { %f7 = constant 7.0: f32 @@ -926,24 +926,24 @@ func @transfer_read_1d_not_masked(%A : memref<?xf32>, %base: index) -> vector<17 return %f: vector<17xf32> } // CHECK-LABEL: func @transfer_read_1d_not_masked -// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm<"<17 x float>"> +// CHECK-SAME: %[[BASE:[a-zA-Z0-9]*]]: !llvm.i64) -> !llvm.vec<17 x float> // // 1. Bitcast to vector form. // CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}} : -// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK-SAME: (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float> // CHECK: %[[vecPtr:.*]] = llvm.bitcast %[[gep]] : -// CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> +// CHECK-SAME: !llvm.ptr<float> to !llvm.ptr<vec<17 x float>> // // 2. Rewrite as a load. -// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm<"<17 x float>*"> +// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm.ptr<vec<17 x float>> func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> return %0 : vector<8xi1> } // CHECK-LABEL: func @genbool_1d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : !llvm<"<8 x i1>"> -// CHECK: llvm.return %[[C1]] : !llvm<"<8 x i1>"> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>) : !llvm.vec<8 x i1> +// CHECK: llvm.return %[[C1]] : !llvm.vec<8 x i1> func @genbool_2d() -> vector<4x4xi1> { %v = vector.constant_mask [2, 2] : vector<4x4xi1> @@ -951,11 +951,11 @@ func @genbool_2d() -> vector<4x4xi1> { } // CHECK-LABEL: func @genbool_2d -// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : !llvm<"<4 x i1>"> -// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense<false> : vector<4x4xi1>) : !llvm<"[4 x <4 x i1>]"> -// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm<"[4 x <4 x i1>]"> -// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm<"[4 x <4 x i1>]"> -// CHECK: llvm.return %[[T1]] : !llvm<"[4 x <4 x i1>]"> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(dense<[true, true, false, false]> : vector<4xi1>) : !llvm.vec<4 x i1> +// CHECK: %[[C2:.*]] = llvm.mlir.constant(dense<false> : vector<4x4xi1>) : !llvm.array<4 x vec<4 x i1>> +// CHECK: %[[T0:.*]] = llvm.insertvalue %[[C1]], %[[C2]][0] : !llvm.array<4 x vec<4 x i1>> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[C1]], %[[T0]][1] : !llvm.array<4 x vec<4 x i1>> +// CHECK: llvm.return %[[T1]] : !llvm.array<4 x vec<4 x i1>> func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 } @@ -964,11 +964,11 @@ func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> { } // CHECK-LABEL: func @flat_transpose -//
CHECK-SAME: %[[A:.*]]: !llvm<"<16 x float>"> +// CHECK-SAME: %[[A:.*]]: !llvm.vec<16 x float> // CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]] // CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} : -// CHECK-SAME: !llvm<"<16 x float>"> into !llvm<"<16 x float>"> -// CHECK: llvm.return %[[T]] : !llvm<"<16 x float>"> +// CHECK-SAME: !llvm.vec<16 x float> into !llvm.vec<16 x float> +// CHECK: llvm.return %[[T]] : !llvm.vec<16 x float> func @gather_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> { %0 = vector.gather %arg0, %arg1, %arg2, %arg3 : (memref<?xf32>, vector<3xi32>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> @@ -976,9 +976,9 @@ func @gather_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, } // CHECK-LABEL: func @gather_op -// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm<"float*">, !llvm<"<3 x i32>">) -> !llvm<"<3 x float*>"> -// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm<"<3 x float*>">, !llvm<"<3 x i1>">, !llvm<"<3 x float>">) -> !llvm<"<3 x float>"> -// CHECK: llvm.return %[[G]] : !llvm<"<3 x float>"> +// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr<float>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x ptr<float>> +// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr<float>>, !llvm.vec<3 x i1>, !llvm.vec<3 x float>) -> !llvm.vec<3 x float> +// CHECK: llvm.return %[[G]] : !llvm.vec<3 x float> func @scatter_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) { vector.scatter %arg0, %arg1, %arg2, %arg3 : vector<3xi32>, vector<3xi1>, vector<3xf32> into memref<?xf32> @@ -986,6 +986,6 @@ func @scatter_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1> } // CHECK-LABEL: func @scatter_op -// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm<"float*">, !llvm<"<3 x i32>">) -> !llvm<"<3 x float*>"> -// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : !llvm<"<3 x float>">, !llvm<"<3 x i1>"> into !llvm<"<3 x float*>"> +// CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr<float>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x ptr<float>> +// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : !llvm.vec<3 x float>, !llvm.vec<3 x i1> into !llvm.vec<3 x ptr<float>> // CHECK: llvm.return diff --git a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir index 1113197aa589b..f8522c8960080 100644 --- a/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir +++ b/mlir/test/Conversion/VectorToROCDL/vector-to-rocdl.mlir @@ -9,7 +9,7 @@ func @transfer_readx2(%A : memref<?xf32>, %base: index) -> vector<2xf32> { return %f: vector<2xf32> } // CHECK-LABEL: @transfer_readx2 -// CHECK: rocdl.buffer.load {{.*}} !llvm<"<2 x float>"> +// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<2 x float> func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> { %f0 = constant 0.0: f32 @@ -19,7 +19,7 @@ func @transfer_readx4(%A : memref<?xf32>, %base: index) -> vector<4xf32> { return %f: vector<4xf32> } // CHECK-LABEL: @transfer_readx4 -// CHECK: rocdl.buffer.load {{.*}} !llvm<"<4 x float>"> +// CHECK: rocdl.buffer.load {{.*}} !llvm.vec<4 x float> func @transfer_read_dwordConfig(%A : memref<?xf32>, %base: index) -> vector<4xf32> { %f0 = constant 0.0: f32 @@ -43,7 +43,7 @@ func @transfer_writex2(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) { return } // CHECK-LABEL: @transfer_writex2 -// CHECK:
rocdl.buffer.store {{.*}} !llvm<"<2 x float>"> +// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<2 x float> func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) { vector.transfer_write %B, %A[%base] @@ -52,7 +52,7 @@ func @transfer_writex4(%A : memref<?xf32>, %B : vector<4xf32>, %base: index) { return } // CHECK-LABEL: @transfer_writex4 -// CHECK: rocdl.buffer.store {{.*}} !llvm<"<4 x float>"> +// CHECK: rocdl.buffer.store {{.*}} !llvm.vec<4 x float> func @transfer_write_dwordConfig(%A : memref<?xf32>, %B : vector<2xf32>, %base: index) { vector.transfer_write %B, %A[%base] diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 43b2434a2cdf9..739d23a59f058 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -89,7 +89,7 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { module @kernels { // expected-error@+1 {{'gpu.func' op expects parent op 'gpu.module'}} - gpu.func @kernel_1(%arg1 : !llvm<"float*">) { + gpu.func @kernel_1(%arg1 : !llvm.ptr<float>) { gpu.return } } @@ -128,16 +128,16 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { module @kernels { - gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel { + gpu.func @kernel_1(%arg1 : !llvm.ptr<float>) kernel { gpu.return } } - func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) { + func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) { // expected-error@+1 {{kernel module 'kernels' is undefined}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm<"float*">) -> () + : (index, index, index, index, index, index, !llvm.ptr<float>) -> () return } } @@ -146,16 +146,16 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { gpu.module @kernels { - gpu.func @kernel_1(%arg1 : !llvm<"float*">) { + gpu.func @kernel_1(%arg1 : !llvm.ptr<float>) { gpu.return } } - func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) { + func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) { // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg) {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm<"float*">) -> () + : (index, index, index, index, index, index, !llvm.ptr<float>) -> () return } } @@ -164,17 +164,17 @@ module attributes {gpu.container_module} { module attributes {gpu.container_module} { gpu.module @kernels { - gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel { + gpu.func @kernel_1(%arg1 : !llvm.ptr<float>) kernel { gpu.return } } - func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm<"float*">) { + func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr<float>) { // expected-error@+1 {{got 2 kernel operands but expected 1}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg) {kernel = @kernels::@kernel_1} - : (index, index, index, index, index, index, !llvm<"float*">, - !llvm<"float*">) -> () + : (index, index, index, index, index, index, !llvm.ptr<float>, + !llvm.ptr<float>) -> () return } } diff --git a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir index f1437dbb1adb2..a084faec29c88 100644 --- a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir +++ b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir @@ -18,8 +18,8 @@ func @main() { } // CHECK: gpu.module @main_kernel { -// CHECK-NEXT:
llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm<"[32 x float]"> -// CHECK-NEXT: llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm<"[32 x float]"> +// CHECK-NEXT: llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm.array<32 x float> +// CHECK-NEXT: llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm.array<32 x float> return } diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index 9a3206f313459..23a8b9d98881f 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -143,7 +143,7 @@ func @function_call(%arg0 : memref<?xf32>) { %block_z = %cst) { call @device_function() : () -> () call @device_function() : () -> () - %0 = llvm.mlir.addressof @global : !llvm<"i64*"> + %0 = llvm.mlir.addressof @global : !llvm.ptr<i64> gpu.terminator } return @@ -163,7 +163,7 @@ func @recursive_device_function() { // CHECK: gpu.func @function_call_kernel() // CHECK: call @device_function() : () -> () // CHECK: call @device_function() : () -> () -// CHECK: llvm.mlir.addressof @global : !llvm<"i64*"> +// CHECK: llvm.mlir.addressof @global : !llvm.ptr<i64> // CHECK: gpu.return // // CHECK: llvm.mlir.global internal @global(42 : i64) : !llvm.i64 diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 689e6db540650..65dc33cc1c4f9 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -4,19 +4,19 @@ module { // GENERIC: "llvm.func" // GENERIC: sym_name = "foo" - // GENERIC-SAME: type = !llvm<"void ()"> + // GENERIC-SAME: type = !llvm.func<void ()> // GENERIC-SAME: () -> () // CHECK: llvm.func @foo() "llvm.func"() ({ - }) {sym_name = "foo", type = !llvm<"void ()">} : () -> () + }) {sym_name = "foo", type = !llvm.func<void ()>} : () -> () // GENERIC: "llvm.func" // GENERIC: sym_name = "bar" - // GENERIC-SAME: type = !llvm<"i64 (i64, i64)"> + // GENERIC-SAME: type = !llvm.func<i64 (i64, i64)> // GENERIC-SAME: () -> () // CHECK: llvm.func @bar(!llvm.i64, !llvm.i64) -> !llvm.i64 "llvm.func"() ({ - }) {sym_name = "bar", type = !llvm<"i64 (i64, i64)">} : () -> () + }) {sym_name = "bar", type = !llvm.func<i64 (i64, i64)>} : () -> () // GENERIC: "llvm.func" // CHECK: llvm.func @baz(%{{.*}}: !llvm.i64) -> !llvm.i64 "llvm.func"() ({ @@ -27,14 +27,14 @@ module { llvm.return %arg0 : !llvm.i64 // GENERIC: sym_name = "baz" - // GENERIC-SAME: type = !llvm<"i64 (i64)"> + // GENERIC-SAME: type = !llvm.func<i64 (i64)> // GENERIC-SAME: () -> () - }) {sym_name = "baz", type = !llvm<"i64 (i64)">} : () -> () + }) {sym_name = "baz", type = !llvm.func<i64 (i64)>} : () -> () - // CHECK: llvm.func @qux(!llvm<"i64*"> {llvm.noalias = true}, !llvm.i64) + // CHECK: llvm.func @qux(!llvm.ptr<i64> {llvm.noalias = true}, !llvm.i64) // CHECK: attributes {xxx = {yyy = 42 : i64}} "llvm.func"() ({ - }) {sym_name = "qux", type = !llvm<"void (i64*, i64)">, + }) {sym_name = "qux", type = !llvm.func<void (ptr<i64>, i64)>, arg0 = {llvm.noalias = true}, xxx = {yyy = 42}} : () -> () // CHECK: llvm.func @roundtrip1() @@ -69,20 +69,20 @@ module { // CHECK: llvm.func @roundtrip8() -> !llvm.i32 llvm.func @roundtrip8() -> !llvm.i32 attributes {} - // CHECK: llvm.func @roundtrip9(!llvm<"i32*"> {llvm.noalias = true}) - llvm.func @roundtrip9(!llvm<"i32*"> {llvm.noalias = true}) + // CHECK: llvm.func @roundtrip9(!llvm.ptr<i32> {llvm.noalias = true}) + llvm.func @roundtrip9(!llvm.ptr<i32> {llvm.noalias = true}) - // CHECK: llvm.func @roundtrip10(!llvm<"i32*"> {llvm.noalias = true}) - llvm.func @roundtrip10(%arg0: !llvm<"i32*"> {llvm.noalias = true}) + // CHECK: llvm.func @roundtrip10(!llvm.ptr<i32>
{llvm.noalias = true}) + llvm.func @roundtrip10(%arg0: !llvm.ptr<i32> {llvm.noalias = true}) - // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm<"i32*"> {llvm.noalias = true}) { - llvm.func @roundtrip11(%arg0: !llvm<"i32*"> {llvm.noalias = true}) { + // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm.ptr<i32> {llvm.noalias = true}) { + llvm.func @roundtrip11(%arg0: !llvm.ptr<i32> {llvm.noalias = true}) { llvm.return } - // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm<"i32*"> {llvm.noalias = true}) + // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm.ptr<i32> {llvm.noalias = true}) // CHECK: attributes {foo = 42 : i32} - llvm.func @roundtrip12(%arg0: !llvm<"i32*"> {llvm.noalias = true}) + llvm.func @roundtrip12(%arg0: !llvm.ptr<i32> {llvm.noalias = true}) attributes {foo = 42 : i32} { llvm.return } @@ -119,7 +119,7 @@ module { module { // expected-error@+1 {{requires one region}} - "llvm.func"() {sym_name = "no_region", type = !llvm<"void ()">} : () -> () + "llvm.func"() {sym_name = "no_region", type = !llvm.func<void ()>} : () -> () } // ----- @@ -140,7 +140,7 @@ module { module { // expected-error@+1 {{requires 'type' attribute of wrapped LLVM function type}} - "llvm.func"() ({}) {sym_name = "non_function_type", type = !llvm<"i64">} : () -> () + "llvm.func"() ({}) {sym_name = "non_function_type", type = !llvm.i64} : () -> () } // ----- module { // expected-error@+1 {{entry block must have 0 arguments}} "llvm.func"() ({ ^bb0(%arg0: !llvm.i64): llvm.return - }) {sym_name = "wrong_arg_number", type = !llvm<"void ()">} : () -> () + }) {sym_name = "wrong_arg_number", type = !llvm.func<void ()>} : () -> () } // ----- module { // expected-error@+1 {{entry block argument #0 is not of LLVM type}} "llvm.func"() ({ ^bb0(%arg0: i64): llvm.return - }) {sym_name = "wrong_arg_number", type = !llvm<"void (i64)">} : () -> () + }) {sym_name = "wrong_arg_number", type = !llvm.func<void (i64)>} : () -> () } // ----- module { // expected-error@+1 {{entry block argument #0 does not match the function signature}} "llvm.func"() ({ ^bb0(%arg0: !llvm.i32): llvm.return - }) {sym_name = "wrong_arg_number", type = !llvm<"void (i64)">} : () -> () + }) {sym_name = "wrong_arg_number", type = !llvm.func<void (i64)>} : () -> () } // ----- diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index e3faec63f00a8..7d7860645ee9b 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -13,7 +13,7 @@ llvm.mlir.global internal @global(42 : i64) : !llvm.i64 llvm.mlir.global internal constant @constant(37.0) : !llvm.float // CHECK: llvm.mlir.global internal constant @".string"("foobar") -llvm.mlir.global internal constant @".string"("foobar") : !llvm<"[6 x i8]"> +llvm.mlir.global internal constant @".string"("foobar") : !llvm.array<6 x i8> // CHECK: llvm.mlir.global internal @string_notype("1234567") llvm.mlir.global internal @string_notype("1234567") @@ -54,11 +54,11 @@ llvm.mlir.global weak_odr @weak_odr() : !llvm.i64 // CHECK-LABEL: references func @references() { - // CHECK: llvm.mlir.addressof @global : !llvm<"i64*"> - %0 = llvm.mlir.addressof @global : !llvm<"i64*"> + // CHECK: llvm.mlir.addressof @global : !llvm.ptr<i64> + %0 = llvm.mlir.addressof @global : !llvm.ptr<i64> - // CHECK: llvm.mlir.addressof @".string" : !llvm<"[6 x i8]*"> - %1 = llvm.mlir.addressof @".string" : !llvm<"[6 x i8]*"> + // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr<array<6 x i8>> + %1 = llvm.mlir.addressof @".string" : !llvm.ptr<array<6 x i8>> llvm.return } // ----- // expected-error @+1 {{expects type to be a valid element type for an LLVM pointer}} -llvm.mlir.global internal constant @constant(37.0) : !llvm<"label"> +llvm.mlir.global internal constant @constant(37.0) : !llvm.label // -----
@@ -98,7 +98,7 @@ func @foo() { // ----- // expected-error @+1 {{requires an i8 array type of the length equal to that of the string}} -llvm.mlir.global internal constant @string("foobar") : !llvm<"[42 x i8]"> +llvm.mlir.global internal constant @string("foobar") : !llvm.array<42 x i8> // ----- @@ -125,14 +125,14 @@ func @foo() { // The attribute parser will consume the first colon-type, so we put two of // them to trigger the attribute type mismatch error. // expected-error @+1 {{invalid kind of attribute specified}} - llvm.mlir.addressof "foo" : i64 : !llvm<"void ()*"> + llvm.mlir.addressof "foo" : i64 : !llvm.ptr<func<void ()>> } // ----- func @foo() { // expected-error @+1 {{must reference a global defined by 'llvm.mlir.global'}} - llvm.mlir.addressof @foo : !llvm<"void ()*"> + llvm.mlir.addressof @foo : !llvm.ptr<func<void ()>> } // ----- llvm.mlir.global internal @foo(0: i32) : !llvm.i32 func @bar() { // expected-error @+1 {{the type must be a pointer to the type of the referenced global}} - llvm.mlir.addressof @foo : !llvm<"i64*"> + llvm.mlir.addressof @foo : !llvm.ptr<i64> } // ----- llvm.func @foo() llvm.func @bar() { // expected-error @+1 {{the type must be a pointer to the type of the referenced function}} - llvm.mlir.addressof @foo : !llvm<"i8*"> + llvm.mlir.addressof @foo : !llvm.ptr<i8> } // ----- @@ -182,7 +182,7 @@ llvm.mlir.global internal @g(43 : i64) : !llvm.i64 { llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : !llvm.i64 func @mismatch_addr_space_implicit_global() { // expected-error @+1 {{op the type must be a pointer to the type of the referenced global}} - llvm.mlir.addressof @g : !llvm<"i64*"> + llvm.mlir.addressof @g : !llvm.ptr<i64> } // ----- @@ -190,5 +190,5 @@ func @mismatch_addr_space_implicit_global() { llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : !llvm.i64 func @mismatch_addr_space() { // expected-error @+1 {{op the type must be a pointer to the type of the referenced global}} - llvm.mlir.addressof @g : !llvm<"i64 addrspace(4)*"> + llvm.mlir.addressof @g : !llvm.ptr<i64, 4> } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 6ef25f85a5060..b4475df66fd1d 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -18,7 +18,7 @@ func @invalid_align(%arg0: !llvm.i32 {llvm.align = "foo"}) { // ----- -func @icmp_non_string(%arg0 : !llvm.i32, %arg1 : !llvm<"i16">) { +func @icmp_non_string(%arg0 : !llvm.i32, %arg1 : !llvm.i16) { // expected-error@+1 {{invalid kind of attribute specified}} llvm.icmp 42 %arg0, %arg0 : !llvm.i32 return } // ----- -func @icmp_wrong_string(%arg0 : !llvm.i32, %arg1 : !llvm<"i16">) { +func @icmp_wrong_string(%arg0 : !llvm.i32, %arg1 : !llvm.i16) { // expected-error@+1 {{'foo' is an incorrect value of the 'predicate' attribute}} llvm.icmp "foo" %arg0, %arg0 : !llvm.i32 return } // ----- func @alloca_missing_input_result_type(%size : !llvm.i64) { // expected-error@+1 {{expected trailing function type with one argument and one result}} llvm.alloca %size x !llvm.i32 : () -> () } // ----- func @alloca_missing_input_type() { // expected-error@+1 {{expected trailing function type with one argument and one result}} - llvm.alloca %size x !llvm.i32 : () -> (!llvm<"i32*">) + llvm.alloca %size x !llvm.i32 : () -> (!llvm.ptr<i32>) } // ----- func @alloca_mising_result_type() { // expected-error@+1 {{expected trailing function type with one argument and one result}} llvm.alloca %size x !llvm.i32 :
!llvm.ptr<i32> } // ----- func @alloca_nonpositive_alignment(%size : !llvm.i64) { // expected-error@+1 {{expected positive alignment}} - llvm.alloca %size x !llvm.i32 {alignment = -1} : (!llvm.i64) -> (!llvm<"i32*">) + llvm.alloca %size x !llvm.i32 {alignment = -1} : (!llvm.i64) -> (!llvm.ptr<i32>) } // ----- -func @gep_missing_input_result_type(%pos : !llvm.i64, %base : !llvm<"float*">) { +func @gep_missing_input_result_type(%pos : !llvm.i64, %base : !llvm.ptr<float>) { // expected-error@+1 {{2 operands present, but expected 0}} llvm.getelementptr %base[%pos] : () -> () } // ----- -func @gep_missing_input_type(%pos : !llvm.i64, %base : !llvm<"float*">) { +func @gep_missing_input_type(%pos : !llvm.i64, %base : !llvm.ptr<float>) { // expected-error@+1 {{2 operands present, but expected 0}} - llvm.getelementptr %base[%pos] : () -> (!llvm<"float*">) + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr<float>) } // ----- -func @gep_missing_result_type(%pos : !llvm.i64, %base : !llvm<"float*">) { +func @gep_missing_result_type(%pos : !llvm.i64, %base : !llvm.ptr<float>) { // expected-error@+1 {{op requires one result}} - llvm.getelementptr %base[%pos] : (!llvm<"float *">, !llvm.i64) -> () + llvm.getelementptr %base[%pos] : (!llvm.ptr<float>, !llvm.i64) -> () } // ----- -func @gep_non_function_type(%pos : !llvm.i64, %base : !llvm<"float*">) { +func @gep_non_function_type(%pos : !llvm.i64, %base : !llvm.ptr<float>) { // expected-error@+1 {{invalid kind of type specified}} - llvm.getelementptr %base[%pos] : !llvm<"float*"> + llvm.getelementptr %base[%pos] : !llvm.ptr<float> } // ----- @@ -125,9 +125,9 @@ func @store_non_ptr_type(%foo : !llvm.float, %bar : !llvm.float) { // ----- -func @call_non_function_type(%callee : !llvm<"i8(i8)">, %arg : !llvm<"i8">) { +func @call_non_function_type(%callee : !llvm.func<i8 (i8)>, %arg : !llvm.i8) { // expected-error@+1 {{expected function type}} - llvm.call %callee(%arg) : !llvm<"i8(i8)"> + llvm.call %callee(%arg) : !llvm.func<i8 (i8)> } // ----- @@ -155,7 +155,7 @@ func @call_non_llvm_input(%callee : (i32) -> (), %arg : i32) { func @constant_wrong_type() { // expected-error@+1 {{only supports integer, float, string or elements attributes}} - llvm.mlir.constant(@constant_wrong_type) : !llvm<"void ()*"> + llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr<func<void ()>> } // ----- func @insertvalue_non_array_position() { // Note the double-type, otherwise attribute parsing consumes the trailing // type of the op as the (wrong) attribute type.
// expected-error@+1 {{invalid kind of attribute specified}} - llvm.insertvalue %a, %b 0 : i32 : !llvm<"{i32}"> + llvm.insertvalue %a, %b 0 : i32 : !llvm.struct<(i32)> } // ----- func @insertvalue_non_integer_position() { // expected-error@+1 {{expected an array of integer literals}} - llvm.insertvalue %a, %b[0.0] : !llvm<"{i32}"> + llvm.insertvalue %a, %b[0.0] : !llvm.struct<(i32)> } // ----- func @insertvalue_struct_out_of_bounds() { // expected-error@+1 {{position out of bounds}} - llvm.insertvalue %a, %b[1] : !llvm<"{i32}"> + llvm.insertvalue %a, %b[1] : !llvm.struct<(i32)> } // ----- func @insertvalue_array_out_of_bounds() { // expected-error@+1 {{position out of bounds}} - llvm.insertvalue %a, %b[1] : !llvm<"[1 x i32]"> + llvm.insertvalue %a, %b[1] : !llvm.array<1 x i32> } // ----- func @insertvalue_wrong_nesting() { // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} - llvm.insertvalue %a, %b[0,0] : !llvm<"{i32}"> + llvm.insertvalue %a, %b[0,0] : !llvm.struct<(i32)> } // ----- @@ -215,41 +215,41 @@ func @extractvalue_non_array_position() { // Note the double-type, otherwise attribute parsing consumes the trailing // type of the op as the (wrong) attribute type. // expected-error@+1 {{invalid kind of attribute specified}} - llvm.extractvalue %b 0 : i32 : !llvm<"{i32}"> + llvm.extractvalue %b 0 : i32 : !llvm.struct<(i32)> } // ----- func @extractvalue_non_integer_position() { // expected-error@+1 {{expected an array of integer literals}} - llvm.extractvalue %b[0.0] : !llvm<"{i32}"> + llvm.extractvalue %b[0.0] : !llvm.struct<(i32)> } // ----- func @extractvalue_struct_out_of_bounds() { // expected-error@+1 {{position out of bounds}} - llvm.extractvalue %b[1] : !llvm<"{i32}"> + llvm.extractvalue %b[1] : !llvm.struct<(i32)> } // ----- func @extractvalue_array_out_of_bounds() { // expected-error@+1 {{position out of bounds}} - llvm.extractvalue %b[1] : !llvm<"[1 x i32]"> + llvm.extractvalue %b[1] : !llvm.array<1 x i32> } // ----- func @extractvalue_wrong_nesting() { // expected-error@+1 {{expected wrapped LLVM IR structure/array type}} - llvm.extractvalue %b[0,0] : !llvm<"{i32}"> + llvm.extractvalue %b[0,0] : !llvm.struct<(i32)> } // ----- // CHECK-LABEL: @invalid_vector_type_1 -func @invalid_vector_type_1(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2: !llvm.float) { +func @invalid_vector_type_1(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i32, %arg2: !llvm.float) { // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} %0 = llvm.extractelement %arg2[%arg1 : !llvm.i32] : !llvm.float } @@ -257,7 +257,7 @@ func @invalid_vector_type_1(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2 // ----- // CHECK-LABEL: @invalid_vector_type_2 -func @invalid_vector_type_2(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2: !llvm.float) { +func @invalid_vector_type_2(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i32, %arg2: !llvm.float) { // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} %0 = llvm.insertelement %arg2, %arg2[%arg1 : !llvm.i32] : !llvm.float } @@ -265,7 +265,7 @@ func @invalid_vector_type_2(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2 // ----- // CHECK-LABEL: @invalid_vector_type_3 -func @invalid_vector_type_3(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2: !llvm.float) { +func @invalid_vector_type_3(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i32, %arg2: !llvm.float) { // expected-error@+1 {{expected LLVM IR dialect vector type for operand #1}} %0 = llvm.shufflevector %arg2, %arg2 [0 : i32, 0 : i32, 0 : 
i32, 0 : i32, 7 : i32] : !llvm.float, !llvm.float } @@ -281,7 +281,7 @@ func @null_non_llvm_type() { // CHECK-LABEL: @nvvm_invalid_shfl_pred_1 func @nvvm_invalid_shfl_pred_1(%arg0 : !llvm.i32, %arg1 : !llvm.i32, %arg2 : !llvm.i32, %arg3 : !llvm.i32) { - // expected-error@+1 {{expected return type !llvm<"{ ?, i1 }">}} + // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.i32 } @@ -289,122 +289,112 @@ // CHECK-LABEL: @nvvm_invalid_shfl_pred_2 func @nvvm_invalid_shfl_pred_2(%arg0 : !llvm.i32, %arg1 : !llvm.i32, %arg2 : !llvm.i32, %arg3 : !llvm.i32) { - // expected-error@+1 {{expected return type !llvm<"{ ?, i1 }">}} - %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm<"{ i32 }"> + // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} + %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i32)> } // ----- // CHECK-LABEL: @nvvm_invalid_shfl_pred_3 func @nvvm_invalid_shfl_pred_3(%arg0 : !llvm.i32, %arg1 : !llvm.i32, %arg2 : !llvm.i32, %arg3 : !llvm.i32) { - // expected-error@+1 {{expected return type !llvm<"{ ?, i1 }">}} - %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm<"{ i32, i32 }"> + // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} + %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i32, i32)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_0 -func @nvvm_invalid_mma_0(%a0 : !llvm.half, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_0(%a0 : !llvm.half, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{expected operands to be 4 <halfx2>s followed by either 4 <halfx2>s or 8 floats}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.half, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.half, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_1 -func @nvvm_invalid_mma_1(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_1(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 :
!llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{expected result type to be a struct of either 4 <halfx2>s or 8 floats}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, half }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, half }"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, half)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, half)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_2 -func @nvvm_invalid_mma_2(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_2(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{alayout and blayout attributes must be set to either "row" or "col"}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_3 -func @nvvm_invalid_mma_3(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, - %c0 : !llvm<"<2 x half>">, %c1 : !llvm<"<2 x half>">, - %c2 : !llvm<"<2 x half>">, %c3 : !llvm<"<2 x half>">) { +func @nvvm_invalid_mma_3(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, + %c0 : !llvm.vec<2 x half>, %c1 : !llvm.vec<2 x half>, + %c2 : !llvm.vec<2 x half>, %c3 : !llvm.vec<2 x half>) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + %0 = nvvm.mma.sync
%a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3 {alayout="row", blayout="col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_4 -func @nvvm_invalid_mma_4(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_4(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{<2 x half>, <2 x half>, <2 x half>, <2 x half>}"> - llvm.return %0 : !llvm<"{<2 x half>, <2 x half>, <2 x half>, <2 x half>}"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)> + llvm.return %0 : !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_5 -func @nvvm_invalid_mma_5(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_5(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{unimplemented mma.sync variant}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_6 -func @nvvm_invalid_mma_6(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_6(%a0 : !llvm.vec<2 x half>, %a1 : 
!llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{invalid kind of type specified}} - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- // CHECK-LABEL: @nvvm_invalid_mma_7 -func @nvvm_invalid_mma_7(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_invalid_mma_7(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { // expected-error@+1 {{op requires one result}} - %0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> (!llvm<"{ float, float, float, float, float, float, float, float }">, !llvm.i32) - llvm.return %0#0 : !llvm<"{ float, float, float, float, float, float, float, float }"> -} - -// ----- - -// FIXME: the LLVM-IR dialect should parse mutually recursive types -// CHECK-LABEL: @recursive_type -// expected-error@+1 {{expected end of string}} -llvm.func @recursive_type(%a : !llvm<"%a = type { %a* }">) -> - !llvm<"%a = type { %a* }"> { - llvm.return %a : !llvm<"%a = type { %a* }"> + %0:2 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="col", blayout="row"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> (!llvm.struct<(float, float, float, float, float, float, float, float)>, !llvm.i32) + llvm.return %0#0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } // ----- @@ -419,25 +409,25 @@ func @atomicrmw_expected_ptr(%f32 : !llvm.float) { // ----- // CHECK-LABEL: @atomicrmw_mismatched_operands -func @atomicrmw_mismatched_operands(%f32_ptr : !llvm<"float*">, %i32 : !llvm.i32) { +func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr<float>, %i32 : !llvm.i32) { // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for operand #1}} - %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm<"float*">, !llvm.i32) -> !llvm.float + %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm.ptr<float>, !llvm.i32) -> !llvm.float llvm.return } // ----- // CHECK-LABEL: @atomicrmw_mismatched_result -func @atomicrmw_mismatched_operands(%f32_ptr : !llvm<"float*">, %f32 : !llvm.float) { +func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr<float>, %f32 : !llvm.float) { // expected-error@+1 {{expected LLVM IR result type to match type for operand #1}}
- %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm<"float*">, !llvm.float) -> !llvm.i32 + %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr<float>, !llvm.float) -> !llvm.i32 llvm.return } // ----- // CHECK-LABEL: @atomicrmw_expected_float -func @atomicrmw_expected_float(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { +func @atomicrmw_expected_float(%i32_ptr : !llvm.ptr<i32>, %i32 : !llvm.i32) { // expected-error@+1 {{expected LLVM IR floating point type}} %0 = llvm.atomicrmw fadd %i32_ptr, %i32 unordered : !llvm.i32 llvm.return } // ----- // CHECK-LABEL: @atomicrmw_unexpected_xchg_type -func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm<"i1*">, %i1 : !llvm.i1) { +func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm.ptr<i1>, %i1 : !llvm.i1) { // expected-error@+1 {{unexpected LLVM IR type for 'xchg' bin_op}} %0 = llvm.atomicrmw xchg %i1_ptr, %i1 unordered : !llvm.i1 llvm.return } // ----- // CHECK-LABEL: @atomicrmw_expected_int -func @atomicrmw_expected_int(%f32_ptr : !llvm<"float*">, %f32 : !llvm.float) { +func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr<float>, %f32 : !llvm.float) { // expected-error@+1 {{expected LLVM IR integer type}} %0 = llvm.atomicrmw max %f32_ptr, %f32 unordered : !llvm.float llvm.return } // ----- // CHECK-LABEL: @cmpxchg_expected_ptr -func @cmpxchg_expected_ptr(%f32_ptr : !llvm<"float*">, %f32 : !llvm.float) { +func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr<float>, %f32 : !llvm.float) { // expected-error@+1 {{expected LLVM IR pointer type for operand #0}} - %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (!llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, i1 }"> + %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (!llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, i1)> llvm.return } // ----- // CHECK-LABEL: @cmpxchg_mismatched_operands -func @cmpxchg_mismatched_operands(%f32_ptr : !llvm<"float*">, %i32 : !llvm.i32) { +func @cmpxchg_mismatched_operands(%f32_ptr : !llvm.ptr<float>, %i32 : !llvm.i32) { // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}} - %0 = "llvm.cmpxchg"(%f32_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm<"float*">, !llvm.i32, !llvm.i32) -> !llvm<"{ i32, i1 }"> + %0 = "llvm.cmpxchg"(%f32_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr<float>, !llvm.i32, !llvm.i32) -> !llvm.struct<(i32, i1)> llvm.return } // ----- // CHECK-LABEL: @cmpxchg_unexpected_type -func @cmpxchg_unexpected_type(%i1_ptr : !llvm<"i1*">, %i1 : !llvm.i1) { +func @cmpxchg_unexpected_type(%i1_ptr : !llvm.ptr<i1>, %i1 : !llvm.i1) { // expected-error@+1 {{unexpected LLVM IR type}} %0 = llvm.cmpxchg %i1_ptr, %i1, %i1 monotonic monotonic : !llvm.i1 llvm.return } // ----- // CHECK-LABEL: @cmpxchg_at_least_monotonic_success -func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { +func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm.ptr<i32>, %i32 : !llvm.i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 unordered monotonic : !llvm.i32 llvm.return
@@ -500,7 +490,7 @@ func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm<"i32*">, %i32 : !llvm. // ----- // CHECK-LABEL: @cmpxchg_at_least_monotonic_failure -func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { +func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm.ptr, %i32 : !llvm.i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 monotonic unordered : !llvm.i32 llvm.return @@ -509,7 +499,7 @@ func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm<"i32*">, %i32 : !llvm. // ----- // CHECK-LABEL: @cmpxchg_failure_release -func @cmpxchg_failure_release(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { +func @cmpxchg_failure_release(%i32_ptr : !llvm.ptr, %i32 : !llvm.i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel release : !llvm.i32 llvm.return @@ -518,7 +508,7 @@ func @cmpxchg_failure_release(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { // ----- // CHECK-LABEL: @cmpxchg_failure_acq_rel -func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { +func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : !llvm.i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel acq_rel : !llvm.i32 llvm.return @@ -529,7 +519,7 @@ func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) { llvm.func @foo(!llvm.i32) -> !llvm.i32 llvm.func @__gxx_personality_v0(...) -> !llvm.i32 -llvm.func @bad_landingpad(%arg0: !llvm<"i8**">) attributes { personality = @__gxx_personality_v0} { +llvm.func @bad_landingpad(%arg0: !llvm.ptr>) attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(3 : i32) : !llvm.i32 %1 = llvm.mlir.constant(2 : i32) : !llvm.i32 %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (!llvm.i32) -> !llvm.i32 @@ -537,7 +527,7 @@ llvm.func @bad_landingpad(%arg0: !llvm<"i8**">) attributes { personality = @__gx llvm.return %1 : !llvm.i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{clause #0 is not a known constant - null, addressof, bitcast}} - %3 = llvm.landingpad cleanup (catch %1 : !llvm.i32) (catch %arg0 : !llvm<"i8**">) : !llvm<"{ i8*, i32 }"> + %3 = llvm.landingpad cleanup (catch %1 : !llvm.i32) (catch %arg0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> llvm.return %0 : !llvm.i32 } @@ -548,15 +538,15 @@ llvm.func @__gxx_personality_v0(...) 
-> !llvm.i32 llvm.func @caller(%arg0: !llvm.i32) -> !llvm.i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(1 : i32) : !llvm.i32 - %1 = llvm.alloca %0 x !llvm<"i8*"> : (!llvm.i32) -> !llvm<"i8**"> + %1 = llvm.alloca %0 x !llvm.ptr : (!llvm.i32) -> !llvm.ptr> // expected-note@+1 {{global addresses expected as operand to bitcast used in clauses for landingpad}} - %2 = llvm.bitcast %1 : !llvm<"i8**"> to !llvm<"i8*"> + %2 = llvm.bitcast %1 : !llvm.ptr> to !llvm.ptr %3 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (!llvm.i32) -> !llvm.i32 ^bb1: // pred: ^bb0 llvm.return %0 : !llvm.i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{constant clauses expected}} - %5 = llvm.landingpad (catch %2 : !llvm<"i8*">) : !llvm<"{ i8*, i32 }"> + %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> llvm.return %0 : !llvm.i32 } @@ -572,7 +562,7 @@ llvm.func @caller(%arg0: !llvm.i32) -> !llvm.i32 attributes { personality = @__g llvm.return %0 : !llvm.i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{landingpad instruction expects at least one clause or cleanup attribute}} - %2 = llvm.landingpad : !llvm<"{ i8*, i32 }"> + %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> llvm.return %0 : !llvm.i32 } @@ -587,7 +577,7 @@ llvm.func @caller(%arg0: !llvm.i32) -> !llvm.i32 attributes { personality = @__g ^bb1: // pred: ^bb0 llvm.return %0 : !llvm.i32 ^bb2: // pred: ^bb0 - %2 = llvm.landingpad cleanup : !llvm<"{ i8*, i32 }"> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> // expected-error@+1 {{'llvm.resume' op expects landingpad value as operand}} llvm.resume %0 : !llvm.i32 } @@ -603,8 +593,8 @@ llvm.func @caller(%arg0: !llvm.i32) -> !llvm.i32 { llvm.return %0 : !llvm.i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{llvm.landingpad needs to be in a function with a personality}} - %2 = llvm.landingpad cleanup : !llvm<"{ i8*, i32 }"> - llvm.resume %2 : !llvm<"{ i8*, i32 }"> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.resume %2 : !llvm.struct<(ptr, i32)> } // ----- diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index a55b35907db03..67d43f7146d5f 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -46,12 +46,12 @@ func @nvvm_shfl( func @nvvm_shfl_pred( %arg0 : !llvm.i32, %arg1 : !llvm.i32, %arg2 : !llvm.i32, - %arg3 : !llvm.i32, %arg4 : !llvm.float) -> !llvm<"{ i32, i1 }"> { - // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm<"{ i32, i1 }"> - %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm<"{ i32, i1 }"> - // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm<"{ float, i1 }"> - %1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : !llvm<"{ float, i1 }"> - llvm.return %0 : !llvm<"{ i32, i1 }"> + %arg3 : !llvm.i32, %arg4 : !llvm.float) -> !llvm.struct<(i32, i1)> { + // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(i32, i1)> + %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i32, i1)> + // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(float, i1)> + %1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(float, i1)> + llvm.return %0 : !llvm.struct<(i32, i1)> } func @nvvm_vote(%arg0 : !llvm.i32, %arg1 : !llvm.i1) -> !llvm.i32 { @@ -60,11 +60,11 @@ func @nvvm_vote(%arg0 : !llvm.i32, %arg1 : !llvm.i1) -> !llvm.i32 { llvm.return %0 
: !llvm.i32 } -func @nvvm_mma(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">, - %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">, +func @nvvm_mma(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>, + %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>, %c0 : !llvm.float, %c1 : !llvm.float, %c2 : !llvm.float, %c3 : !llvm.float, %c4 : !llvm.float, %c5 : !llvm.float, %c6 : !llvm.float, %c7 : !llvm.float) { - // CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }"> - llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }"> + // CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)> + llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)> } diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index 5fb28c45805c4..3f640810c543c 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -36,133 +36,133 @@ func @rocdl.barrier() { } func @rocdl.xdlops(%arg0 : !llvm.float, %arg1 : !llvm.float, - %arg2 : !llvm<"<32 x float>">, %arg3 : !llvm.i32, - %arg4 : !llvm<"<16 x float>">, %arg5 : !llvm<"<4 x float>">, - %arg6 : !llvm<"<4 x half>">, %arg7 : !llvm<"<32 x i32>">, - %arg8 : !llvm<"<16 x i32>">, %arg9 : !llvm<"<4 x i32>">, - %arg10 : !llvm<"<2 x i16>">) -> !llvm<"<32 x float>"> { + %arg2 : !llvm.vec<32 x float>, %arg3 : !llvm.i32, + %arg4 : !llvm.vec<16 x float>, %arg5 : !llvm.vec<4 x float>, + %arg6 : !llvm.vec<4 x half>, %arg7 : !llvm.vec<32 x i32>, + %arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>, + %arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x float> { // CHECK-LABEL: rocdl.xdlops - // CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm<"<32 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + // CHECK: rocdl.mfma.f32.32x32x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm.vec<32 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> 
- // CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.16x16x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.16x16x4f32 {{.*}} : (!llvm.float, !llvm.float, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.16x16x4f32 {{.*}} : (!llvm.float, !llvm.float, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - // CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.4x4x1f32 {{.*}} : (!llvm.float, !llvm.float, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - // CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (!llvm.float, !llvm.float, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.32x32x2f32 {{.*}} : (!llvm.float, !llvm.float, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<32 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + // CHECK: rocdl.mfma.f32.32x32x4f16 {{.*}} : (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<32 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> - // CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.16x16x4f16 {{.*}} : (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, 
!llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.4x4x4f16 {{.*}} : (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - // CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.32x32x8f16 {{.*}} : (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.16x16x16f16 {{.*}} : (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - // CHECK: rocdl.mfma.i32.32x32x4i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm<"<32 x i32>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x i32>"> + // CHECK: rocdl.mfma.i32.32x32x4i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm.vec<32 x i32>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x i32> %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<32 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<32 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x i32> - // CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x i32>"> + // CHECK: rocdl.mfma.i32.16x16x4i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> - // CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + // CHECK: rocdl.mfma.i32.4x4x4i8 {{.*}} : 
(!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> - // CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x i32>"> + // CHECK: rocdl.mfma.i32.32x32x8i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> - // CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + // CHECK: rocdl.mfma.i32.16x16x16i8 {{.*}} : (!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> - // CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<32 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + // CHECK: rocdl.mfma.f32.32x32x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> - // CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.16x16x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.4x4x2bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - // CHECK: 
rocdl.mfma.f32.32x32x4bf16 {{.*}} : (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + // CHECK: rocdl.mfma.f32.32x32x4bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> - // CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + // CHECK: rocdl.mfma.f32.16x16x8bf16 {{.*}} : (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - llvm.return %r0 : !llvm<"<32 x float>"> + llvm.return %r0 : !llvm.vec<32 x float> } -llvm.func @rocdl.mubuf(%rsrc : !llvm<"<4 x i32>">, %vindex : !llvm.i32, +llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : !llvm.i32, %offset : !llvm.i32, %glc : !llvm.i1, - %slc : !llvm.i1, %vdata1 : !llvm<"<1 x float>">, - %vdata2 : !llvm<"<2 x float>">, %vdata4 : !llvm<"<4 x float>">) { + %slc : !llvm.i1, %vdata1 : !llvm.vec<1 x float>, + %vdata2 : !llvm.vec<2 x float>, %vdata4 : !llvm.vec<4 x float>) { // CHECK-LABEL: rocdl.mubuf - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<1 x float>"> - %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<1 x float>"> - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<2 x float>"> - %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<2 x float>"> - // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<4 x float>"> - %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<4 x float>"> - - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<1 x float>"> - rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<1 x float>"> - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<2 x float>"> - rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<2 x float>"> - // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm<"<4 x float>"> - rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<4 x float>"> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x float> + %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x float> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<2 x float> + %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x float> + // CHECK: %{{.*}} = rocdl.buffer.load %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x float> + %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x 
float> + + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<1 x float> + rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x float> + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<2 x float> + rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x float> + // CHECK: rocdl.buffer.store %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} %{{.*}} : !llvm.vec<4 x float> + rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x float> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 1180561cf77b2..ef89d76387d77 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s // CHECK-LABEL: func @ops -// CHECK-SAME: (%[[I32:.*]]: !llvm.i32, %[[FLOAT:.*]]: !llvm.float, %[[I8PTR1:.*]]: !llvm<"i8*">, %[[I8PTR2:.*]]: !llvm<"i8*">, %[[BOOL:.*]]: !llvm.i1) +// CHECK-SAME: (%[[I32:.*]]: !llvm.i32, %[[FLOAT:.*]]: !llvm.float, %[[I8PTR1:.*]]: !llvm.ptr, %[[I8PTR2:.*]]: !llvm.ptr, %[[BOOL:.*]]: !llvm.i1) func @ops(%arg0: !llvm.i32, %arg1: !llvm.float, - %arg2: !llvm<"i8*">, %arg3: !llvm<"i8*">, + %arg2: !llvm.ptr, %arg3: !llvm.ptr, %arg4: !llvm.i1) { // Integer arithmetic binary operations. // @@ -39,29 +39,29 @@ func @ops(%arg0: !llvm.i32, %arg1: !llvm.float, // Memory-related operations. // -// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x !llvm.double : (!llvm.i32) -> !llvm<"double*"> -// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm<"double*">, !llvm.i32, !llvm.i32) -> !llvm<"double*"> -// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm<"double*"> -// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm<"double*"> -// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm<"double*"> to !llvm<"i64*"> - %13 = llvm.alloca %arg0 x !llvm.double : (!llvm.i32) -> !llvm<"double*"> - %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm<"double*">, !llvm.i32, !llvm.i32) -> !llvm<"double*"> - %15 = llvm.load %14 : !llvm<"double*"> - llvm.store %15, %13 : !llvm<"double*"> - %16 = llvm.bitcast %13 : !llvm<"double*"> to !llvm<"i64*"> +// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x !llvm.double : (!llvm.i32) -> !llvm.ptr +// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, !llvm.i32, !llvm.i32) -> !llvm.ptr +// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr +// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm.ptr +// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm.ptr to !llvm.ptr + %13 = llvm.alloca %arg0 x !llvm.double : (!llvm.i32) -> !llvm.ptr + %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, !llvm.i32, !llvm.i32) -> !llvm.ptr + %15 = llvm.load %14 : !llvm.ptr + llvm.store %15, %13 : !llvm.ptr + %16 = llvm.bitcast %13 : !llvm.ptr to !llvm.ptr // Function call-related operations. 
// -// CHECK: %[[STRUCT:.*]] = llvm.call @foo(%[[I32]]) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> -// CHECK: %[[VALUE:.*]] = llvm.extractvalue %[[STRUCT]][0] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[NEW_STRUCT:.*]] = llvm.insertvalue %[[VALUE]], %[[STRUCT]][2] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[FUNC:.*]] = llvm.mlir.addressof @foo : !llvm<"{ i32, double, i32 } (i32)*"> -// CHECK: %{{.*}} = llvm.call %[[FUNC]](%[[I32]]) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> - %17 = llvm.call @foo(%arg0) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> - %18 = llvm.extractvalue %17[0] : !llvm<"{ i32, double, i32 }"> - %19 = llvm.insertvalue %18, %17[2] : !llvm<"{ i32, double, i32 }"> - %20 = llvm.mlir.addressof @foo : !llvm<"{ i32, double, i32 } (i32)*"> - %21 = llvm.call %20(%arg0) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> +// CHECK: %[[STRUCT:.*]] = llvm.call @foo(%[[I32]]) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> +// CHECK: %[[VALUE:.*]] = llvm.extractvalue %[[STRUCT]][0] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[NEW_STRUCT:.*]] = llvm.insertvalue %[[VALUE]], %[[STRUCT]][2] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[FUNC:.*]] = llvm.mlir.addressof @foo : !llvm.ptr (i32)>> +// CHECK: %{{.*}} = llvm.call %[[FUNC]](%[[I32]]) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> + %17 = llvm.call @foo(%arg0) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> + %18 = llvm.extractvalue %17[0] : !llvm.struct<(i32, double, i32)> + %19 = llvm.insertvalue %18, %17[2] : !llvm.struct<(i32, double, i32)> + %20 = llvm.mlir.addressof @foo : !llvm.ptr (i32)>> + %21 = llvm.call %20(%arg0) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> // Terminator operations and their successors. @@ -76,9 +76,9 @@ func @ops(%arg0: !llvm.i32, %arg1: !llvm.float, // CHECK: ^[[BB2]] ^bb2: -// CHECK: %{{.*}} = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> +// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> // CHECK: %{{.*}} = llvm.mlir.constant(42 : i64) : !llvm.i47 - %22 = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> + %22 = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> %23 = llvm.mlir.constant(42) : !llvm.i47 // Misc operations. @@ -87,10 +87,10 @@ func @ops(%arg0: !llvm.i32, %arg1: !llvm.float, // Integer to pointer and pointer to integer conversions. 
// -// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : !llvm.i32 to !llvm<"i32*"> -// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm<"i32*"> to !llvm.i32 - %25 = llvm.inttoptr %arg0 : !llvm.i32 to !llvm<"i32*"> - %26 = llvm.ptrtoint %25 : !llvm<"i32*"> to !llvm.i32 +// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : !llvm.i32 to !llvm.ptr +// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm.ptr to !llvm.i32 + %25 = llvm.inttoptr %arg0 : !llvm.i32 to !llvm.ptr + %26 = llvm.ptrtoint %25 : !llvm.ptr to !llvm.i32 // Extended and Quad floating point // @@ -114,24 +114,24 @@ func @ops(%arg0: !llvm.i32, %arg1: !llvm.float, // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (!llvm.i32) -> !llvm.i32 %33 = "llvm.intr.ctpop"(%arg0) : (!llvm.i32) -> !llvm.i32 -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, !llvm.i32, !llvm.i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, !llvm.i32, !llvm.i1) -> () -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, !llvm.i32, !llvm.i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, !llvm.i32, !llvm.i1) -> () // CHECK: %[[SZ:.*]] = llvm.mlir.constant %sz = llvm.mlir.constant(10: i64) : !llvm.i64 -// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> () - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> () +// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, !llvm.i64, !llvm.i1) -> () + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm.ptr, !llvm.ptr, !llvm.i64, !llvm.i1) -> () // CHECK: llvm.return llvm.return } // An larger self-contained function. 
-// CHECK-LABEL: llvm.func @foo(%{{.*}}: !llvm.i32) -> !llvm<"{ i32, double, i32 }"> { -llvm.func @foo(%arg0: !llvm.i32) -> !llvm<"{ i32, double, i32 }"> { +// CHECK-LABEL: llvm.func @foo(%{{.*}}: !llvm.i32) -> !llvm.struct<(i32, double, i32)> { +llvm.func @foo(%arg0: !llvm.i32) -> !llvm.struct<(i32, double, i32)> { // CHECK: %[[V0:.*]] = llvm.mlir.constant(3 : i64) : !llvm.i32 // CHECK: %[[V1:.*]] = llvm.mlir.constant(3 : i64) : !llvm.i32 // CHECK: %[[V2:.*]] = llvm.mlir.constant(4.200000e+01 : f64) : !llvm.double @@ -154,56 +154,56 @@ llvm.func @foo(%arg0: !llvm.i32) -> !llvm<"{ i32, double, i32 }"> { llvm.cond_br %8, ^bb1(%4 : !llvm.i32), ^bb2(%4 : !llvm.i32) // CHECK:^[[BB1]](%[[V9:.*]]: !llvm.i32): -// CHECK: %[[V10:.*]] = llvm.call @foo(%[[V9]]) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V11:.*]] = llvm.extractvalue %[[V10]][0] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V12:.*]] = llvm.extractvalue %[[V10]][1] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V13:.*]] = llvm.extractvalue %[[V10]][2] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V14:.*]] = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V15:.*]] = llvm.insertvalue %[[V5]], %[[V14]][0] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V16:.*]] = llvm.insertvalue %[[V7]], %[[V15]][1] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V17:.*]] = llvm.insertvalue %[[V11]], %[[V16]][2] : !llvm<"{ i32, double, i32 }"> -// CHECK: llvm.return %[[V17]] : !llvm<"{ i32, double, i32 }"> +// CHECK: %[[V10:.*]] = llvm.call @foo(%[[V9]]) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> +// CHECK: %[[V11:.*]] = llvm.extractvalue %[[V10]][0] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V12:.*]] = llvm.extractvalue %[[V10]][1] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V13:.*]] = llvm.extractvalue %[[V10]][2] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V14:.*]] = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V15:.*]] = llvm.insertvalue %[[V5]], %[[V14]][0] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V16:.*]] = llvm.insertvalue %[[V7]], %[[V15]][1] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V17:.*]] = llvm.insertvalue %[[V11]], %[[V16]][2] : !llvm.struct<(i32, double, i32)> +// CHECK: llvm.return %[[V17]] : !llvm.struct<(i32, double, i32)> ^bb1(%9: !llvm.i32): - %10 = llvm.call @foo(%9) : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> - %11 = llvm.extractvalue %10[0] : !llvm<"{ i32, double, i32 }"> - %12 = llvm.extractvalue %10[1] : !llvm<"{ i32, double, i32 }"> - %13 = llvm.extractvalue %10[2] : !llvm<"{ i32, double, i32 }"> - %14 = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> - %15 = llvm.insertvalue %5, %14[0] : !llvm<"{ i32, double, i32 }"> - %16 = llvm.insertvalue %7, %15[1] : !llvm<"{ i32, double, i32 }"> - %17 = llvm.insertvalue %11, %16[2] : !llvm<"{ i32, double, i32 }"> - llvm.return %17 : !llvm<"{ i32, double, i32 }"> + %10 = llvm.call @foo(%9) : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> + %11 = llvm.extractvalue %10[0] : !llvm.struct<(i32, double, i32)> + %12 = llvm.extractvalue %10[1] : !llvm.struct<(i32, double, i32)> + %13 = llvm.extractvalue %10[2] : !llvm.struct<(i32, double, i32)> + %14 = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> + %15 = llvm.insertvalue %5, %14[0] : !llvm.struct<(i32, double, i32)> + %16 = llvm.insertvalue %7, %15[1] : !llvm.struct<(i32, double, i32)> + %17 = llvm.insertvalue %11, %16[2] : !llvm.struct<(i32, double, i32)> + llvm.return %17 : !llvm.struct<(i32, double, i32)> // CHECK:^[[BB2]](%[[V18:.*]]: !llvm.i32): -// 
CHECK: %[[V19:.*]] = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V20:.*]] = llvm.insertvalue %[[V18]], %[[V19]][0] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V21:.*]] = llvm.insertvalue %[[V7]], %[[V20]][1] : !llvm<"{ i32, double, i32 }"> -// CHECK: %[[V22:.*]] = llvm.insertvalue %[[V5]], %[[V21]][2] : !llvm<"{ i32, double, i32 }"> -// CHECK: llvm.return %[[V22]] : !llvm<"{ i32, double, i32 }"> +// CHECK: %[[V19:.*]] = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V20:.*]] = llvm.insertvalue %[[V18]], %[[V19]][0] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V21:.*]] = llvm.insertvalue %[[V7]], %[[V20]][1] : !llvm.struct<(i32, double, i32)> +// CHECK: %[[V22:.*]] = llvm.insertvalue %[[V5]], %[[V21]][2] : !llvm.struct<(i32, double, i32)> +// CHECK: llvm.return %[[V22]] : !llvm.struct<(i32, double, i32)> ^bb2(%18: !llvm.i32): - %19 = llvm.mlir.undef : !llvm<"{ i32, double, i32 }"> - %20 = llvm.insertvalue %18, %19[0] : !llvm<"{ i32, double, i32 }"> - %21 = llvm.insertvalue %7, %20[1] : !llvm<"{ i32, double, i32 }"> - %22 = llvm.insertvalue %5, %21[2] : !llvm<"{ i32, double, i32 }"> - llvm.return %22 : !llvm<"{ i32, double, i32 }"> + %19 = llvm.mlir.undef : !llvm.struct<(i32, double, i32)> + %20 = llvm.insertvalue %18, %19[0] : !llvm.struct<(i32, double, i32)> + %21 = llvm.insertvalue %7, %20[1] : !llvm.struct<(i32, double, i32)> + %22 = llvm.insertvalue %5, %21[2] : !llvm.struct<(i32, double, i32)> + llvm.return %22 : !llvm.struct<(i32, double, i32)> } // CHECK-LABEL: @casts -// CHECK-SAME: (%[[I32:.*]]: !llvm.i32, %[[I64:.*]]: !llvm.i64, %[[V4I32:.*]]: !llvm<"<4 x i32>">, %[[V4I64:.*]]: !llvm<"<4 x i64>">, %[[I32PTR:.*]]: !llvm<"i32*">) -func @casts(%arg0: !llvm.i32, %arg1: !llvm.i64, %arg2: !llvm<"<4 x i32>">, - %arg3: !llvm<"<4 x i64>">, %arg4: !llvm<"i32*">) { +// CHECK-SAME: (%[[I32:.*]]: !llvm.i32, %[[I64:.*]]: !llvm.i64, %[[V4I32:.*]]: !llvm.vec<4 x i32>, %[[V4I64:.*]]: !llvm.vec<4 x i64>, %[[I32PTR:.*]]: !llvm.ptr) +func @casts(%arg0: !llvm.i32, %arg1: !llvm.i64, %arg2: !llvm.vec<4 x i32>, + %arg3: !llvm.vec<4 x i64>, %arg4: !llvm.ptr) { // CHECK: = llvm.sext %[[I32]] : !llvm.i32 to !llvm.i56 %0 = llvm.sext %arg0 : !llvm.i32 to !llvm.i56 // CHECK: = llvm.zext %[[I32]] : !llvm.i32 to !llvm.i64 %1 = llvm.zext %arg0 : !llvm.i32 to !llvm.i64 // CHECK: = llvm.trunc %[[I64]] : !llvm.i64 to !llvm.i56 %2 = llvm.trunc %arg1 : !llvm.i64 to !llvm.i56 -// CHECK: = llvm.sext %[[V4I32]] : !llvm<"<4 x i32>"> to !llvm<"<4 x i56>"> - %3 = llvm.sext %arg2 : !llvm<"<4 x i32>"> to !llvm<"<4 x i56>"> -// CHECK: = llvm.zext %[[V4I32]] : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> - %4 = llvm.zext %arg2 : !llvm<"<4 x i32>"> to !llvm<"<4 x i64>"> -// CHECK: = llvm.trunc %[[V4I64]] : !llvm<"<4 x i64>"> to !llvm<"<4 x i56>"> - %5 = llvm.trunc %arg3 : !llvm<"<4 x i64>"> to !llvm<"<4 x i56>"> +// CHECK: = llvm.sext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i56> + %3 = llvm.sext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i56> +// CHECK: = llvm.zext %[[V4I32]] : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + %4 = llvm.zext %arg2 : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> +// CHECK: = llvm.trunc %[[V4I64]] : !llvm.vec<4 x i64> to !llvm.vec<4 x i56> + %5 = llvm.trunc %arg3 : !llvm.vec<4 x i64> to !llvm.vec<4 x i56> // CHECK: = llvm.sitofp %[[I32]] : !llvm.i32 to !llvm.float %6 = llvm.sitofp %arg0 : !llvm.i32 to !llvm.float // CHECK: %[[FLOAT:.*]] = llvm.uitofp %[[I32]] : !llvm.i32 to !llvm.float @@ -212,89 +212,89 @@ func @casts(%arg0: !llvm.i32, %arg1: !llvm.i64, %arg2: 
!llvm<"<4 x i32>">, %8 = llvm.fptosi %7 : !llvm.float to !llvm.i32 // CHECK: = llvm.fptoui %[[FLOAT]] : !llvm.float to !llvm.i32 %9 = llvm.fptoui %7 : !llvm.float to !llvm.i32 -// CHECK: = llvm.addrspacecast %[[I32PTR]] : !llvm<"i32*"> to !llvm<"i32 addrspace(2)*"> - %10 = llvm.addrspacecast %arg4 : !llvm<"i32*"> to !llvm<"i32 addrspace(2)*"> +// CHECK: = llvm.addrspacecast %[[I32PTR]] : !llvm.ptr to !llvm.ptr + %10 = llvm.addrspacecast %arg4 : !llvm.ptr to !llvm.ptr llvm.return } // CHECK-LABEL: @vect -func @vect(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2: !llvm.float) { -// CHECK: = llvm.extractelement {{.*}} : !llvm<"<4 x float>"> - %0 = llvm.extractelement %arg0[%arg1 : !llvm.i32] : !llvm<"<4 x float>"> -// CHECK: = llvm.insertelement {{.*}} : !llvm<"<4 x float>"> - %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i32] : !llvm<"<4 x float>"> -// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> - %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> -// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm<"<4 x float>"> - %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm<"<4 x float>"> +func @vect(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i32, %arg2: !llvm.float) { +// CHECK: = llvm.extractelement {{.*}} : !llvm.vec<4 x float> + %0 = llvm.extractelement %arg0[%arg1 : !llvm.i32] : !llvm.vec<4 x float> +// CHECK: = llvm.insertelement {{.*}} : !llvm.vec<4 x float> + %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i32] : !llvm.vec<4 x float> +// CHECK: = llvm.shufflevector {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x float>, !llvm.vec<4 x float> + %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x float>, !llvm.vec<4 x float> +// CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x float> + %3 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : !llvm.vec<4 x float> return } // CHECK-LABEL: @alloca func @alloca(%size : !llvm.i64) { - // CHECK: llvm.alloca %{{.*}} x !llvm.i32 : (!llvm.i64) -> !llvm<"i32*"> - llvm.alloca %size x !llvm.i32 {alignment = 0} : (!llvm.i64) -> (!llvm<"i32*">) - // CHECK: llvm.alloca %{{.*}} x !llvm.i32 {alignment = 8 : i64} : (!llvm.i64) -> !llvm<"i32*"> - llvm.alloca %size x !llvm.i32 {alignment = 8} : (!llvm.i64) -> (!llvm<"i32*">) + // CHECK: llvm.alloca %{{.*}} x !llvm.i32 : (!llvm.i64) -> !llvm.ptr + llvm.alloca %size x !llvm.i32 {alignment = 0} : (!llvm.i64) -> (!llvm.ptr) + // CHECK: llvm.alloca %{{.*}} x !llvm.i32 {alignment = 8 : i64} : (!llvm.i64) -> !llvm.ptr + llvm.alloca %size x !llvm.i32 {alignment = 8} : (!llvm.i64) -> (!llvm.ptr) llvm.return } // CHECK-LABEL: @null func @null() { - // CHECK: llvm.mlir.null : !llvm<"i8*"> - %0 = llvm.mlir.null : !llvm<"i8*"> - // CHECK: llvm.mlir.null : !llvm<"{ void (i32, void ()*)*, i64 }*"> - %1 = llvm.mlir.null : !llvm<"{void(i32, void()*)*, i64}*"> + // CHECK: llvm.mlir.null : !llvm.ptr + %0 = llvm.mlir.null : !llvm.ptr + // CHECK: llvm.mlir.null : !llvm.ptr>)>>, i64)>> + %1 = llvm.mlir.null : !llvm.ptr>)>>, i64)>> llvm.return } // CHECK-LABEL: @atomicrmw -func @atomicrmw(%ptr : !llvm<"float*">, %val : !llvm.float) { +func @atomicrmw(%ptr : !llvm.ptr, %val : !llvm.float) { // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} unordered : !llvm.float %0 = llvm.atomicrmw fadd %ptr, %val unordered : !llvm.float llvm.return } // 
CHECK-LABEL: @cmpxchg -func @cmpxchg(%ptr : !llvm<"float*">, %cmp : !llvm.float, %new : !llvm.float) { +func @cmpxchg(%ptr : !llvm.ptr, %cmp : !llvm.float, %new : !llvm.float) { // CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %{{.*}} acq_rel monotonic : !llvm.float %0 = llvm.cmpxchg %ptr, %cmp, %new acq_rel monotonic : !llvm.float llvm.return } -llvm.mlir.global external constant @_ZTIi() : !llvm<"i8*"> -llvm.func @bar(!llvm<"i8*">, !llvm<"i8*">, !llvm<"i8*">) +llvm.mlir.global external constant @_ZTIi() : !llvm.ptr +llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @__gxx_personality_v0(...) -> !llvm.i32 // CHECK-LABEL: @invokeLandingpad llvm.func @invokeLandingpad() -> !llvm.i32 attributes { personality = @__gxx_personality_v0 } { // CHECK: %[[a0:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: %{{.*}} = llvm.mlir.constant(3 : i32) : !llvm.i32 -// CHECK: %[[a2:.*]] = llvm.mlir.constant("\01") : !llvm<"[1 x i8]"> -// CHECK: %[[a3:.*]] = llvm.mlir.null : !llvm<"i8**"> -// CHECK: %[[a4:.*]] = llvm.mlir.null : !llvm<"i8*"> -// CHECK: %[[a5:.*]] = llvm.mlir.addressof @_ZTIi : !llvm<"i8**"> -// CHECK: %[[a6:.*]] = llvm.bitcast %[[a5]] : !llvm<"i8**"> to !llvm<"i8*"> +// CHECK: %[[a2:.*]] = llvm.mlir.constant("\01") : !llvm.array<1 x i8> +// CHECK: %[[a3:.*]] = llvm.mlir.null : !llvm.ptr> +// CHECK: %[[a4:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[a5:.*]] = llvm.mlir.addressof @_ZTIi : !llvm.ptr> +// CHECK: %[[a6:.*]] = llvm.bitcast %[[a5]] : !llvm.ptr> to !llvm.ptr // CHECK: %[[a7:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 -// CHECK: %[[a8:.*]] = llvm.alloca %[[a7]] x !llvm.i8 : (!llvm.i32) -> !llvm<"i8*"> -// CHECK: %{{.*}} = llvm.invoke @foo(%[[a7]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> +// CHECK: %[[a8:.*]] = llvm.alloca %[[a7]] x !llvm.i8 : (!llvm.i32) -> !llvm.ptr +// CHECK: %{{.*}} = llvm.invoke @foo(%[[a7]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> %0 = llvm.mlir.constant(0 : i32) : !llvm.i32 %1 = llvm.mlir.constant(3 : i32) : !llvm.i32 - %2 = llvm.mlir.constant("\01") : !llvm<"[1 x i8]"> - %3 = llvm.mlir.null : !llvm<"i8**"> - %4 = llvm.mlir.null : !llvm<"i8*"> - %5 = llvm.mlir.addressof @_ZTIi : !llvm<"i8**"> - %6 = llvm.bitcast %5 : !llvm<"i8**"> to !llvm<"i8*"> + %2 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> + %3 = llvm.mlir.null : !llvm.ptr> + %4 = llvm.mlir.null : !llvm.ptr + %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> + %6 = llvm.bitcast %5 : !llvm.ptr> to !llvm.ptr %7 = llvm.mlir.constant(1 : i32) : !llvm.i32 - %8 = llvm.alloca %7 x !llvm.i8 : (!llvm.i32) -> !llvm<"i8*"> - %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (!llvm.i32) -> !llvm<"{ i32, double, i32 }"> + %8 = llvm.alloca %7 x !llvm.i8 : (!llvm.i32) -> !llvm.ptr + %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (!llvm.i32) -> !llvm.struct<(i32, double, i32)> // CHECK: ^[[BB1]]: -// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[a3]] : !llvm<"i8**">) (catch %[[a6]] : !llvm<"i8*">) (filter %[[a2]] : !llvm<"[1 x i8]">) : !llvm<"{ i8*, i32 }"> -// CHECK: llvm.resume %[[lp]] : !llvm<"{ i8*, i32 }"> +// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[a3]] : !llvm.ptr>) (catch %[[a6]] : !llvm.ptr) (filter %[[a2]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> +// CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> ^bb1: - %10 = llvm.landingpad cleanup (catch %3 : !llvm<"i8**">) (catch %6 : !llvm<"i8*">) (filter %2 : !llvm<"[1 x i8]">) : !llvm<"{ i8*, i32 }"> - llvm.resume %10 : !llvm<"{ i8*, i32 
}"> + %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + llvm.resume %10 : !llvm.struct<(ptr, i32)> // CHECK: ^[[BB2]]: // CHECK: llvm.return %[[a7]] : !llvm.i32 @@ -302,9 +302,9 @@ llvm.func @invokeLandingpad() -> !llvm.i32 attributes { personality = @__gxx_per llvm.return %7 : !llvm.i32 // CHECK: ^[[BB3:.*]]: -// CHECK: llvm.invoke @bar(%[[a8]], %[[a6]], %[[a4]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm<"i8*">, !llvm<"i8*">, !llvm<"i8*">) -> () +// CHECK: llvm.invoke @bar(%[[a8]], %[[a6]], %[[a4]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () ^bb3: - llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm<"i8*">, !llvm<"i8*">, !llvm<"i8*">) -> () + llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () // CHECK: ^[[BB4:.*]]: // CHECK: llvm.return %[[a0]] : !llvm.i32 diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir index 7ce606fe8c6a8..bd24c68b8883a 100644 --- a/mlir/test/Dialect/LLVMIR/types.mlir +++ b/mlir/test/Dialect/LLVMIR/types.mlir @@ -2,183 +2,183 @@ // CHECK-LABEL: @primitive func @primitive() { - // CHECK: !llvm2.void - "some.op"() : () -> !llvm2.void - // CHECK: !llvm2.half - "some.op"() : () -> !llvm2.half - // CHECK: !llvm2.bfloat - "some.op"() : () -> !llvm2.bfloat - // CHECK: !llvm2.float - "some.op"() : () -> !llvm2.float - // CHECK: !llvm2.double - "some.op"() : () -> !llvm2.double - // CHECK: !llvm2.fp128 - "some.op"() : () -> !llvm2.fp128 - // CHECK: !llvm2.x86_fp80 - "some.op"() : () -> !llvm2.x86_fp80 - // CHECK: !llvm2.ppc_fp128 - "some.op"() : () -> !llvm2.ppc_fp128 - // CHECK: !llvm2.x86_mmx - "some.op"() : () -> !llvm2.x86_mmx - // CHECK: !llvm2.token - "some.op"() : () -> !llvm2.token - // CHECK: !llvm2.label - "some.op"() : () -> !llvm2.label - // CHECK: !llvm2.metadata - "some.op"() : () -> !llvm2.metadata + // CHECK: !llvm.void + "some.op"() : () -> !llvm.void + // CHECK: !llvm.half + "some.op"() : () -> !llvm.half + // CHECK: !llvm.bfloat + "some.op"() : () -> !llvm.bfloat + // CHECK: !llvm.float + "some.op"() : () -> !llvm.float + // CHECK: !llvm.double + "some.op"() : () -> !llvm.double + // CHECK: !llvm.fp128 + "some.op"() : () -> !llvm.fp128 + // CHECK: !llvm.x86_fp80 + "some.op"() : () -> !llvm.x86_fp80 + // CHECK: !llvm.ppc_fp128 + "some.op"() : () -> !llvm.ppc_fp128 + // CHECK: !llvm.x86_mmx + "some.op"() : () -> !llvm.x86_mmx + // CHECK: !llvm.token + "some.op"() : () -> !llvm.token + // CHECK: !llvm.label + "some.op"() : () -> !llvm.label + // CHECK: !llvm.metadata + "some.op"() : () -> !llvm.metadata return } // CHECK-LABEL: @func func @func() { - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func - // CHECK: !llvm2.func - "some.op"() : () -> !llvm2.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func + // CHECK: !llvm.func + "some.op"() : () -> !llvm.func return } // CHECK-LABEL: @integer func 
@integer() { - // CHECK: !llvm2.i1 - "some.op"() : () -> !llvm2.i1 - // CHECK: !llvm2.i8 - "some.op"() : () -> !llvm2.i8 - // CHECK: !llvm2.i16 - "some.op"() : () -> !llvm2.i16 - // CHECK: !llvm2.i32 - "some.op"() : () -> !llvm2.i32 - // CHECK: !llvm2.i64 - "some.op"() : () -> !llvm2.i64 - // CHECK: !llvm2.i57 - "some.op"() : () -> !llvm2.i57 - // CHECK: !llvm2.i129 - "some.op"() : () -> !llvm2.i129 + // CHECK: !llvm.i1 + "some.op"() : () -> !llvm.i1 + // CHECK: !llvm.i8 + "some.op"() : () -> !llvm.i8 + // CHECK: !llvm.i16 + "some.op"() : () -> !llvm.i16 + // CHECK: !llvm.i32 + "some.op"() : () -> !llvm.i32 + // CHECK: !llvm.i64 + "some.op"() : () -> !llvm.i64 + // CHECK: !llvm.i57 + "some.op"() : () -> !llvm.i57 + // CHECK: !llvm.i129 + "some.op"() : () -> !llvm.i129 return } // CHECK-LABEL: @ptr func @ptr() { - // CHECK: !llvm2.ptr - "some.op"() : () -> !llvm2.ptr - // CHECK: !llvm2.ptr - "some.op"() : () -> !llvm2.ptr - // CHECK: !llvm2.ptr> - "some.op"() : () -> !llvm2.ptr> - // CHECK: !llvm2.ptr>>>> - "some.op"() : () -> !llvm2.ptr>>>> - // CHECK: !llvm2.ptr - "some.op"() : () -> !llvm2.ptr - // CHECK: !llvm2.ptr - "some.op"() : () -> !llvm2.ptr - // CHECK: !llvm2.ptr - "some.op"() : () -> !llvm2.ptr - // CHECK: !llvm2.ptr, 9> - "some.op"() : () -> !llvm2.ptr, 9> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> + // CHECK: !llvm.ptr>>>> + "some.op"() : () -> !llvm.ptr>>>> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr, 9> + "some.op"() : () -> !llvm.ptr, 9> return } // CHECK-LABEL: @vec func @vec() { - // CHECK: !llvm2.vec<4 x i32> - "some.op"() : () -> !llvm2.vec<4 x i32> - // CHECK: !llvm2.vec<4 x float> - "some.op"() : () -> !llvm2.vec<4 x float> - // CHECK: !llvm2.vec - "some.op"() : () -> !llvm2.vec - // CHECK: !llvm2.vec - "some.op"() : () -> !llvm2.vec - // CHECK: !llvm2.vec<4 x ptr> - "some.op"() : () -> !llvm2.vec<4 x ptr> + // CHECK: !llvm.vec<4 x i32> + "some.op"() : () -> !llvm.vec<4 x i32> + // CHECK: !llvm.vec<4 x float> + "some.op"() : () -> !llvm.vec<4 x float> + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec<4 x ptr> + "some.op"() : () -> !llvm.vec<4 x ptr> return } // CHECK-LABEL: @array func @array() { - // CHECK: !llvm2.array<10 x i32> - "some.op"() : () -> !llvm2.array<10 x i32> - // CHECK: !llvm2.array<8 x float> - "some.op"() : () -> !llvm2.array<8 x float> - // CHECK: !llvm2.array<10 x ptr> - "some.op"() : () -> !llvm2.array<10 x ptr> - // CHECK: !llvm2.array<10 x array<4 x float>> - "some.op"() : () -> !llvm2.array<10 x array<4 x float>> + // CHECK: !llvm.array<10 x i32> + "some.op"() : () -> !llvm.array<10 x i32> + // CHECK: !llvm.array<8 x float> + "some.op"() : () -> !llvm.array<8 x float> + // CHECK: !llvm.array<10 x ptr> + "some.op"() : () -> !llvm.array<10 x ptr> + // CHECK: !llvm.array<10 x array<4 x float>> + "some.op"() : () -> !llvm.array<10 x array<4 x float>> return } // CHECK-LABEL: @literal_struct func @literal_struct() { - // CHECK: !llvm2.struct<()> - "some.op"() : () -> !llvm2.struct<()> - // CHECK: !llvm2.struct<(i32)> - "some.op"() : () -> !llvm2.struct<(i32)> - // CHECK: !llvm2.struct<(float, i32)> - "some.op"() : () -> !llvm2.struct<(float, i32)> - // CHECK: !llvm2.struct<(struct<(i32)>)> - "some.op"() : () -> 
!llvm2.struct<(struct<(i32)>)> - // CHECK: !llvm2.struct<(i32, struct<(i32)>, float)> - "some.op"() : () -> !llvm2.struct<(i32, struct<(i32)>, float)> + // CHECK: !llvm.struct<()> + "some.op"() : () -> !llvm.struct<()> + // CHECK: !llvm.struct<(i32)> + "some.op"() : () -> !llvm.struct<(i32)> + // CHECK: !llvm.struct<(float, i32)> + "some.op"() : () -> !llvm.struct<(float, i32)> + // CHECK: !llvm.struct<(struct<(i32)>)> + "some.op"() : () -> !llvm.struct<(struct<(i32)>)> + // CHECK: !llvm.struct<(i32, struct<(i32)>, float)> + "some.op"() : () -> !llvm.struct<(i32, struct<(i32)>, float)> - // CHECK: !llvm2.struct - "some.op"() : () -> !llvm2.struct - // CHECK: !llvm2.struct - "some.op"() : () -> !llvm2.struct - // CHECK: !llvm2.struct - "some.op"() : () -> !llvm2.struct - // CHECK: !llvm2.struct - "some.op"() : () -> !llvm2.struct - // CHECK: !llvm2.struct)> - "some.op"() : () -> !llvm2.struct)> - // CHECK: !llvm2.struct, float)> - "some.op"() : () -> !llvm2.struct, float)> + // CHECK: !llvm.struct + "some.op"() : () -> !llvm.struct + // CHECK: !llvm.struct + "some.op"() : () -> !llvm.struct + // CHECK: !llvm.struct + "some.op"() : () -> !llvm.struct + // CHECK: !llvm.struct + "some.op"() : () -> !llvm.struct + // CHECK: !llvm.struct)> + "some.op"() : () -> !llvm.struct)> + // CHECK: !llvm.struct, float)> + "some.op"() : () -> !llvm.struct, float)> - // CHECK: !llvm2.struct<(struct)> - "some.op"() : () -> !llvm2.struct<(struct)> - // CHECK: !llvm2.struct)> - "some.op"() : () -> !llvm2.struct)> + // CHECK: !llvm.struct<(struct)> + "some.op"() : () -> !llvm.struct<(struct)> + // CHECK: !llvm.struct)> + "some.op"() : () -> !llvm.struct)> return } // CHECK-LABEL: @identified_struct func @identified_struct() { - // CHECK: !llvm2.struct<"empty", ()> - "some.op"() : () -> !llvm2.struct<"empty", ()> - // CHECK: !llvm2.struct<"opaque", opaque> - "some.op"() : () -> !llvm2.struct<"opaque", opaque> - // CHECK: !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> - "some.op"() : () -> !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> - // CHECK: !llvm2.struct<"self-recursive", (ptr>)> - "some.op"() : () -> !llvm2.struct<"self-recursive", (ptr>)> - // CHECK: !llvm2.struct<"unpacked", (i32)> - "some.op"() : () -> !llvm2.struct<"unpacked", (i32)> - // CHECK: !llvm2.struct<"packed", packed (i32)> - "some.op"() : () -> !llvm2.struct<"packed", packed (i32)> - // CHECK: !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> - "some.op"() : () -> !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> + // CHECK: !llvm.struct<"empty", ()> + "some.op"() : () -> !llvm.struct<"empty", ()> + // CHECK: !llvm.struct<"opaque", opaque> + "some.op"() : () -> !llvm.struct<"opaque", opaque> + // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> + "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> + // CHECK: !llvm.struct<"self-recursive", (ptr>)> + "some.op"() : () -> !llvm.struct<"self-recursive", (ptr>)> + // CHECK: !llvm.struct<"unpacked", (i32)> + "some.op"() : () -> !llvm.struct<"unpacked", (i32)> + // CHECK: !llvm.struct<"packed", packed (i32)> + "some.op"() : () -> !llvm.struct<"packed", packed (i32)> + // CHECK: !llvm.struct<"name with spaces and !^$@$#", packed (i32)> + "some.op"() : () -> !llvm.struct<"name with spaces and !^$@$#", packed (i32)> - // CHECK: !llvm2.struct<"mutually-a", (ptr, 3>)>>)> - "some.op"() : () -> !llvm2.struct<"mutually-a", (ptr, 3>)>>)> - // CHECK: !llvm2.struct<"mutually-b", (ptr>)>, 3>)> - "some.op"() : () -> 
!llvm2.struct<"mutually-b", (ptr>)>, 3>)> - // CHECK: !llvm2.struct<"referring-another", (ptr>)> - "some.op"() : () -> !llvm2.struct<"referring-another", (ptr>)> + // CHECK: !llvm.struct<"mutually-a", (ptr, 3>)>>)> + "some.op"() : () -> !llvm.struct<"mutually-a", (ptr, 3>)>>)> + // CHECK: !llvm.struct<"mutually-b", (ptr>)>, 3>)> + "some.op"() : () -> !llvm.struct<"mutually-b", (ptr>)>, 3>)> + // CHECK: !llvm.struct<"referring-another", (ptr>)> + "some.op"() : () -> !llvm.struct<"referring-another", (ptr>)> - // CHECK: !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> - "some.op"() : () -> !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> - // CHECK: !llvm2.array<10 x struct<"array-of-structs", (i32)>> - "some.op"() : () -> !llvm2.array<10 x struct<"array-of-structs", (i32)>> - // CHECK: !llvm2.ptr> - "some.op"() : () -> !llvm2.ptr> + // CHECK: !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + "some.op"() : () -> !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + // CHECK: !llvm.array<10 x struct<"array-of-structs", (i32)>> + "some.op"() : () -> !llvm.array<10 x struct<"array-of-structs", (i32)>> + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> return } diff --git a/mlir/test/Dialect/Linalg/llvm.mlir b/mlir/test/Dialect/Linalg/llvm.mlir index 9b052fd2fab47..02693e5d1be46 100644 --- a/mlir/test/Dialect/Linalg/llvm.mlir +++ b/mlir/test/Dialect/Linalg/llvm.mlir @@ -9,10 +9,10 @@ func @range(%arg0: index) { // CHECK-LABEL: func @range(%{{.*}}: !llvm.i64) { // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK-NEXT: llvm.mlir.undef : !llvm<"{ i64, i64, i64 }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ i64, i64, i64 }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ i64, i64, i64 }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ i64, i64, i64 }"> +// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(i64, i64, i64)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(i64, i64, i64)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(i64, i64, i64)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(i64, i64, i64)> func @slice(%arg0: memref, %arg1: !linalg.range) { %1 = linalg.slice %arg0[%arg1] : memref, !linalg.range, memref @@ -20,20 +20,20 @@ func @slice(%arg0: memref, %arg1: !linalg.range) } // CHECK-LABEL: func @slice // insert data ptr for slice op -// CHECK: llvm.extractvalue %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i64, i64 }"> +// CHECK: llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, i64, i64)> // CHECK-NEXT: llvm.mul %{{.*}}, %{{.*}} : !llvm.i64 // CHECK-NEXT: llvm.add %{{.*}}, %{{.*}} : !llvm.i64 // insert offset -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[2] : 
!llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: llvm.mlir.constant(0 : index) -// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i64, i64 }"> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i64, i64, i64 }"> -// CHECK-NEXT: llvm.extractvalue %{{.*}}[2] : !llvm<"{ i64, i64, i64 }"> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, i64, i64)> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, i64, i64)> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(i64, i64, i64)> // get size[0] from parent view -// CHECK-NEXT: llvm.extractvalue %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// CHECK-NEXT: llvm.extractvalue %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> // CHECK-NEXT: llvm.icmp "slt" %{{.*}}, %{{.*}} : !llvm.i64 // CHECK-NEXT: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.i64 // compute size[0] bounded by parent view's size[0] @@ -44,8 +44,8 @@ func @slice(%arg0: memref, %arg1: !linalg.range) // compute stride[0] using bounded size // CHECK-NEXT: llvm.mul %{{.*}}, %{{.*}} : !llvm.i64 // insert size and stride -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> -// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK-NEXT: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> func @slice_with_range_and_index(%arg0: memref) { %c0 = constant 0 : index @@ -58,32 +58,32 @@ func @slice_with_range_and_index(%arg0: memref -// CHECK: llvm.extractvalue %{{.*}}[4, 0] : !llvm<"{ double*, double*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[4, 1] : !llvm<"{ double*, double*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"{ double*, double*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ double*, double*, i64, [1 x i64], [1 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}[2] : !llvm<"{ double*, double*, i64, [1 x i64], [1 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i64, i64 }"> -// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i64, i64, i64 }"> -// CHECK: llvm.insertvalue %{{.*}}[3, 0] : !llvm<"{ double*, double*, i64, [1 x i64], [1 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}[4, 0] : !llvm<"{ double*, double*, i64, [1 x i64], [1 x i64] }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, i64, i64)> +// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(i64, i64, i64)> +// CHECK: llvm.insertvalue %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}[4, 0] 
: !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> func @transpose(%arg0: memref) { %0 = linalg.transpose %arg0 (i, j, k) -> (k, i, j) : memref return } // CHECK-LABEL: func @transpose -// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue {{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue {{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue {{.*}}[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue {{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue {{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue {{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> func @reshape_static_expand(%arg0: memref<3x4x5xf32>) -> memref<1x3x4x1x5xf32> { // Reshapes that expand a contiguous tensor with some 1's. 
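The descriptor type recurring in the CHECK lines above is the standard memref lowering: allocated pointer, aligned pointer, offset, then one size and one stride entry per rank. A minimal sketch in the new syntax, not part of the patch; the element type parameter of ptr is assumed to be float, matching the old !llvm<"{ float*, float*, i64, ... }"> spelling, and %alloc and %off are hypothetical values:

  // Populate a rank-1 f32 memref descriptor field by field.
  %d0 = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
  %d1 = llvm.insertvalue %alloc, %d0[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
  %d2 = llvm.insertvalue %off, %d1[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>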
@@ -94,33 +94,33 @@ func @reshape_static_expand(%arg0: memref<3x4x5xf32>) -> memref<1x3x4x1x5xf32> { return %0 : memref<1x3x4x1x5xf32> } // CHECK-LABEL: func @reshape_static_expand -// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 2] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 3] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 3] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 4] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 4] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(60 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(20 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: 
llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 2] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 2] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 3] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 3] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 4] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> func @reshape_static_collapse(%arg0: memref<1x3x4x1x5xf32>) -> memref<3x4x5xf32> { %0 = linalg.reshape %arg0 [affine_map<(i, j, k, l, m) -> (i, j)>, @@ -130,56 +130,56 @@ func @reshape_static_collapse(%arg0: memref<1x3x4x1x5xf32>) -> memref<3x4x5xf32> return %0 : memref<3x4x5xf32> } // CHECK-LABEL: func @reshape_static_collapse -// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"{ float*, float*, i64, [5 x i64], [5 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<5 x i64>, array<5 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(3 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(20 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x 
i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(5 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 2] : !llvm<"{ float*, float*, i64, [3 x i64], [3 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> func @reshape_fold_zero_dim(%arg0 : memref<1x1xf32>) -> memref { %0 = linalg.reshape %arg0 [] : memref<1x1xf32> into memref return %0 : memref } // CHECK-LABEL: func @reshape_fold_zero_dim -// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64 }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> func @reshape_expand_zero_dim(%arg0 : memref) -> memref<1x1xf32> { %0 = linalg.reshape %arg0 [] : memref into memref<1x1xf32> return %0 : memref<1x1xf32> } // CHECK-LABEL: func @reshape_expand_zero_dim -// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm<"{ float*, float*, i64 }"> -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: 
llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/Target/avx512.mlir b/mlir/test/Target/avx512.mlir index 5e75a98dc4ef8..0cc336d29df0c 100644 --- a/mlir/test/Target/avx512.mlir +++ b/mlir/test/Target/avx512.mlir @@ -1,31 +1,31 @@ // RUN: mlir-opt -verify-diagnostics %s | mlir-opt | mlir-translate --avx512-mlir-to-llvmir | FileCheck %s // CHECK-LABEL: define <16 x float> @LLVM_x86_avx512_mask_ps_512 -llvm.func @LLVM_x86_avx512_mask_ps_512(%a: !llvm<"<16 x float>">, +llvm.func @LLVM_x86_avx512_mask_ps_512(%a: !llvm.vec<16 x float>, %b: !llvm.i32, %c: !llvm.i16) - -> (!llvm<"<16 x float>">) + -> (!llvm.vec<16 x float>) { // CHECK: call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %0 = "llvm_avx512.mask.rndscale.ps.512"(%a, %b, %a, %c, %b) : - (!llvm<"<16 x float>">, !llvm.i32, !llvm<"<16 x float>">, !llvm.i16, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<16 x float>, !llvm.i32, !llvm.vec<16 x float>, !llvm.i16, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %1 = "llvm_avx512.mask.scalef.ps.512"(%a, %a, %a, %c, %b) : - (!llvm<"<16 x float>">, !llvm<"<16 x float>">, !llvm<"<16 x float>">, !llvm.i16, !llvm.i32) -> !llvm<"<16 x float>"> - llvm.return %1: !llvm<"<16 x float>"> + (!llvm.vec<16 x float>, !llvm.vec<16 x float>, !llvm.vec<16 x float>, !llvm.i16, !llvm.i32) -> !llvm.vec<16 x float> + llvm.return %1: !llvm.vec<16 x float> } // CHECK-LABEL: define <8 x double> @LLVM_x86_avx512_mask_pd_512 -llvm.func @LLVM_x86_avx512_mask_pd_512(%a: !llvm<"<8 x double>">, +llvm.func @LLVM_x86_avx512_mask_pd_512(%a: !llvm.vec<8 x double>, %b: !llvm.i32, %c: !llvm.i8) - -> (!llvm<"<8 x double>">) + -> (!llvm.vec<8 x double>) { // CHECK: call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %0 = "llvm_avx512.mask.rndscale.pd.512"(%a, %b, %a, %c, %b) : - (!llvm<"<8 x double>">, !llvm.i32, !llvm<"<8 x double>">, !llvm.i8, !llvm.i32) -> !llvm<"<8 x double>"> + (!llvm.vec<8 x double>, !llvm.i32, !llvm.vec<8 x double>, !llvm.i8, !llvm.i32) -> !llvm.vec<8 x double> // CHECK: call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %1 = "llvm_avx512.mask.scalef.pd.512"(%a, %a, %a, %c, %b) : - (!llvm<"<8 x double>">, !llvm<"<8 x double>">, !llvm<"<8 x double>">, !llvm.i8, !llvm.i32) -> !llvm<"<8 x double>"> - llvm.return %1: !llvm<"<8 x double>"> + (!llvm.vec<8 x double>, !llvm.vec<8 x double>, !llvm.vec<8 x double>, !llvm.i8, !llvm.i32) -> !llvm.vec<8 x double> 
+  llvm.return %1: !llvm.vec<8 x double>
 }
diff --git a/mlir/test/Target/import.ll b/mlir/test/Target/import.ll
index 24b4a0b392b0f..d67bbb029f8ac 100644
--- a/mlir/test/Target/import.ll
+++ b/mlir/test/Target/import.ll
@@ -3,22 +3,22 @@
 %struct.t = type {}
 %struct.s = type { %struct.t, i64 }
-; CHECK: llvm.mlir.global external @g1() : !llvm<"{ {}, i64 }">
+; CHECK: llvm.mlir.global external @g1() : !llvm.struct<(struct<()>, i64)>
 @g1 = external global %struct.s, align 8
 ; CHECK: llvm.mlir.global external @g2() : !llvm.double
 @g2 = external global double, align 8
 ; CHECK: llvm.mlir.global internal @g3("string")
 @g3 = internal global [6 x i8] c"string"
-; CHECK: llvm.mlir.global external @g5() : !llvm<"<8 x i32>">
+; CHECK: llvm.mlir.global external @g5() : !llvm.vec<8 x i32>
 @g5 = external global <8 x i32>
 @g4 = external global i32, align 8
-; CHECK: llvm.mlir.global internal constant @int_gep() : !llvm<"i32*"> {
-; CHECK-DAG: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm<"i32*">
+; CHECK: llvm.mlir.global internal constant @int_gep() : !llvm.ptr<i32> {
+; CHECK-DAG: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm.ptr<i32>
 ; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : !llvm.i32
-; CHECK-NEXT: %[[gepinit:[0-9]+]] = llvm.getelementptr %[[addr]][%[[c2]]] : (!llvm<"i32*">, !llvm.i32) -> !llvm<"i32*">
-; CHECK-NEXT: llvm.return %[[gepinit]] : !llvm<"i32*">
+; CHECK-NEXT: %[[gepinit:[0-9]+]] = llvm.getelementptr %[[addr]][%[[c2]]] : (!llvm.ptr<i32>, !llvm.i32) -> !llvm.ptr<i32>
+; CHECK-NEXT: llvm.return %[[gepinit]] : !llvm.ptr<i32>
 ; CHECK-NEXT: }
 @int_gep = internal constant i32* getelementptr (i32, i32* @g4, i32 2)
@@ -53,15 +53,15 @@
 ; Sequential constants.
 ;
-; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : !llvm<"<2 x i32>">
+; CHECK: llvm.mlir.global internal constant @vector_constant(dense<[1, 2]> : vector<2xi32>) : !llvm.vec<2 x i32>
 @vector_constant = internal constant <2 x i32> <i32 1, i32 2>
-; CHECK: llvm.mlir.global internal constant @array_constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm<"[2 x float]">
+; CHECK: llvm.mlir.global internal constant @array_constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm.array<2 x float>
 @array_constant = internal constant [2 x float] [float 1., float 2.]
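As the @vector_constant and @array_constant checks above illustrate, the importer folds an LLVM IR aggregate initializer into a single dense elements attribute on the MLIR global. A hedged round-trip sketch with a hypothetical global @v, values chosen for illustration:

  ; LLVM IR input:  @v = internal constant <2 x i32> <i32 1, i32 2>
  ; imports as the MLIR op below:
  llvm.mlir.global internal constant @v(dense<[1, 2]> : vector<2xi32>) : !llvm.vec<2 x i32>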
-; CHECK: llvm.mlir.global internal constant @nested_array_constant(dense<[{{\[}}1, 2], [3, 4]]> : tensor<2x2xi32>) : !llvm<"[2 x [2 x i32]]"> +; CHECK: llvm.mlir.global internal constant @nested_array_constant(dense<[{{\[}}1, 2], [3, 4]]> : tensor<2x2xi32>) : !llvm.array<2 x array<2 x i32>> @nested_array_constant = internal constant [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]] -; CHECK: llvm.mlir.global internal constant @nested_array_constant3(dense<[{{\[}}[1, 2], [3, 4]]]> : tensor<1x2x2xi32>) : !llvm<"[1 x [2 x [2 x i32]]]"> +; CHECK: llvm.mlir.global internal constant @nested_array_constant3(dense<[{{\[}}[1, 2], [3, 4]]]> : tensor<1x2x2xi32>) : !llvm.array<1 x array<2 x array<2 x i32>>> @nested_array_constant3 = internal constant [1 x [2 x [2 x i32]]] [[2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]]] -; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm<"[1 x [2 x <2 x i32>]]"> +; CHECK: llvm.mlir.global internal constant @nested_array_vector(dense<[{{\[}}[1, 2], [3, 4]]]> : vector<1x2x2xi32>) : !llvm.array<1 x array<2 x vec<2 x i32>>> @nested_array_vector = internal constant [1 x [2 x <2 x i32>]] [[2 x <2 x i32>] [<2 x i32> , <2 x i32> ]] ; @@ -84,13 +84,13 @@ declare float @fe(i32) ; CHECK-DAG: %[[c43:[0-9]+]] = llvm.mlir.constant(43 : i32) : !llvm.i32 define internal dso_local i32 @f1(i64 %a) norecurse { entry: -; CHECK: %{{[0-9]+}} = llvm.inttoptr %arg0 : !llvm.i64 to !llvm<"i64*"> +; CHECK: %{{[0-9]+}} = llvm.inttoptr %arg0 : !llvm.i64 to !llvm.ptr %aa = inttoptr i64 %a to i64* -; %[[addrof:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm<"double*"> -; %[[addrof2:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm<"double*"> -; %{{[0-9]+}} = llvm.inttoptr %arg0 : !llvm.i64 to !llvm<"i64*"> -; %{{[0-9]+}} = llvm.ptrtoint %[[addrof2]] : !llvm<"double*"> to !llvm.i64 -; %{{[0-9]+}} = llvm.getelementptr %[[addrof]][%3] : (!llvm<"double*">, !llvm.i32) -> !llvm<"double*"> +; %[[addrof:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm.ptr +; %[[addrof2:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm.ptr +; %{{[0-9]+}} = llvm.inttoptr %arg0 : !llvm.i64 to !llvm.ptr +; %{{[0-9]+}} = llvm.ptrtoint %[[addrof2]] : !llvm.ptr to !llvm.i64 +; %{{[0-9]+}} = llvm.getelementptr %[[addrof]][%3] : (!llvm.ptr, !llvm.i32) -> !llvm.ptr %bb = ptrtoint double* @g2 to i64 %cc = getelementptr double, double* @g2, i32 2 ; CHECK: %[[b:[0-9]+]] = llvm.trunc %arg0 : !llvm.i64 to !llvm.i32 @@ -161,18 +161,18 @@ next: br label %end } -; CHECK-LABEL: llvm.func @f3() -> !llvm<"i32*"> +; CHECK-LABEL: llvm.func @f3() -> !llvm.ptr define i32* @f3() { -; CHECK: %[[c:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm<"double*"> -; CHECK: %[[b:[0-9]+]] = llvm.bitcast %[[c]] : !llvm<"double*"> to !llvm<"i32*"> -; CHECK: llvm.return %[[b]] : !llvm<"i32*"> +; CHECK: %[[c:[0-9]+]] = llvm.mlir.addressof @g2 : !llvm.ptr +; CHECK: %[[b:[0-9]+]] = llvm.bitcast %[[c]] : !llvm.ptr to !llvm.ptr +; CHECK: llvm.return %[[b]] : !llvm.ptr ret i32* bitcast (double* @g2 to i32*) } -; CHECK-LABEL: llvm.func @f4() -> !llvm<"i32*"> +; CHECK-LABEL: llvm.func @f4() -> !llvm.ptr define i32* @f4() { -; CHECK: %[[b:[0-9]+]] = llvm.mlir.null : !llvm<"i32*"> -; CHECK: llvm.return %[[b]] : !llvm<"i32*"> +; CHECK: %[[b:[0-9]+]] = llvm.mlir.null : !llvm.ptr +; CHECK: llvm.return %[[b]] : !llvm.ptr ret i32* bitcast (double* null to i32*) } @@ -198,7 +198,7 @@ define void @f5(i32 %d) { ret void } -; CHECK-LABEL: llvm.func @f6(%arg0: !llvm<"void (i16)*">) +; CHECK-LABEL: 
llvm.func @f6(%arg0: !llvm.ptr>) define void @f6(void (i16) *%fn) { ; CHECK: %[[c:[0-9]+]] = llvm.mlir.constant(0 : i16) : !llvm.i16 ; CHECK: llvm.call %arg0(%[[c]]) @@ -243,7 +243,7 @@ define void @FPArithmetic(float %a, float %b, double %c, double %d) { ; CHECK-LABEL: @precaller define i32 @precaller() { %1 = alloca i32 ()* - ; CHECK: %[[func:.*]] = llvm.mlir.addressof @callee : !llvm<"i32 ()*"> + ; CHECK: %[[func:.*]] = llvm.mlir.addressof @callee : !llvm.ptr> ; CHECK: llvm.store %[[func]], %[[loc:.*]] store i32 ()* @callee, i32 ()** %1 ; CHECK: %[[indir:.*]] = llvm.load %[[loc]] @@ -261,7 +261,7 @@ define i32 @callee() { ; CHECK-LABEL: @postcaller define i32 @postcaller() { %1 = alloca i32 ()* - ; CHECK: %[[func:.*]] = llvm.mlir.addressof @callee : !llvm<"i32 ()*"> + ; CHECK: %[[func:.*]] = llvm.mlir.addressof @callee : !llvm.ptr> ; CHECK: llvm.store %[[func]], %[[loc:.*]] store i32 ()* @callee, i32 ()** %1 ; CHECK: %[[indir:.*]] = llvm.load %[[loc]] @@ -279,16 +279,16 @@ declare i32 @__gxx_personality_v0(...) ; CHECK-LABEL: @invokeLandingpad define i32 @invokeLandingpad() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { - ; CHECK: %[[a1:[0-9]+]] = llvm.bitcast %{{[0-9]+}} : !llvm<"i8***"> to !llvm<"i8*"> - ; CHECK: %[[a3:[0-9]+]] = llvm.alloca %{{[0-9]+}} x !llvm.i8 : (!llvm.i32) -> !llvm<"i8*"> + ; CHECK: %[[a1:[0-9]+]] = llvm.bitcast %{{[0-9]+}} : !llvm.ptr>> to !llvm.ptr + ; CHECK: %[[a3:[0-9]+]] = llvm.alloca %{{[0-9]+}} x !llvm.i8 : (!llvm.i32) -> !llvm.ptr %1 = alloca i8 - ; CHECK: llvm.invoke @foo(%[[a3]]) to ^bb2 unwind ^bb1 : (!llvm<"i8*">) -> () + ; CHECK: llvm.invoke @foo(%[[a3]]) to ^bb2 unwind ^bb1 : (!llvm.ptr) -> () invoke void @foo(i8* %1) to label %4 unwind label %2 ; CHECK: ^bb1: - ; CHECK: %{{[0-9]+}} = llvm.landingpad (catch %{{[0-9]+}} : !llvm<"i8**">) (catch %[[a1]] : !llvm<"i8*">) (filter %{{[0-9]+}} : !llvm<"[1 x i8]">) : !llvm<"{ i8*, i32 }"> + ; CHECK: %{{[0-9]+}} = llvm.landingpad (catch %{{[0-9]+}} : !llvm.ptr>) (catch %[[a1]] : !llvm.ptr) (filter %{{[0-9]+}} : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> %3 = landingpad { i8*, i32 } catch i8** @_ZTIi catch i8* bitcast (i8*** @_ZTIii to i8*) - ; FIXME: Change filter to a constant array once they are handled. + ; FIXME: Change filter to a constant array once they are handled. 
; Currently, even though it parses this, LLVM module is broken filter [1 x i8] [i8 1] resume { i8*, i32 } %3 @@ -298,7 +298,7 @@ define i32 @invokeLandingpad() personality i8* bitcast (i32 (...)* @__gxx_person ret i32 1 ; CHECK: ^bb3: - ; CHECK: %{{[0-9]+}} = llvm.invoke @bar(%[[a3]]) to ^bb2 unwind ^bb1 : (!llvm<"i8*">) -> !llvm<"i8*"> + ; CHECK: %{{[0-9]+}} = llvm.invoke @bar(%[[a3]]) to ^bb2 unwind ^bb1 : (!llvm.ptr) -> !llvm.ptr %6 = invoke i8* @bar(i8* %1) to label %4 unwind label %2 ; CHECK: ^bb4: diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir index 286d8fbe5f68b..fc286599ee955 100644 --- a/mlir/test/Target/llvmir-intrinsics.mlir +++ b/mlir/test/Target/llvmir-intrinsics.mlir @@ -1,277 +1,277 @@ // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s // CHECK-LABEL: @intrinsics -llvm.func @intrinsics(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"i8*">) { +llvm.func @intrinsics(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.vec<8 x float>, %arg3: !llvm.ptr) { %c3 = llvm.mlir.constant(3 : i32) : !llvm.i32 %c1 = llvm.mlir.constant(1 : i32) : !llvm.i32 %c0 = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK: call float @llvm.fmuladd.f32 "llvm.intr.fmuladd"(%arg0, %arg1, %arg0) : (!llvm.float, !llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.fmuladd.v8f32 - "llvm.intr.fmuladd"(%arg2, %arg2, %arg2) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.fmuladd"(%arg2, %arg2, %arg2) : (!llvm.vec<8 x float>, !llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> // CHECK: call float @llvm.fma.f32 "llvm.intr.fma"(%arg0, %arg1, %arg0) : (!llvm.float, !llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.fma.v8f32 - "llvm.intr.fma"(%arg2, %arg2, %arg2) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.fma"(%arg2, %arg2, %arg2) : (!llvm.vec<8 x float>, !llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> // CHECK: call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) - "llvm.intr.prefetch"(%arg3, %c0, %c3, %c1) : (!llvm<"i8*">, !llvm.i32, !llvm.i32, !llvm.i32) -> () + "llvm.intr.prefetch"(%arg3, %c0, %c3, %c1) : (!llvm.ptr, !llvm.i32, !llvm.i32, !llvm.i32) -> () llvm.return } // CHECK-LABEL: @exp_test -llvm.func @exp_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @exp_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.exp.f32 "llvm.intr.exp"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.exp.v8f32 - "llvm.intr.exp"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.exp"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @exp2_test -llvm.func @exp2_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @exp2_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.exp2.f32 "llvm.intr.exp2"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.exp2.v8f32 - "llvm.intr.exp2"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.exp2"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @log_test -llvm.func @log_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @log_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.log.f32 "llvm.intr.log"(%arg0) : 
(!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.log.v8f32 - "llvm.intr.log"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.log"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @log10_test -llvm.func @log10_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @log10_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.log10.f32 "llvm.intr.log10"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.log10.v8f32 - "llvm.intr.log10"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.log10"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @log2_test -llvm.func @log2_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @log2_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.log2.f32 "llvm.intr.log2"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.log2.v8f32 - "llvm.intr.log2"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.log2"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @fabs_test -llvm.func @fabs_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @fabs_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.fabs.f32 "llvm.intr.fabs"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.fabs.v8f32 - "llvm.intr.fabs"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.fabs"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @sqrt_test -llvm.func @sqrt_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @sqrt_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.sqrt.f32 "llvm.intr.sqrt"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.sqrt.v8f32 - "llvm.intr.sqrt"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.sqrt"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @ceil_test -llvm.func @ceil_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @ceil_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.ceil.f32 "llvm.intr.ceil"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.ceil.v8f32 - "llvm.intr.ceil"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.ceil"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @floor_test -llvm.func @floor_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @floor_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.floor.f32 "llvm.intr.floor"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.floor.v8f32 - "llvm.intr.floor"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.floor"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @cos_test -llvm.func @cos_test(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">) { +llvm.func @cos_test(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>) { // CHECK: call float @llvm.cos.f32 "llvm.intr.cos"(%arg0) : (!llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.cos.v8f32 - "llvm.intr.cos"(%arg1) : (!llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.cos"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // 
CHECK-LABEL: @copysign_test -llvm.func @copysign_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"<8 x float>">) { +llvm.func @copysign_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.vec<8 x float>, %arg3: !llvm.vec<8 x float>) { // CHECK: call float @llvm.copysign.f32 "llvm.intr.copysign"(%arg0, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.copysign.v8f32 - "llvm.intr.copysign"(%arg2, %arg3) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.copysign"(%arg2, %arg3) : (!llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @pow_test -llvm.func @pow_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"<8 x float>">) { +llvm.func @pow_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.vec<8 x float>, %arg3: !llvm.vec<8 x float>) { // CHECK: call float @llvm.pow.f32 "llvm.intr.pow"(%arg0, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.pow.v8f32 - "llvm.intr.pow"(%arg2, %arg3) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.pow"(%arg2, %arg3) : (!llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @bitreverse_test -llvm.func @bitreverse_test(%arg0: !llvm.i32, %arg1: !llvm<"<8 x i32>">) { +llvm.func @bitreverse_test(%arg0: !llvm.i32, %arg1: !llvm.vec<8 x i32>) { // CHECK: call i32 @llvm.bitreverse.i32 "llvm.intr.bitreverse"(%arg0) : (!llvm.i32) -> !llvm.i32 // CHECK: call <8 x i32> @llvm.bitreverse.v8i32 - "llvm.intr.bitreverse"(%arg1) : (!llvm<"<8 x i32>">) -> !llvm<"<8 x i32>"> + "llvm.intr.bitreverse"(%arg1) : (!llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> llvm.return } // CHECK-LABEL: @ctpop_test -llvm.func @ctpop_test(%arg0: !llvm.i32, %arg1: !llvm<"<8 x i32>">) { +llvm.func @ctpop_test(%arg0: !llvm.i32, %arg1: !llvm.vec<8 x i32>) { // CHECK: call i32 @llvm.ctpop.i32 "llvm.intr.ctpop"(%arg0) : (!llvm.i32) -> !llvm.i32 // CHECK: call <8 x i32> @llvm.ctpop.v8i32 - "llvm.intr.ctpop"(%arg1) : (!llvm<"<8 x i32>">) -> !llvm<"<8 x i32>"> + "llvm.intr.ctpop"(%arg1) : (!llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> llvm.return } // CHECK-LABEL: @maxnum_test -llvm.func @maxnum_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"<8 x float>">) { +llvm.func @maxnum_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.vec<8 x float>, %arg3: !llvm.vec<8 x float>) { // CHECK: call float @llvm.maxnum.f32 "llvm.intr.maxnum"(%arg0, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.maxnum.v8f32 - "llvm.intr.maxnum"(%arg2, %arg3) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.maxnum"(%arg2, %arg3) : (!llvm.vec<8 x float>, !llvm.vec<8 x float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @minnum_test -llvm.func @minnum_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm<"<8 x float>">, %arg3: !llvm<"<8 x float>">) { +llvm.func @minnum_test(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.vec<8 x float>, %arg3: !llvm.vec<8 x float>) { // CHECK: call float @llvm.minnum.f32 "llvm.intr.minnum"(%arg0, %arg1) : (!llvm.float, !llvm.float) -> !llvm.float // CHECK: call <8 x float> @llvm.minnum.v8f32 - "llvm.intr.minnum"(%arg2, %arg3) : (!llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>"> + "llvm.intr.minnum"(%arg2, %arg3) : (!llvm.vec<8 x float>, !llvm.vec<8 x 
float>) -> !llvm.vec<8 x float> llvm.return } // CHECK-LABEL: @smax_test -llvm.func @smax_test(%arg0: !llvm.i32, %arg1: !llvm.i32, %arg2: !llvm<"<8 x i32>">, %arg3: !llvm<"<8 x i32>">) { +llvm.func @smax_test(%arg0: !llvm.i32, %arg1: !llvm.i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { // CHECK: call i32 @llvm.smax.i32 "llvm.intr.smax"(%arg0, %arg1) : (!llvm.i32, !llvm.i32) -> !llvm.i32 // CHECK: call <8 x i32> @llvm.smax.v8i32 - "llvm.intr.smax"(%arg2, %arg3) : (!llvm<"<8 x i32>">, !llvm<"<8 x i32>">) -> !llvm<"<8 x i32>"> + "llvm.intr.smax"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> llvm.return } // CHECK-LABEL: @smin_test -llvm.func @smin_test(%arg0: !llvm.i32, %arg1: !llvm.i32, %arg2: !llvm<"<8 x i32>">, %arg3: !llvm<"<8 x i32>">) { +llvm.func @smin_test(%arg0: !llvm.i32, %arg1: !llvm.i32, %arg2: !llvm.vec<8 x i32>, %arg3: !llvm.vec<8 x i32>) { // CHECK: call i32 @llvm.smin.i32 "llvm.intr.smin"(%arg0, %arg1) : (!llvm.i32, !llvm.i32) -> !llvm.i32 // CHECK: call <8 x i32> @llvm.smin.v8i32 - "llvm.intr.smin"(%arg2, %arg3) : (!llvm<"<8 x i32>">, !llvm<"<8 x i32>">) -> !llvm<"<8 x i32>"> + "llvm.intr.smin"(%arg2, %arg3) : (!llvm.vec<8 x i32>, !llvm.vec<8 x i32>) -> !llvm.vec<8 x i32> llvm.return } // CHECK-LABEL: @vector_reductions -llvm.func @vector_reductions(%arg0: !llvm.float, %arg1: !llvm<"<8 x float>">, %arg2: !llvm<"<8 x i32>">) { +llvm.func @vector_reductions(%arg0: !llvm.float, %arg1: !llvm.vec<8 x float>, %arg2: !llvm.vec<8 x i32>) { // CHECK: call i32 @llvm.experimental.vector.reduce.add.v8i32 - "llvm.intr.experimental.vector.reduce.add"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.add"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.and.v8i32 - "llvm.intr.experimental.vector.reduce.and"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.and"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call float @llvm.experimental.vector.reduce.fmax.v8f32 - "llvm.intr.experimental.vector.reduce.fmax"(%arg1) : (!llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.fmax"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float // CHECK: call float @llvm.experimental.vector.reduce.fmin.v8f32 - "llvm.intr.experimental.vector.reduce.fmin"(%arg1) : (!llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.fmin"(%arg1) : (!llvm.vec<8 x float>) -> !llvm.float // CHECK: call i32 @llvm.experimental.vector.reduce.mul.v8i32 - "llvm.intr.experimental.vector.reduce.mul"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.mul"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.or.v8i32 - "llvm.intr.experimental.vector.reduce.or"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.or"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.smax.v8i32 - "llvm.intr.experimental.vector.reduce.smax"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.smax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.smin.v8i32 - "llvm.intr.experimental.vector.reduce.smin"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.smin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.umax.v8i32 - 
"llvm.intr.experimental.vector.reduce.umax"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.umax"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call i32 @llvm.experimental.vector.reduce.umin.v8i32 - "llvm.intr.experimental.vector.reduce.umin"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.umin"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 // CHECK: call float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32 - "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) : (!llvm.float, !llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float // CHECK: call float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32 - "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) : (!llvm.float, !llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float // CHECK: call reassoc float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32 - "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.v2.fadd"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float // CHECK: call reassoc float @llvm.experimental.vector.reduce.v2.fmul.f32.v8f32 - "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm<"<8 x float>">) -> !llvm.float + "llvm.intr.experimental.vector.reduce.v2.fmul"(%arg0, %arg1) {reassoc = true} : (!llvm.float, !llvm.vec<8 x float>) -> !llvm.float // CHECK: call i32 @llvm.experimental.vector.reduce.xor.v8i32 - "llvm.intr.experimental.vector.reduce.xor"(%arg2) : (!llvm<"<8 x i32>">) -> !llvm.i32 + "llvm.intr.experimental.vector.reduce.xor"(%arg2) : (!llvm.vec<8 x i32>) -> !llvm.i32 llvm.return } // CHECK-LABEL: @matrix_intrinsics // 4x16 16x3 -llvm.func @matrix_intrinsics(%A: !llvm<"<64 x float>">, %B: !llvm<"<48 x float>">, - %ptr: !llvm<"float*">, %stride: !llvm.i64) { +llvm.func @matrix_intrinsics(%A: !llvm.vec<64 x float>, %B: !llvm.vec<48 x float>, + %ptr: !llvm.ptr, %stride: !llvm.i64) { // CHECK: call <12 x float> @llvm.matrix.multiply.v12f32.v64f32.v48f32(<64 x float> %0, <48 x float> %1, i32 4, i32 16, i32 3) %C = llvm.intr.matrix.multiply %A, %B { lhs_rows = 4: i32, lhs_columns = 16: i32 , rhs_columns = 3: i32} : - (!llvm<"<64 x float>">, !llvm<"<48 x float>">) -> !llvm<"<12 x float>"> + (!llvm.vec<64 x float>, !llvm.vec<48 x float>) -> !llvm.vec<12 x float> // CHECK: call <48 x float> @llvm.matrix.transpose.v48f32(<48 x float> %1, i32 3, i32 16) %D = llvm.intr.matrix.transpose %B { rows = 3: i32, columns = 16: i32} : - !llvm<"<48 x float>"> into !llvm<"<48 x float>"> + !llvm.vec<48 x float> into !llvm.vec<48 x float> // CHECK: call <48 x float> @llvm.matrix.column.major.load.v48f32(float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) %E = llvm.intr.matrix.column.major.load %ptr, { isVolatile = 0: i1, rows = 3: i32, columns = 16: i32} : - !llvm<"<48 x float>"> from !llvm<"float*"> stride !llvm.i64 + !llvm.vec<48 x float> from !llvm.ptr stride !llvm.i64 // CHECK: call void @llvm.matrix.column.major.store.v48f32(<48 x float> %7, float* align 4 %2, i64 %3, i1 false, i32 3, i32 16) llvm.intr.matrix.column.major.store %E, %ptr, { isVolatile = 0: i1, rows = 3: i32, columns = 16: i32} : - !llvm<"<48 x float>"> to !llvm<"float*"> stride !llvm.i64 + 
!llvm.vec<48 x float> to !llvm.ptr stride !llvm.i64 llvm.return } // CHECK-LABEL: @masked_intrinsics -llvm.func @masked_intrinsics(%A: !llvm<"<7 x float>*">, %mask: !llvm<"<7 x i1>">) { +llvm.func @masked_intrinsics(%A: !llvm.ptr>, %mask: !llvm.vec<7 x i1>) { // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> undef) %a = llvm.intr.masked.load %A, %mask { alignment = 1: i32} : - (!llvm<"<7 x float>*">, !llvm<"<7 x i1>">) -> !llvm<"<7 x float>"> + (!llvm.ptr>, !llvm.vec<7 x i1>) -> !llvm.vec<7 x float> // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> %{{.*}}) %b = llvm.intr.masked.load %A, %mask, %a { alignment = 1: i32} : - (!llvm<"<7 x float>*">, !llvm<"<7 x i1>">, !llvm<"<7 x float>">) -> !llvm<"<7 x float>"> + (!llvm.ptr>, !llvm.vec<7 x i1>, !llvm.vec<7 x float>) -> !llvm.vec<7 x float> // CHECK: call void @llvm.masked.store.v7f32.p0v7f32(<7 x float> %{{.*}}, <7 x float>* %0, i32 {{.*}}, <7 x i1> %{{.*}}) llvm.intr.masked.store %b, %A, %mask { alignment = 1: i32} : - !llvm<"<7 x float>">, !llvm<"<7 x i1>"> into !llvm<"<7 x float>*"> + !llvm.vec<7 x float>, !llvm.vec<7 x i1> into !llvm.ptr> llvm.return } // CHECK-LABEL: @masked_gather_scatter_intrinsics -llvm.func @masked_gather_scatter_intrinsics(%M: !llvm<"<7 x float*>">, %mask: !llvm<"<7 x i1>">) { +llvm.func @masked_gather_scatter_intrinsics(%M: !llvm.vec<7 x ptr>, %mask: !llvm.vec<7 x i1>) { // CHECK: call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> undef) %a = llvm.intr.masked.gather %M, %mask { alignment = 1: i32} : - (!llvm<"<7 x float*>">, !llvm<"<7 x i1>">) -> !llvm<"<7 x float>"> + (!llvm.vec<7 x ptr>, !llvm.vec<7 x i1>) -> !llvm.vec<7 x float> // CHECK: call <7 x float> @llvm.masked.gather.v7f32.v7p0f32(<7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> %{{.*}}) %b = llvm.intr.masked.gather %M, %mask, %a { alignment = 1: i32} : - (!llvm<"<7 x float*>">, !llvm<"<7 x i1>">, !llvm<"<7 x float>">) -> !llvm<"<7 x float>"> + (!llvm.vec<7 x ptr>, !llvm.vec<7 x i1>, !llvm.vec<7 x float>) -> !llvm.vec<7 x float> // CHECK: call void @llvm.masked.scatter.v7f32.v7p0f32(<7 x float> %{{.*}}, <7 x float*> %{{.*}}, i32 1, <7 x i1> %{{.*}}) llvm.intr.masked.scatter %b, %M, %mask { alignment = 1: i32} : - !llvm<"<7 x float>">, !llvm<"<7 x i1>"> into !llvm<"<7 x float*>"> + !llvm.vec<7 x float>, !llvm.vec<7 x i1> into !llvm.vec<7 x ptr> llvm.return } // CHECK-LABEL: @memcpy_test -llvm.func @memcpy_test(%arg0: !llvm.i32, %arg1: !llvm.i1, %arg2: !llvm<"i8*">, %arg3: !llvm<"i8*">) { +llvm.func @memcpy_test(%arg0: !llvm.i32, %arg1: !llvm.i1, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 %{{.*}}) - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg1) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i32, !llvm.i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg1) : (!llvm.ptr, !llvm.ptr, !llvm.i32, !llvm.i1) -> () %sz = llvm.mlir.constant(10: i64) : !llvm.i64 // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 10, i1 %{{.*}}) - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg1) : (!llvm<"i8*">, !llvm<"i8*">, !llvm.i64, !llvm.i1) -> () + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg1) : (!llvm.ptr, !llvm.ptr, !llvm.i64, !llvm.i1) -> () llvm.return } diff --git a/mlir/test/Target/llvmir-invalid.mlir b/mlir/test/Target/llvmir-invalid.mlir index 
5effcdc60d374..14117594e2f89 100644
--- a/mlir/test/Target/llvmir-invalid.mlir
+++ b/mlir/test/Target/llvmir-invalid.mlir
@@ -21,16 +21,16 @@ llvm.func @invalid_align(%arg0 : !llvm.float {llvm.align = 4}) -> !llvm.float {
 // -----
-llvm.func @no_nested_struct() -> !llvm<"[2 x [2 x [2 x {i32}]]]"> {
+llvm.func @no_nested_struct() -> !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>> {
   // expected-error @+1 {{struct types are not supported in constants}}
-  %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm<"[2 x [2 x [2 x {i32}]]]">
-  llvm.return %0 : !llvm<"[2 x [2 x [2 x {i32}]]]">
+  %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>>
+  llvm.return %0 : !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>>
 }
 // -----
 // expected-error @+1 {{unsupported constant value}}
-llvm.mlir.global internal constant @test([2.5, 7.4]) : !llvm<"[2 x double]">
+llvm.mlir.global internal constant @test([2.5, 7.4]) : !llvm.array<2 x double>
 // -----
diff --git a/mlir/test/Target/llvmir.mlir b/mlir/test/Target/llvmir.mlir
index d6180cbf18494..5e57f1c7c6986 100644
--- a/mlir/test/Target/llvmir.mlir
+++ b/mlir/test/Target/llvmir.mlir
@@ -7,7 +7,7 @@ llvm.mlir.global internal @i32_global(42: i32) : !llvm.i32
 llvm.mlir.global internal constant @i32_const(52: i53) : !llvm.i53
 // CHECK: @int_global_array = internal global [3 x i32] [i32 62, i32 62, i32 62]
-llvm.mlir.global internal @int_global_array(dense<62> : vector<3xi32>) : !llvm<"[3 x i32]">
+llvm.mlir.global internal @int_global_array(dense<62> : vector<3xi32>) : !llvm.array<3 x i32>
 // CHECK: @i32_global_addr_space = internal addrspace(7) global i32 62
 llvm.mlir.global internal @i32_global_addr_space(62: i32) {addr_space = 7 : i32} : !llvm.i32
@@ -16,20 +16,20 @@ llvm.mlir.global internal @i32_global_addr_space(62: i32) {addr_space = 7 : i32}
 llvm.mlir.global internal @float_global(0.0: f32) : !llvm.float
 // CHECK: @float_global_array = internal global [1 x float] [float -5.000000e+00]
-llvm.mlir.global internal @float_global_array(dense<[-5.0]> : vector<1xf32>) : !llvm<"[1 x float]">
+llvm.mlir.global internal @float_global_array(dense<[-5.0]> : vector<1xf32>) : !llvm.array<1 x float>
 // CHECK: @string_const = internal constant [6 x i8] c"foobar"
-llvm.mlir.global internal constant @string_const("foobar") : !llvm<"[6 x i8]">
+llvm.mlir.global internal constant @string_const("foobar") : !llvm.array<6 x i8>
 // CHECK: @int_global_undef = internal global i64 undef
 llvm.mlir.global internal @int_global_undef() : !llvm.i64
 // CHECK: @int_gep = internal constant i32* getelementptr (i32, i32* @i32_global, i32 2)
-llvm.mlir.global internal constant @int_gep() : !llvm<"i32*"> {
-  %addr = llvm.mlir.addressof @i32_global : !llvm<"i32*">
+llvm.mlir.global internal constant @int_gep() : !llvm.ptr<i32> {
+  %addr = llvm.mlir.addressof @i32_global : !llvm.ptr<i32>
   %_c0 = llvm.mlir.constant(2: i32) :!llvm.i32
-  %gepinit = llvm.getelementptr %addr[%_c0] : (!llvm<"i32*">, !llvm.i32) -> !llvm<"i32*">
-  llvm.return %gepinit : !llvm<"i32*">
+  %gepinit = llvm.getelementptr %addr[%_c0] : (!llvm.ptr<i32>, !llvm.i32) -> !llvm.ptr<i32>
+  llvm.return %gepinit : !llvm.ptr<i32>
 }
 //
@@ -66,7 +66,7 @@ llvm.mlir.global external @external() : !llvm.i32
 //
 // CHECK: declare i8* @malloc(i64)
-llvm.func @malloc(!llvm.i64) -> !llvm<"i8*">
+llvm.func @malloc(!llvm.i64) -> !llvm.ptr<i8>
 // CHECK: declare void @free(i8*)
@@ -86,15 +86,15 @@ llvm.func @empty() {
llvm.func
@global_refs() { // Check load from globals. // CHECK: load i32, i32* @i32_global - %0 = llvm.mlir.addressof @i32_global : !llvm<"i32*"> - %1 = llvm.load %0 : !llvm<"i32*"> + %0 = llvm.mlir.addressof @i32_global : !llvm.ptr + %1 = llvm.load %0 : !llvm.ptr // Check the contracted form of load from array constants. // CHECK: load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @string_const, i64 0, i64 0) - %2 = llvm.mlir.addressof @string_const : !llvm<"[6 x i8]*"> + %2 = llvm.mlir.addressof @string_const : !llvm.ptr> %c0 = llvm.mlir.constant(0 : index) : !llvm.i64 - %3 = llvm.getelementptr %2[%c0, %c0] : (!llvm<"[6 x i8]*">, !llvm.i64, !llvm.i64) -> !llvm<"i8*"> - %4 = llvm.load %3 : !llvm<"i8*"> + %3 = llvm.getelementptr %2[%c0, %c0] : (!llvm.ptr>, !llvm.i64, !llvm.i64) -> !llvm.ptr + %4 = llvm.load %3 : !llvm.ptr llvm.return } @@ -414,12 +414,12 @@ llvm.func @memref_alloc() { %0 = llvm.mlir.constant(10 : index) : !llvm.i64 %1 = llvm.mlir.constant(10 : index) : !llvm.i64 %2 = llvm.mul %0, %1 : !llvm.i64 - %3 = llvm.mlir.undef : !llvm<"{ float* }"> + %3 = llvm.mlir.undef : !llvm.struct<(ptr)> %4 = llvm.mlir.constant(4 : index) : !llvm.i64 %5 = llvm.mul %2, %4 : !llvm.i64 - %6 = llvm.call @malloc(%5) : (!llvm.i64) -> !llvm<"i8*"> - %7 = llvm.bitcast %6 : !llvm<"i8*"> to !llvm<"float*"> - %8 = llvm.insertvalue %7, %3[0] : !llvm<"{ float* }"> + %6 = llvm.call @malloc(%5) : (!llvm.i64) -> !llvm.ptr + %7 = llvm.bitcast %6 : !llvm.ptr to !llvm.ptr + %8 = llvm.insertvalue %7, %3[0] : !llvm.struct<(ptr)> // CHECK-NEXT: ret void llvm.return } @@ -434,12 +434,12 @@ llvm.func @store_load_static() { // CHECK-NEXT: %{{[0-9]+}} = bitcast i8* %{{[0-9]+}} to float* // CHECK-NEXT: %{{[0-9]+}} = insertvalue { float* } undef, float* %{{[0-9]+}}, 0 %0 = llvm.mlir.constant(10 : index) : !llvm.i64 - %1 = llvm.mlir.undef : !llvm<"{ float* }"> + %1 = llvm.mlir.undef : !llvm.struct<(ptr)> %2 = llvm.mlir.constant(4 : index) : !llvm.i64 %3 = llvm.mul %0, %2 : !llvm.i64 - %4 = llvm.call @malloc(%3) : (!llvm.i64) -> !llvm<"i8*"> - %5 = llvm.bitcast %4 : !llvm<"i8*"> to !llvm<"float*"> - %6 = llvm.insertvalue %5, %1[0] : !llvm<"{ float* }"> + %4 = llvm.call @malloc(%3) : (!llvm.i64) -> !llvm.ptr + %5 = llvm.bitcast %4 : !llvm.ptr to !llvm.ptr + %6 = llvm.insertvalue %5, %1[0] : !llvm.struct<(ptr)> %7 = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float llvm.br ^bb1 ^bb1: // pred: ^bb0 @@ -457,9 +457,9 @@ llvm.func @store_load_static() { // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: store float 1.000000e+00, float* %{{[0-9]+}} %12 = llvm.mlir.constant(10 : index) : !llvm.i64 - %13 = llvm.extractvalue %6[0] : !llvm<"{ float* }"> - %14 = llvm.getelementptr %13[%10] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %7, %14 : !llvm<"float*"> + %13 = llvm.extractvalue %6[0] : !llvm.struct<(ptr)> + %14 = llvm.getelementptr %13[%10] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %7, %14 : !llvm.ptr %15 = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %{{[0-9]+}} = add i64 %{{[0-9]+}}, 1 %16 = llvm.add %10, %15 : !llvm.i64 @@ -482,9 +482,9 @@ llvm.func @store_load_static() { // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: %{{[0-9]+}} = load float, float* %{{[0-9]+}} %21 = llvm.mlir.constant(10 : index) : !llvm.i64 - %22 = llvm.extractvalue %6[0] : !llvm<"{ float* }"> - %23 = llvm.getelementptr %22[%19] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %24 = llvm.load %23 : 
!llvm<"float*"> + %22 = llvm.extractvalue %6[0] : !llvm.struct<(ptr)> + %23 = llvm.getelementptr %22[%19] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %24 = llvm.load %23 : !llvm.ptr %25 = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %{{[0-9]+}} = add i64 %{{[0-9]+}}, 1 %26 = llvm.add %19, %25 : !llvm.i64 @@ -502,13 +502,13 @@ llvm.func @store_load_dynamic(%arg0: !llvm.i64) { // CHECK-NEXT: %{{[0-9]+}} = bitcast i8* %{{[0-9]+}} to float* // CHECK-NEXT: %{{[0-9]+}} = insertvalue { float*, i64 } undef, float* %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { float*, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1 - %0 = llvm.mlir.undef : !llvm<"{ float*, i64 }"> + %0 = llvm.mlir.undef : !llvm.struct<(ptr, i64)> %1 = llvm.mlir.constant(4 : index) : !llvm.i64 %2 = llvm.mul %arg0, %1 : !llvm.i64 - %3 = llvm.call @malloc(%2) : (!llvm.i64) -> !llvm<"i8*"> - %4 = llvm.bitcast %3 : !llvm<"i8*"> to !llvm<"float*"> - %5 = llvm.insertvalue %4, %0[0] : !llvm<"{ float*, i64 }"> - %6 = llvm.insertvalue %arg0, %5[1] : !llvm<"{ float*, i64 }"> + %3 = llvm.call @malloc(%2) : (!llvm.i64) -> !llvm.ptr + %4 = llvm.bitcast %3 : !llvm.ptr to !llvm.ptr + %5 = llvm.insertvalue %4, %0[0] : !llvm.struct<(ptr, i64)> + %6 = llvm.insertvalue %arg0, %5[1] : !llvm.struct<(ptr, i64)> %7 = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float // CHECK-NEXT: br label %{{[0-9]+}} llvm.br ^bb1 @@ -526,10 +526,10 @@ llvm.func @store_load_dynamic(%arg0: !llvm.i64) { // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64 } %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: store float 1.000000e+00, float* %{{[0-9]+}} - %11 = llvm.extractvalue %6[1] : !llvm<"{ float*, i64 }"> - %12 = llvm.extractvalue %6[0] : !llvm<"{ float*, i64 }"> - %13 = llvm.getelementptr %12[%9] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %7, %13 : !llvm<"float*"> + %11 = llvm.extractvalue %6[1] : !llvm.struct<(ptr, i64)> + %12 = llvm.extractvalue %6[0] : !llvm.struct<(ptr, i64)> + %13 = llvm.getelementptr %12[%9] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %7, %13 : !llvm.ptr %14 = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %{{[0-9]+}} = add i64 %{{[0-9]+}}, 1 %15 = llvm.add %9, %14 : !llvm.i64 @@ -551,10 +551,10 @@ llvm.func @store_load_dynamic(%arg0: !llvm.i64) { // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64 } %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: %{{[0-9]+}} = load float, float* %{{[0-9]+}} - %19 = llvm.extractvalue %6[1] : !llvm<"{ float*, i64 }"> - %20 = llvm.extractvalue %6[0] : !llvm<"{ float*, i64 }"> - %21 = llvm.getelementptr %20[%17] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %22 = llvm.load %21 : !llvm<"float*"> + %19 = llvm.extractvalue %6[1] : !llvm.struct<(ptr, i64)> + %20 = llvm.extractvalue %6[0] : !llvm.struct<(ptr, i64)> + %21 = llvm.getelementptr %20[%17] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %22 = llvm.load %21 : !llvm.ptr %23 = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: %{{[0-9]+}} = add i64 %{{[0-9]+}}, 1 %24 = llvm.add %17, %23 : !llvm.i64 @@ -582,14 +582,14 @@ llvm.func @store_load_mixed(%arg0: !llvm.i64) { %3 = llvm.mul %1, %arg0 : !llvm.i64 %4 = llvm.mul %3, %2 : !llvm.i64 %5 = llvm.mul %4, %0 : !llvm.i64 - %6 = llvm.mlir.undef : !llvm<"{ float*, i64, i64 }"> + %6 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i64)> %7 = llvm.mlir.constant(4 : index) : !llvm.i64 %8 = llvm.mul %5, %7 : !llvm.i64 - %9 = llvm.call 
@malloc(%8) : (!llvm.i64) -> !llvm<"i8*"> - %10 = llvm.bitcast %9 : !llvm<"i8*"> to !llvm<"float*"> - %11 = llvm.insertvalue %10, %6[0] : !llvm<"{ float*, i64, i64 }"> - %12 = llvm.insertvalue %arg0, %11[1] : !llvm<"{ float*, i64, i64 }"> - %13 = llvm.insertvalue %0, %12[2] : !llvm<"{ float*, i64, i64 }"> + %9 = llvm.call @malloc(%8) : (!llvm.i64) -> !llvm.ptr + %10 = llvm.bitcast %9 : !llvm.ptr to !llvm.ptr + %11 = llvm.insertvalue %10, %6[0] : !llvm.struct<(ptr, i64, i64)> + %12 = llvm.insertvalue %arg0, %11[1] : !llvm.struct<(ptr, i64, i64)> + %13 = llvm.insertvalue %0, %12[2] : !llvm.struct<(ptr, i64, i64)> // CHECK-NEXT: %{{[0-9]+}} = call i64 @get_index() // CHECK-NEXT: %{{[0-9]+}} = call i64 @get_index() @@ -610,18 +610,18 @@ llvm.func @store_load_mixed(%arg0: !llvm.i64) { // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64, i64 } %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: store float 4.200000e+01, float* %{{[0-9]+}} - %20 = llvm.extractvalue %13[1] : !llvm<"{ float*, i64, i64 }"> + %20 = llvm.extractvalue %13[1] : !llvm.struct<(ptr, i64, i64)> %21 = llvm.mlir.constant(4 : index) : !llvm.i64 - %22 = llvm.extractvalue %13[2] : !llvm<"{ float*, i64, i64 }"> + %22 = llvm.extractvalue %13[2] : !llvm.struct<(ptr, i64, i64)> %23 = llvm.mul %14, %20 : !llvm.i64 %24 = llvm.add %23, %15 : !llvm.i64 %25 = llvm.mul %24, %21 : !llvm.i64 %26 = llvm.add %25, %16 : !llvm.i64 %27 = llvm.mul %26, %22 : !llvm.i64 %28 = llvm.add %27, %17 : !llvm.i64 - %29 = llvm.extractvalue %13[0] : !llvm<"{ float*, i64, i64 }"> - %30 = llvm.getelementptr %29[%28] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %18, %30 : !llvm<"float*"> + %29 = llvm.extractvalue %13[0] : !llvm.struct<(ptr, i64, i64)> + %30 = llvm.getelementptr %29[%28] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %18, %30 : !llvm.ptr // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64, i64 } %{{[0-9]+}}, 1 // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64, i64 } %{{[0-9]+}}, 2 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, %{{[0-9]+}} @@ -634,24 +634,24 @@ llvm.func @store_load_mixed(%arg0: !llvm.i64) { // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: %{{[0-9]+}} = load float, float* %{{[0-9]+}} %31 = llvm.mlir.constant(2 : index) : !llvm.i64 - %32 = llvm.extractvalue %13[1] : !llvm<"{ float*, i64, i64 }"> + %32 = llvm.extractvalue %13[1] : !llvm.struct<(ptr, i64, i64)> %33 = llvm.mlir.constant(4 : index) : !llvm.i64 - %34 = llvm.extractvalue %13[2] : !llvm<"{ float*, i64, i64 }"> + %34 = llvm.extractvalue %13[2] : !llvm.struct<(ptr, i64, i64)> %35 = llvm.mul %17, %32 : !llvm.i64 %36 = llvm.add %35, %16 : !llvm.i64 %37 = llvm.mul %36, %33 : !llvm.i64 %38 = llvm.add %37, %15 : !llvm.i64 %39 = llvm.mul %38, %34 : !llvm.i64 %40 = llvm.add %39, %14 : !llvm.i64 - %41 = llvm.extractvalue %13[0] : !llvm<"{ float*, i64, i64 }"> - %42 = llvm.getelementptr %41[%40] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %43 = llvm.load %42 : !llvm<"float*"> + %41 = llvm.extractvalue %13[0] : !llvm.struct<(ptr, i64, i64)> + %42 = llvm.getelementptr %41[%40] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %43 = llvm.load %42 : !llvm.ptr // CHECK-NEXT: ret void llvm.return } // CHECK-LABEL: define { float*, i64 } @memref_args_rets({ float* } {{%.*}}, { float*, i64 } {{%.*}}, { float*, i64 } {{%.*}}) -llvm.func @memref_args_rets(%arg0: !llvm<"{ float* }">, %arg1: !llvm<"{ float*, i64 }">, %arg2: !llvm<"{ float*, 
i64 }">) -> !llvm<"{ float*, i64 }"> { +llvm.func @memref_args_rets(%arg0: !llvm.struct<(ptr)>, %arg1: !llvm.struct<(ptr, i64)>, %arg2: !llvm.struct<(ptr, i64)>) -> !llvm.struct<(ptr, i64)> { %0 = llvm.mlir.constant(7 : index) : !llvm.i64 // CHECK-NEXT: %{{[0-9]+}} = call i64 @get_index() %1 = llvm.call @get_index() : () -> !llvm.i64 @@ -660,17 +660,17 @@ llvm.func @memref_args_rets(%arg0: !llvm<"{ float* }">, %arg1: !llvm<"{ float*, // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 7 // CHECK-NEXT: store float 4.200000e+01, float* %{{[0-9]+}} %3 = llvm.mlir.constant(10 : index) : !llvm.i64 - %4 = llvm.extractvalue %arg0[0] : !llvm<"{ float* }"> - %5 = llvm.getelementptr %4[%0] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %2, %5 : !llvm<"float*"> + %4 = llvm.extractvalue %arg0[0] : !llvm.struct<(ptr)> + %5 = llvm.getelementptr %4[%0] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %2, %5 : !llvm.ptr // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64 } %{{[0-9]+}}, 1 // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64 } %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 7 // CHECK-NEXT: store float 4.200000e+01, float* %{{[0-9]+}} - %6 = llvm.extractvalue %arg1[1] : !llvm<"{ float*, i64 }"> - %7 = llvm.extractvalue %arg1[0] : !llvm<"{ float*, i64 }"> - %8 = llvm.getelementptr %7[%0] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %2, %8 : !llvm<"float*"> + %6 = llvm.extractvalue %arg1[1] : !llvm.struct<(ptr, i64)> + %7 = llvm.extractvalue %arg1[0] : !llvm.struct<(ptr, i64)> + %8 = llvm.getelementptr %7[%0] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %2, %8 : !llvm.ptr // CHECK-NEXT: %{{[0-9]+}} = extractvalue { float*, i64 } %{{[0-9]+}}, 1 // CHECK-NEXT: %{{[0-9]+}} = mul i64 7, %{{[0-9]+}} // CHECK-NEXT: %{{[0-9]+}} = add i64 %{{[0-9]+}}, %{{[0-9]+}} @@ -678,12 +678,12 @@ llvm.func @memref_args_rets(%arg0: !llvm<"{ float* }">, %arg1: !llvm<"{ float*, // CHECK-NEXT: %{{[0-9]+}} = getelementptr float, float* %{{[0-9]+}}, i64 %{{[0-9]+}} // CHECK-NEXT: store float 4.200000e+01, float* %{{[0-9]+}} %9 = llvm.mlir.constant(10 : index) : !llvm.i64 - %10 = llvm.extractvalue %arg2[1] : !llvm<"{ float*, i64 }"> + %10 = llvm.extractvalue %arg2[1] : !llvm.struct<(ptr, i64)> %11 = llvm.mul %0, %10 : !llvm.i64 %12 = llvm.add %11, %1 : !llvm.i64 - %13 = llvm.extractvalue %arg2[0] : !llvm<"{ float*, i64 }"> - %14 = llvm.getelementptr %13[%12] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %2, %14 : !llvm<"float*"> + %13 = llvm.extractvalue %arg2[0] : !llvm.struct<(ptr, i64)> + %14 = llvm.getelementptr %13[%12] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %2, %14 : !llvm.ptr // CHECK-NEXT: %{{[0-9]+}} = mul i64 10, %{{[0-9]+}} // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4 // CHECK-NEXT: %{{[0-9]+}} = call i8* @malloc(i64 %{{[0-9]+}}) @@ -692,28 +692,28 @@ llvm.func @memref_args_rets(%arg0: !llvm<"{ float* }">, %arg1: !llvm<"{ float*, // CHECK-NEXT: %{{[0-9]+}} = insertvalue { float*, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1 %15 = llvm.mlir.constant(10 : index) : !llvm.i64 %16 = llvm.mul %15, %1 : !llvm.i64 - %17 = llvm.mlir.undef : !llvm<"{ float*, i64 }"> + %17 = llvm.mlir.undef : !llvm.struct<(ptr, i64)> %18 = llvm.mlir.constant(4 : index) : !llvm.i64 %19 = llvm.mul %16, %18 : !llvm.i64 - %20 = llvm.call @malloc(%19) : (!llvm.i64) -> !llvm<"i8*"> - %21 = llvm.bitcast %20 : !llvm<"i8*"> to !llvm<"float*"> - %22 = llvm.insertvalue %21, %17[0] : !llvm<"{ float*, i64 }"> 
- %23 = llvm.insertvalue %1, %22[1] : !llvm<"{ float*, i64 }"> + %20 = llvm.call @malloc(%19) : (!llvm.i64) -> !llvm.ptr + %21 = llvm.bitcast %20 : !llvm.ptr to !llvm.ptr + %22 = llvm.insertvalue %21, %17[0] : !llvm.struct<(ptr, i64)> + %23 = llvm.insertvalue %1, %22[1] : !llvm.struct<(ptr, i64)> // CHECK-NEXT: ret { float*, i64 } %{{[0-9]+}} - llvm.return %23 : !llvm<"{ float*, i64 }"> + llvm.return %23 : !llvm.struct<(ptr, i64)> } // CHECK-LABEL: define i64 @memref_dim({ float*, i64, i64 } {{%.*}}) -llvm.func @memref_dim(%arg0: !llvm<"{ float*, i64, i64 }">) -> !llvm.i64 { +llvm.func @memref_dim(%arg0: !llvm.struct<(ptr, i64, i64)>) -> !llvm.i64 { // Expecting this to create an LLVM constant. %0 = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: %2 = extractvalue { float*, i64, i64 } %0, 1 - %1 = llvm.extractvalue %arg0[1] : !llvm<"{ float*, i64, i64 }"> + %1 = llvm.extractvalue %arg0[1] : !llvm.struct<(ptr, i64, i64)> // Expecting this to create an LLVM constant. %2 = llvm.mlir.constant(10 : index) : !llvm.i64 // CHECK-NEXT: %3 = extractvalue { float*, i64, i64 } %0, 2 - %3 = llvm.extractvalue %arg0[2] : !llvm<"{ float*, i64, i64 }"> + %3 = llvm.extractvalue %arg0[2] : !llvm.struct<(ptr, i64, i64)> // Checking that the constant for d0 has been created. // CHECK-NEXT: %4 = add i64 42, %2 %4 = llvm.add %0, %1 : !llvm.i64 @@ -728,22 +728,22 @@ llvm.func @memref_dim(%arg0: !llvm<"{ float*, i64, i64 }">) -> !llvm.i64 { llvm.func @get_i64() -> !llvm.i64 llvm.func @get_f32() -> !llvm.float -llvm.func @get_memref() -> !llvm<"{ float*, i64, i64 }"> +llvm.func @get_memref() -> !llvm.struct<(ptr, i64, i64)> // CHECK-LABEL: define { i64, float, { float*, i64, i64 } } @multireturn() -llvm.func @multireturn() -> !llvm<"{ i64, float, { float*, i64, i64 } }"> { +llvm.func @multireturn() -> !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> { %0 = llvm.call @get_i64() : () -> !llvm.i64 %1 = llvm.call @get_f32() : () -> !llvm.float - %2 = llvm.call @get_memref() : () -> !llvm<"{ float*, i64, i64 }"> + %2 = llvm.call @get_memref() : () -> !llvm.struct<(ptr, i64, i64)> // CHECK: %{{[0-9]+}} = insertvalue { i64, float, { float*, i64, i64 } } undef, i64 %{{[0-9]+}}, 0 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { i64, float, { float*, i64, i64 } } %{{[0-9]+}}, float %{{[0-9]+}}, 1 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { i64, float, { float*, i64, i64 } } %{{[0-9]+}}, { float*, i64, i64 } %{{[0-9]+}}, 2 // CHECK-NEXT: ret { i64, float, { float*, i64, i64 } } %{{[0-9]+}} - %3 = llvm.mlir.undef : !llvm<"{ i64, float, { float*, i64, i64 } }"> - %4 = llvm.insertvalue %0, %3[0] : !llvm<"{ i64, float, { float*, i64, i64 } }"> - %5 = llvm.insertvalue %1, %4[1] : !llvm<"{ i64, float, { float*, i64, i64 } }"> - %6 = llvm.insertvalue %2, %5[2] : !llvm<"{ i64, float, { float*, i64, i64 } }"> - llvm.return %6 : !llvm<"{ i64, float, { float*, i64, i64 } }"> + %3 = llvm.mlir.undef : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %4 = llvm.insertvalue %0, %3[0] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %5 = llvm.insertvalue %1, %4[1] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %6 = llvm.insertvalue %2, %5[2] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + llvm.return %6 : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> } @@ -753,10 +753,10 @@ llvm.func @multireturn_caller() { // CHECK-NEXT: [[ret0:%[0-9]+]] = extractvalue { i64, float, { float*, i64, i64 } } %1, 0 // CHECK-NEXT: [[ret1:%[0-9]+]] = extractvalue { i64, float, { float*, i64, i64 } } %1, 1 // CHECK-NEXT: 
[[ret2:%[0-9]+]] = extractvalue { i64, float, { float*, i64, i64 } } %1, 2 - %0 = llvm.call @multireturn() : () -> !llvm<"{ i64, float, { float*, i64, i64 } }"> - %1 = llvm.extractvalue %0[0] : !llvm<"{ i64, float, { float*, i64, i64 } }"> - %2 = llvm.extractvalue %0[1] : !llvm<"{ i64, float, { float*, i64, i64 } }"> - %3 = llvm.extractvalue %0[2] : !llvm<"{ i64, float, { float*, i64, i64 } }"> + %0 = llvm.call @multireturn() : () -> !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %1 = llvm.extractvalue %0[0] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %2 = llvm.extractvalue %0[1] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> + %3 = llvm.extractvalue %0[2] : !llvm.struct<(i64, float, struct<(ptr, i64, i64)>)> %4 = llvm.mlir.constant(42) : !llvm.i64 // CHECK: add i64 [[ret0]], 42 %5 = llvm.add %1, %4 : !llvm.i64 @@ -766,86 +766,86 @@ llvm.func @multireturn_caller() { %8 = llvm.mlir.constant(0 : index) : !llvm.i64 %9 = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK: extractvalue { float*, i64, i64 } [[ret2]], 0 - %10 = llvm.extractvalue %3[1] : !llvm<"{ float*, i64, i64 }"> + %10 = llvm.extractvalue %3[1] : !llvm.struct<(ptr, i64, i64)> %11 = llvm.mlir.constant(10 : index) : !llvm.i64 - %12 = llvm.extractvalue %3[2] : !llvm<"{ float*, i64, i64 }"> + %12 = llvm.extractvalue %3[2] : !llvm.struct<(ptr, i64, i64)> %13 = llvm.mul %8, %10 : !llvm.i64 %14 = llvm.add %13, %8 : !llvm.i64 %15 = llvm.mul %14, %11 : !llvm.i64 %16 = llvm.add %15, %8 : !llvm.i64 %17 = llvm.mul %16, %12 : !llvm.i64 %18 = llvm.add %17, %8 : !llvm.i64 - %19 = llvm.extractvalue %3[0] : !llvm<"{ float*, i64, i64 }"> - %20 = llvm.getelementptr %19[%18] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %21 = llvm.load %20 : !llvm<"float*"> + %19 = llvm.extractvalue %3[0] : !llvm.struct<(ptr, i64, i64)> + %20 = llvm.getelementptr %19[%18] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %21 = llvm.load %20 : !llvm.ptr llvm.return } // CHECK-LABEL: define <4 x float> @vector_ops(<4 x float> {{%.*}}, <4 x i1> {{%.*}}, <4 x i64> {{%.*}}) -llvm.func @vector_ops(%arg0: !llvm<"<4 x float>">, %arg1: !llvm<"<4 x i1>">, %arg2: !llvm<"<4 x i64>">) -> !llvm<"<4 x float>"> { - %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm<"<4 x float>"> +llvm.func @vector_ops(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.vec<4 x i1>, %arg2: !llvm.vec<4 x i64>) -> !llvm.vec<4 x float> { + %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm.vec<4 x float> // CHECK-NEXT: %4 = fadd <4 x float> %0, - %1 = llvm.fadd %arg0, %0 : !llvm<"<4 x float>"> + %1 = llvm.fadd %arg0, %0 : !llvm.vec<4 x float> // CHECK-NEXT: %5 = select <4 x i1> %1, <4 x float> %4, <4 x float> %0 - %2 = llvm.select %arg1, %1, %arg0 : !llvm<"<4 x i1>">, !llvm<"<4 x float>"> + %2 = llvm.select %arg1, %1, %arg0 : !llvm.vec<4 x i1>, !llvm.vec<4 x float> // CHECK-NEXT: %6 = sdiv <4 x i64> %2, %2 - %3 = llvm.sdiv %arg2, %arg2 : !llvm<"<4 x i64>"> + %3 = llvm.sdiv %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %7 = udiv <4 x i64> %2, %2 - %4 = llvm.udiv %arg2, %arg2 : !llvm<"<4 x i64>"> + %4 = llvm.udiv %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %8 = srem <4 x i64> %2, %2 - %5 = llvm.srem %arg2, %arg2 : !llvm<"<4 x i64>"> + %5 = llvm.srem %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %9 = urem <4 x i64> %2, %2 - %6 = llvm.urem %arg2, %arg2 : !llvm<"<4 x i64>"> + %6 = llvm.urem %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %10 = fdiv <4 x float> %0, - %7 = llvm.fdiv %arg0, %0 : !llvm<"<4 x float>"> + %7 = llvm.fdiv %arg0, %0 : 
!llvm.vec<4 x float> // CHECK-NEXT: %11 = frem <4 x float> %0, - %8 = llvm.frem %arg0, %0 : !llvm<"<4 x float>"> + %8 = llvm.frem %arg0, %0 : !llvm.vec<4 x float> // CHECK-NEXT: %12 = and <4 x i64> %2, %2 - %9 = llvm.and %arg2, %arg2 : !llvm<"<4 x i64>"> + %9 = llvm.and %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %13 = or <4 x i64> %2, %2 - %10 = llvm.or %arg2, %arg2 : !llvm<"<4 x i64>"> + %10 = llvm.or %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %14 = xor <4 x i64> %2, %2 - %11 = llvm.xor %arg2, %arg2 : !llvm<"<4 x i64>"> + %11 = llvm.xor %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %15 = shl <4 x i64> %2, %2 - %12 = llvm.shl %arg2, %arg2 : !llvm<"<4 x i64>"> + %12 = llvm.shl %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %16 = lshr <4 x i64> %2, %2 - %13 = llvm.lshr %arg2, %arg2 : !llvm<"<4 x i64>"> + %13 = llvm.lshr %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: %17 = ashr <4 x i64> %2, %2 - %14 = llvm.ashr %arg2, %arg2 : !llvm<"<4 x i64>"> + %14 = llvm.ashr %arg2, %arg2 : !llvm.vec<4 x i64> // CHECK-NEXT: ret <4 x float> %4 - llvm.return %1 : !llvm<"<4 x float>"> + llvm.return %1 : !llvm.vec<4 x float> } // CHECK-LABEL: @vector_splat_1d -llvm.func @vector_splat_1d() -> !llvm<"<4 x float>"> { +llvm.func @vector_splat_1d() -> !llvm.vec<4 x float> { // CHECK: ret <4 x float> zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : !llvm<"<4 x float>"> - llvm.return %0 : !llvm<"<4 x float>"> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4xf32>) : !llvm.vec<4 x float> + llvm.return %0 : !llvm.vec<4 x float> } // CHECK-LABEL: @vector_splat_2d -llvm.func @vector_splat_2d() -> !llvm<"[4 x <16 x float>]"> { +llvm.func @vector_splat_2d() -> !llvm.array<4 x vec<16 x float>> { // CHECK: ret [4 x <16 x float>] zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm<"[4 x <16 x float>]"> - llvm.return %0 : !llvm<"[4 x <16 x float>]"> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16xf32>) : !llvm.array<4 x vec<16 x float>> + llvm.return %0 : !llvm.array<4 x vec<16 x float>> } // CHECK-LABEL: @vector_splat_3d -llvm.func @vector_splat_3d() -> !llvm<"[4 x [16 x <4 x float>]]"> { +llvm.func @vector_splat_3d() -> !llvm.array<4 x array<16 x vec<4 x float>>> { // CHECK: ret [4 x [16 x <4 x float>]] zeroinitializer - %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm<"[4 x [16 x <4 x float>]]"> - llvm.return %0 : !llvm<"[4 x [16 x <4 x float>]]"> + %0 = llvm.mlir.constant(dense<0.000000e+00> : vector<4x16x4xf32>) : !llvm.array<4 x array<16 x vec<4 x float>>> + llvm.return %0 : !llvm.array<4 x array<16 x vec<4 x float>>> } // CHECK-LABEL: @vector_splat_nonzero -llvm.func @vector_splat_nonzero() -> !llvm<"<4 x float>"> { +llvm.func @vector_splat_nonzero() -> !llvm.vec<4 x float> { // CHECK: ret <4 x float> - %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm<"<4 x float>"> - llvm.return %0 : !llvm<"<4 x float>"> + %0 = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : !llvm.vec<4 x float> + llvm.return %0 : !llvm.vec<4 x float> } // CHECK-LABEL: @ops -llvm.func @ops(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.i32, %arg3: !llvm.i32) -> !llvm<"{ float, i32 }"> { +llvm.func @ops(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.i32, %arg3: !llvm.i32) -> !llvm.struct<(float, i32)> { // CHECK-NEXT: fsub float %0, %1 %0 = llvm.fsub %arg0, %arg1 : !llvm.float // CHECK-NEXT: %6 = sub i32 %2, %3 @@ -863,9 +863,9 @@ llvm.func @ops(%arg0: !llvm.float, %arg1: 
!llvm.float, %arg2: !llvm.i32, %arg3:
 // CHECK-NEXT: %12 = urem i32 %2, %3
 %7 = llvm.urem %arg2, %arg3 : !llvm.i32
- %8 = llvm.mlir.undef : !llvm<"{ float, i32 }">
- %9 = llvm.insertvalue %0, %8[0] : !llvm<"{ float, i32 }">
- %10 = llvm.insertvalue %3, %9[1] : !llvm<"{ float, i32 }">
+ %8 = llvm.mlir.undef : !llvm.struct<(float, i32)>
+ %9 = llvm.insertvalue %0, %8[0] : !llvm.struct<(float, i32)>
+ %10 = llvm.insertvalue %3, %9[1] : !llvm.struct<(float, i32)>
 // CHECK: %15 = fdiv float %0, %1
 %11 = llvm.fdiv %arg0, %arg1 : !llvm.float
@@ -888,7 +888,7 @@ llvm.func @ops(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.i32, %arg3:
 // CHECK-NEXT: fneg float %0
 %19 = llvm.fneg %arg0 : !llvm.float
- llvm.return %10 : !llvm<"{ float, i32 }">
+ llvm.return %10 : !llvm.struct<(float, i32)>
 }
 //
@@ -898,14 +898,14 @@ llvm.func @ops(%arg0: !llvm.float, %arg1: !llvm.float, %arg2: !llvm.i32, %arg3:
 // CHECK-LABEL: define void @indirect_const_call(i64 {{%.*}})
 llvm.func @indirect_const_call(%arg0: !llvm.i64) {
 // CHECK-NEXT: call void @body(i64 %0)
- %0 = llvm.mlir.addressof @body : !llvm<"void (i64)*">
+ %0 = llvm.mlir.addressof @body : !llvm.ptr<func<void (i64)>>
 llvm.call %0(%arg0) : (!llvm.i64) -> ()
 // CHECK-NEXT: ret void
 llvm.return
 }
 // CHECK-LABEL: define i32 @indirect_call(i32 (float)* {{%.*}}, float {{%.*}})
-llvm.func @indirect_call(%arg0: !llvm<"i32 (float)*">, %arg1: !llvm.float) -> !llvm.i32 {
+llvm.func @indirect_call(%arg0: !llvm.ptr<func<i32 (float)>>, %arg1: !llvm.float) -> !llvm.i32 {
 // CHECK-NEXT: %3 = call i32 %0(float %1)
 %0 = llvm.call %arg0(%arg1) : (!llvm.float) -> !llvm.i32
 // CHECK-NEXT: ret i32 %3
@@ -935,12 +935,12 @@ llvm.func @cond_br_arguments(%arg0: !llvm.i1, %arg1: !llvm.i1) {
 }
 // CHECK-LABEL: define void @llvm_noalias(float* noalias {{%*.}})
-llvm.func @llvm_noalias(%arg0: !llvm<"float*"> {llvm.noalias = true}) {
+llvm.func @llvm_noalias(%arg0: !llvm.ptr<float> {llvm.noalias = true}) {
 llvm.return
 }
 // CHECK-LABEL: define void @llvm_align(float* align 4 {{%*.}})
-llvm.func @llvm_align(%arg0: !llvm<"float*"> {llvm.align = 4}) {
+llvm.func @llvm_align(%arg0: !llvm.ptr<float> {llvm.align = 4}) {
 llvm.return
 }
@@ -950,8 +950,8 @@ llvm.func @llvm_varargs(...)
 llvm.func @intpointerconversion(%arg0 : !llvm.i32) -> !llvm.i32 {
 // CHECK: %2 = inttoptr i32 %0 to i32*
 // CHECK-NEXT: %3 = ptrtoint i32* %2 to i32
- %1 = llvm.inttoptr %arg0 : !llvm.i32 to !llvm<"i32*">
- %2 = llvm.ptrtoint %1 : !llvm<"i32*"> to !llvm.i32
+ %1 = llvm.inttoptr %arg0 : !llvm.i32 to !llvm.ptr<i32>
+ %2 = llvm.ptrtoint %1 : !llvm.ptr<i32> to !llvm.i32
 llvm.return %2 : !llvm.i32
 }
@@ -968,16 +968,16 @@ llvm.func @fpconversion(%arg0 : !llvm.i32) -> !llvm.i32 {
 }
 // CHECK-LABEL: @addrspace
-llvm.func @addrspace(%arg0 : !llvm<"i32*">) -> !llvm<"i32 addrspace(2)*"> {
+llvm.func @addrspace(%arg0 : !llvm.ptr<i32>) -> !llvm.ptr<i32, 2> {
 // CHECK: %2 = addrspacecast i32* %0 to i32 addrspace(2)*
- %1 = llvm.addrspacecast %arg0 : !llvm<"i32*"> to !llvm<"i32 addrspace(2)*">
- llvm.return %1 : !llvm<"i32 addrspace(2)*">
+ %1 = llvm.addrspacecast %arg0 : !llvm.ptr<i32> to !llvm.ptr<i32, 2>
+ llvm.return %1 : !llvm.ptr<i32, 2>
 }
-llvm.func @stringconstant() -> !llvm<"i8*"> {
- %1 = llvm.mlir.constant("Hello world!") : !llvm<"i8*">
+llvm.func @stringconstant() -> !llvm.ptr<i8> {
+ %1 = llvm.mlir.constant("Hello world!") : !llvm.ptr<i8>
 // CHECK: ret [12 x i8] c"Hello world!"
- llvm.return %1 : !llvm<"i8*">
+ llvm.return %1 : !llvm.ptr<i8>
 }
 llvm.func @noreach() {
@@ -1019,22 +1019,22 @@ llvm.func @fcmp(%arg0: !llvm.float, %arg1: !llvm.float) {
 }
 // CHECK-LABEL: @vect
-llvm.func @vect(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i32, %arg2: !llvm.float) {
+llvm.func @vect(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i32, %arg2: !llvm.float) {
 // CHECK-NEXT: extractelement <4 x float> {{.*}}, i32
 // CHECK-NEXT: insertelement <4 x float> {{.*}}, float %2, i32
 // CHECK-NEXT: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <5 x i32>
- %0 = llvm.extractelement %arg0[%arg1 : !llvm.i32] : !llvm<"<4 x float>">
- %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i32] : !llvm<"<4 x float>">
- %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>">
+ %0 = llvm.extractelement %arg0[%arg1 : !llvm.i32] : !llvm.vec<4 x float>
+ %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i32] : !llvm.vec<4 x float>
+ %2 = llvm.shufflevector %arg0, %arg0 [0 : i32, 0 : i32, 0 : i32, 0 : i32, 7 : i32] : !llvm.vec<4 x float>, !llvm.vec<4 x float>
 llvm.return
 }
 // CHECK-LABEL: @vect_i64idx
-llvm.func @vect_i64idx(%arg0: !llvm<"<4 x float>">, %arg1: !llvm.i64, %arg2: !llvm.float) {
+llvm.func @vect_i64idx(%arg0: !llvm.vec<4 x float>, %arg1: !llvm.i64, %arg2: !llvm.float) {
 // CHECK-NEXT: extractelement <4 x float> {{.*}}, i64
 // CHECK-NEXT: insertelement <4 x float> {{.*}}, float %2, i64
- %0 = llvm.extractelement %arg0[%arg1 : !llvm.i64] : !llvm<"<4 x float>">
- %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i64] : !llvm<"<4 x float>">
+ %0 = llvm.extractelement %arg0[%arg1 : !llvm.i64] : !llvm.vec<4 x float>
+ %1 = llvm.insertelement %arg2, %arg0[%arg1 : !llvm.i64] : !llvm.vec<4 x float>
 llvm.return
 }
@@ -1043,27 +1043,27 @@ llvm.func @alloca(%size : !llvm.i64) {
 // Alignment automatically set by the LLVM IR builder when alignment attribute
 // is 0.
 // CHECK: alloca {{.*}} align 4
- llvm.alloca %size x !llvm.i32 {alignment = 0} : (!llvm.i64) -> (!llvm<"i32*">)
+ llvm.alloca %size x !llvm.i32 {alignment = 0} : (!llvm.i64) -> (!llvm.ptr<i32>)
 // CHECK-NEXT: alloca {{.*}} align 8
- llvm.alloca %size x !llvm.i32 {alignment = 8} : (!llvm.i64) -> (!llvm<"i32*">)
+ llvm.alloca %size x !llvm.i32 {alignment = 8} : (!llvm.i64) -> (!llvm.ptr<i32>)
 llvm.return
 }
 // CHECK-LABEL: @constants
-llvm.func @constants() -> !llvm<"<4 x float>"> {
+llvm.func @constants() -> !llvm.vec<4 x float> {
 // CHECK: ret <4 x float>
- %0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : !llvm<"<4 x float>">
- llvm.return %0 : !llvm<"<4 x float>">
+ %0 = llvm.mlir.constant(sparse<[[0]], [4.2e+01]> : vector<4xf32>) : !llvm.vec<4 x float>
+ llvm.return %0 : !llvm.vec<4 x float>
 }
 // CHECK-LABEL: @fp_casts
-llvm.func @fp_casts(%fp1 : !llvm<"float">, %fp2 : !llvm<"double">) -> !llvm.i16 {
+llvm.func @fp_casts(%fp1 : !llvm.float, %fp2 : !llvm.double) -> !llvm.i16 {
 // CHECK: fptrunc double {{.*}} to float
- %a = llvm.fptrunc %fp2 : !llvm<"double"> to !llvm<"float">
+ %a = llvm.fptrunc %fp2 : !llvm.double to !llvm.float
 // CHECK: fpext float {{.*}} to double
- %b = llvm.fpext %fp1 : !llvm<"float"> to !llvm<"double">
+ %b = llvm.fpext %fp1 : !llvm.float to !llvm.double
 // CHECK: fptosi double {{.*}} to i16
- %c = llvm.fptosi %b : !llvm<"double"> to !llvm.i16
+ %c = llvm.fptosi %b : !llvm.double to !llvm.i16
 llvm.return %c : !llvm.i16
 }
@@ -1080,35 +1080,35 @@ llvm.func @integer_extension_and_truncation(%a : !llvm.i32) {
 // Check that the auxiliary `null` operation is converted into a `null` value.
 // CHECK-LABEL: @null
-llvm.func @null() -> !llvm<"i32*"> {
- %0 = llvm.mlir.null : !llvm<"i32*">
+llvm.func @null() -> !llvm.ptr<i32> {
+ %0 = llvm.mlir.null : !llvm.ptr<i32>
 // CHECK: ret i32* null
- llvm.return %0 : !llvm<"i32*">
+ llvm.return %0 : !llvm.ptr<i32>
 }
 // Check that dense elements attributes are exported properly in constants.
 // CHECK-LABEL: @elements_constant_3d_vector
-llvm.func @elements_constant_3d_vector() -> !llvm<"[2 x [2 x <2 x i32>]]"> {
+llvm.func @elements_constant_3d_vector() -> !llvm.array<2 x array<2 x vec<2 x i32>>> {
 // CHECK: ret [2 x [2 x <2 x i32>]]
 // CHECK-SAME: {{\[}}[2 x <2 x i32>] [<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>],
 // CHECK-SAME: [2 x <2 x i32>] [<2 x i32> <i32 42, i32 43>, <2 x i32> <i32 44, i32 45>]]
- %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm<"[2 x [2 x <2 x i32>]]">
- llvm.return %0 : !llvm<"[2 x [2 x <2 x i32>]]">
+ %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : vector<2x2x2xi32>) : !llvm.array<2 x array<2 x vec<2 x i32>>>
+ llvm.return %0 : !llvm.array<2 x array<2 x vec<2 x i32>>>
 }
 // CHECK-LABEL: @elements_constant_3d_array
-llvm.func @elements_constant_3d_array() -> !llvm<"[2 x [2 x [2 x i32]]]"> {
+llvm.func @elements_constant_3d_array() -> !llvm.array<2 x array<2 x array<2 x i32>>> {
 // CHECK: ret [2 x [2 x [2 x i32]]]
 // CHECK-SAME: {{\[}}[2 x [2 x i32]] {{\[}}[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]],
 // CHECK-SAME: [2 x [2 x i32]] {{\[}}[2 x i32] [i32 42, i32 43], [2 x i32] [i32 44, i32 45]]]
- %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm<"[2 x [2 x [2 x i32]]]">
- llvm.return %0 : !llvm<"[2 x [2 x [2 x i32]]]">
+ %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm.array<2 x array<2 x array<2 x i32>>>
+ llvm.return %0 : !llvm.array<2 x array<2 x array<2 x i32>>>
 }
 // CHECK-LABEL: @atomicrmw
 llvm.func @atomicrmw(
- %f32_ptr : !llvm<"float*">, %f32 : !llvm.float,
- %i32_ptr : !llvm<"i32*">, %i32 : !llvm.i32) {
+ %f32_ptr : !llvm.ptr<float>, %f32 : !llvm.float,
+ %i32_ptr : !llvm.ptr<i32>, %i32 : !llvm.i32) {
 // CHECK: atomicrmw fadd float* %{{.*}}, float %{{.*}} unordered
 %0 = llvm.atomicrmw fadd %f32_ptr, %f32 unordered : !llvm.float
 // CHECK: atomicrmw fsub float* %{{.*}}, float %{{.*}} unordered
@@ -1139,34 +1139,34 @@ llvm.func @atomicrmw(
 }
 // CHECK-LABEL: @cmpxchg
-llvm.func @cmpxchg(%ptr : !llvm<"float*">, %cmp : !llvm.float, %val: !llvm.float) {
+llvm.func @cmpxchg(%ptr : !llvm.ptr<float>, %cmp : !llvm.float, %val: !llvm.float) {
 // CHECK: cmpxchg float* %{{.*}}, float %{{.*}}, float %{{.*}} acq_rel monotonic
 %0 = llvm.cmpxchg %ptr, %cmp, %val acq_rel monotonic : !llvm.float
 // CHECK: %{{[0-9]+}} = extractvalue { float, i1 } %{{[0-9]+}}, 0
- %1 = llvm.extractvalue %0[0] : !llvm<"{ float, i1 }">
+ %1 = llvm.extractvalue %0[0] : !llvm.struct<(float, i1)>
 // CHECK: %{{[0-9]+}} = extractvalue { float, i1 } %{{[0-9]+}}, 1
- %2 = llvm.extractvalue %0[1] : !llvm<"{ float, i1 }">
+ %2 = llvm.extractvalue %0[1] : !llvm.struct<(float, i1)>
 llvm.return
 }
-llvm.mlir.global external constant @_ZTIi() : !llvm<"i8*">
-llvm.func @foo(!llvm<"i8*">)
-llvm.func @bar(!llvm<"i8*">) -> !llvm<"i8*">
+llvm.mlir.global external constant @_ZTIi() : !llvm.ptr<i8>
+llvm.func @foo(!llvm.ptr<i8>)
+llvm.func @bar(!llvm.ptr<i8>) -> !llvm.ptr<i8>
 llvm.func @__gxx_personality_v0(...) -> !llvm.i32
 // CHECK-LABEL: @invokeLandingpad
 llvm.func @invokeLandingpad() -> !llvm.i32 attributes { personality = @__gxx_personality_v0 } {
 // CHECK: %[[a1:[0-9]+]] = alloca i8
 %0 = llvm.mlir.constant(0 : i32) : !llvm.i32
- %1 = llvm.mlir.constant("\01") : !llvm<"[1 x i8]">
- %2 = llvm.mlir.addressof @_ZTIi : !llvm<"i8**">
- %3 = llvm.bitcast %2 : !llvm<"i8**"> to !llvm<"i8*">
- %4 = llvm.mlir.null : !llvm<"i8**">
+ %1 = llvm.mlir.constant("\01") : !llvm.array<1 x i8>
+ %2 = llvm.mlir.addressof @_ZTIi : !llvm.ptr<ptr<i8>>
+ %3 = llvm.bitcast %2 : !llvm.ptr<ptr<i8>> to !llvm.ptr<i8>
+ %4 = llvm.mlir.null : !llvm.ptr<ptr<i8>>
 %5 = llvm.mlir.constant(1 : i32) : !llvm.i32
- %6 = llvm.alloca %5 x !llvm.i8 : (!llvm.i32) -> !llvm<"i8*">
+ %6 = llvm.alloca %5 x !llvm.i8 : (!llvm.i32) -> !llvm.ptr<i8>
 // CHECK: invoke void @foo(i8* %[[a1]])
 // CHECK-NEXT: to label %[[normal:[0-9]+]] unwind label %[[unwind:[0-9]+]]
- llvm.invoke @foo(%6) to ^bb2 unwind ^bb1 : (!llvm<"i8*">) -> ()
+ llvm.invoke @foo(%6) to ^bb2 unwind ^bb1 : (!llvm.ptr<i8>) -> ()
 // CHECK: [[unwind]]:
 ^bb1:
@@ -1174,7 +1174,7 @@ llvm.func @invokeLandingpad() -> !llvm.i32 attributes { personality = @__gxx_per
 // CHECK-NEXT: catch i8** null
 // CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
 // CHECK-NEXT: filter [1 x i8] c"\01"
- %7 = llvm.landingpad (catch %4 : !llvm<"i8**">) (catch %3 : !llvm<"i8*">) (filter %1 : !llvm<"[1 x i8]">) : !llvm<"{ i8*, i32 }">
+ %7 = llvm.landingpad (catch %4 : !llvm.ptr<ptr<i8>>) (catch %3 : !llvm.ptr<i8>) (filter %1 : !llvm.array<1 x i8>) : !llvm.struct<(ptr<i8>, i32)>
 // CHECK: br label %[[final:[0-9]+]]
 llvm.br ^bb3
@@ -1187,7 +1187,7 @@ llvm.func @invokeLandingpad() -> !llvm.i32 attributes { personality = @__gxx_per
 // CHECK-NEXT: %{{[0-9]+}} = invoke i8* @bar(i8* %[[a1]])
 // CHECK-NEXT: to label %[[normal]] unwind label %[[unwind]]
 ^bb3: // pred: ^bb1
- %8 = llvm.invoke @bar(%6) to ^bb2 unwind ^bb1 : (!llvm<"i8*">) -> !llvm<"i8*">
+ %8 = llvm.invoke @bar(%6) to ^bb2 unwind ^bb1 : (!llvm.ptr<i8>) -> !llvm.ptr<i8>
 }
 // CHECK-LABEL: @callFreezeOp
@@ -1235,9 +1235,9 @@ llvm.func @passthrough() attributes {passthrough = ["noinline", ["alignstack", "
 // -----
 // CHECK-LABEL: @constant_bf16
-llvm.func @constant_bf16() -> !llvm<"bfloat"> {
- %0 = llvm.mlir.constant(1.000000e+01 : bf16) : !llvm<"bfloat">
- llvm.return %0 : !llvm<"bfloat">
+llvm.func @constant_bf16() -> !llvm.bfloat {
+ %0 = llvm.mlir.constant(1.000000e+01 : bf16) : !llvm.bfloat
+ llvm.return %0 : !llvm.bfloat
 }
 // CHECK: ret bfloat 0xR4120
@@ -1248,9 +1248,9 @@ llvm.func @address_taken() {
 llvm.return
 }
-llvm.mlir.global internal constant @taker_of_address() : !llvm<"void()*"> {
- %0 = llvm.mlir.addressof @address_taken : !llvm<"void()*">
- llvm.return %0 : !llvm<"void()*">
+llvm.mlir.global internal constant @taker_of_address() : !llvm.ptr<func<void ()>> {
+ %0 = llvm.mlir.addressof @address_taken : !llvm.ptr<func<void ()>>
+ llvm.return %0 : !llvm.ptr<func<void ()>>
 }
 // -----
@@ -1272,11 +1272,11 @@ llvm.func @cond_br_weights(%cond : !llvm.i1, %arg0 : !llvm.i32, %arg1 : !llvm.i
 llvm.func @volatile_store_and_load() {
 %val = llvm.mlir.constant(5 : i32) : !llvm.i32
 %size = llvm.mlir.constant(1 : i64) : !llvm.i64
- %0 = llvm.alloca %size x !llvm.i32 : (!llvm.i64) -> (!llvm<"i32*">)
+ %0 = llvm.alloca %size x !llvm.i32 : (!llvm.i64) -> (!llvm.ptr<i32>)
 // CHECK: store volatile i32 5, i32* %{{.*}}
- llvm.store volatile %val, %0 : !llvm<"i32*">
+ llvm.store volatile %val, %0 : !llvm.ptr<i32>
 // CHECK: %{{.*}} = load volatile i32, i32* %{{.*}}
- %1 = llvm.load volatile %0: !llvm<"i32*">
+ %1 = llvm.load volatile %0: !llvm.ptr<i32>
 llvm.return
 }
@@ -1286,11 +1286,11 @@ llvm.func @volatile_store_and_load() {
 llvm.func @nontemoral_store_and_load() {
 %val = llvm.mlir.constant(5 : i32) : !llvm.i32
 %size = llvm.mlir.constant(1 : i64) : !llvm.i64
- %0 = llvm.alloca %size x !llvm.i32 : (!llvm.i64) -> (!llvm<"i32*">)
+ %0 = llvm.alloca %size x !llvm.i32 : (!llvm.i64) -> (!llvm.ptr<i32>)
 // CHECK: !nontemporal ![[NODE:[0-9]+]]
- llvm.store %val, %0 {nontemporal} : !llvm<"i32*">
+ llvm.store %val, %0 {nontemporal} : !llvm.ptr<i32>
 // CHECK: !nontemporal ![[NODE]]
- %1 = llvm.load %0 {nontemporal} : !llvm<"i32*">
+ %1 = llvm.load %0 {nontemporal} : !llvm.ptr<i32>
 llvm.return
 }
diff --git a/mlir/test/Target/nvvmir.mlir b/mlir/test/Target/nvvmir.mlir
index 7e8cfb6c0a389..fdb438b4e6a96 100644
--- a/mlir/test/Target/nvvmir.mlir
+++ b/mlir/test/Target/nvvmir.mlir
@@ -50,12 +50,12 @@ llvm.func @nvvm_shfl(
 llvm.func @nvvm_shfl_pred(
 %0 : !llvm.i32, %1 : !llvm.i32, %2 : !llvm.i32,
- %3 : !llvm.i32, %4 : !llvm.float) -> !llvm<"{ i32, i1 }"> {
+ %3 : !llvm.i32, %4 : !llvm.float) -> !llvm.struct<(i32, i1)> {
 // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 {return_value_and_is_valid} : !llvm<"{ i32, i1 }">
+ %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 {return_value_and_is_valid} : !llvm.struct<(i32, i1)>
 // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
- %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 {return_value_and_is_valid} : !llvm<"{ float, i1 }">
- llvm.return %6 : !llvm<"{ i32, i1 }">
+ %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 {return_value_and_is_valid} : !llvm.struct<(float, i1)>
+ llvm.return %6 : !llvm.struct<(i32, i1)>
 }
 llvm.func @nvvm_vote(%0 : !llvm.i32, %1 : !llvm.i1) -> !llvm.i32 {
@@ -64,13 +64,13 @@ llvm.func @nvvm_vote(%0 : !llvm.i32, %1 : !llvm.i1) -> !llvm.i32 {
 llvm.return %3 : !llvm.i32
 }
-llvm.func @nvvm_mma(%a0 : !llvm<"<2 x half>">, %a1 : !llvm<"<2 x half>">,
- %b0 : !llvm<"<2 x half>">, %b1 : !llvm<"<2 x half>">,
+llvm.func @nvvm_mma(%a0 : !llvm.vec<2 x half>, %a1 : !llvm.vec<2 x half>,
+ %b0 : !llvm.vec<2 x half>, %b1 : !llvm.vec<2 x half>,
 %c0 : !llvm.float, %c1 : !llvm.float,
 %c2 : !llvm.float, %c3 : !llvm.float,
 %c4 : !llvm.float, %c5 : !llvm.float,
 %c6 : !llvm.float, %c7 : !llvm.float) {
 // CHECK: call { float, float, float, float, float, float, float, float } @llvm.nvvm.mma.m8n8k4.row.col.f32.f32
- %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm<"<2 x half>">, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm<"{ float, float, float, float, float, float, float, float }">
- llvm.return %0 : !llvm<"{ float, float, float, float, float, float, float, float }">
+ %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (!llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.vec<2 x half>, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float, !llvm.float) -> !llvm.struct<(float, float, float, float, float, float, float, float)>
+ llvm.return %0 : !llvm.struct<(float, float, float, float, float, float, float, float)>
 }
 // This function has the "kernel" attribute attached and should appear in the
diff --git a/mlir/test/Target/rocdl.mlir b/mlir/test/Target/rocdl.mlir
index
127f654893519..949b1e74ae29a 100644 --- a/mlir/test/Target/rocdl.mlir +++ b/mlir/test/Target/rocdl.mlir @@ -43,133 +43,133 @@ llvm.func @rocdl.barrier() { } llvm.func @rocdl.xdlops(%arg0 : !llvm.float, %arg1 : !llvm.float, - %arg2 : !llvm<"<32 x float>">, %arg3 : !llvm.i32, - %arg4 : !llvm<"<16 x float>">, %arg5 : !llvm<"<4 x float>">, - %arg6 : !llvm<"<4 x half>">, %arg7 : !llvm<"<32 x i32>">, - %arg8 : !llvm<"<16 x i32>">, %arg9 : !llvm<"<4 x i32>">, - %arg10 : !llvm<"<2 x i16>">) -> !llvm<"<32 x float>"> { + %arg2 : !llvm.vec<32 x float>, %arg3 : !llvm.i32, + %arg4 : !llvm.vec<16 x float>, %arg5 : !llvm.vec<4 x float>, + %arg6 : !llvm.vec<4 x half>, %arg7 : !llvm.vec<32 x i32>, + %arg8 : !llvm.vec<16 x i32>, %arg9 : !llvm.vec<4 x i32>, + %arg10 : !llvm.vec<2 x i16>) -> !llvm.vec<32 x float> { // CHECK-LABEL: rocdl.xdlops // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %{{.*}}, float %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r0 = rocdl.mfma.f32.32x32x1f32 %arg0, %arg1, %arg2, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r1 = rocdl.mfma.f32.16x16x1f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r2 = rocdl.mfma.f32.16x16x4f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float %{{.*}}, float %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r3 = rocdl.mfma.f32.4x4x1f32 %arg0, %arg1, %arg5, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float %{{.*}}, float %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r4= rocdl.mfma.f32.32x32x2f32 %arg0, %arg1, %arg4, %arg3, %arg3, %arg3 : - (!llvm.float, !llvm.float, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.float, !llvm.float, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r5 = rocdl.mfma.f32.32x32x4f16 %arg6, %arg6, %arg2, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> 
!llvm.vec<32 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r6 = rocdl.mfma.f32.16x16x4f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r7 = rocdl.mfma.f32.4x4x4f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r8 = rocdl.mfma.f32.32x32x8f16 %arg6, %arg6, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r9 = rocdl.mfma.f32.16x16x16f16 %arg6, %arg6, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<4 x half>">, !llvm<"<4 x half>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<4 x half>, !llvm.vec<4 x half>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> // CHECK: call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 %{{.*}}, i32 %{{.*}}, <32 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r10 = rocdl.mfma.i32.32x32x4i8 %arg3, %arg3, %arg7, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<32 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<32 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x i32> // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r11 = rocdl.mfma.i32.16x16x4i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r12 = rocdl.mfma.i32.4x4x4i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> // CHECK: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %{{.*}}, i32 %{{.*}}, <16 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r13 = rocdl.mfma.i32.32x32x8i8 %arg3, %arg3, %arg8, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<16 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> 
!llvm<"<16 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<16 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x i32> // CHECK: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %{{.*}}, i32 %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r14 = rocdl.mfma.i32.16x16x16i8 %arg3, %arg3, %arg9, %arg3, %arg3, %arg3 : - (!llvm.i32, !llvm.i32, !llvm<"<4 x i32>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x i32>"> + (!llvm.i32, !llvm.i32, !llvm.vec<4 x i32>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x i32> // CHECK: call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <32 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r15 = rocdl.mfma.f32.32x32x2bf16 %arg10, %arg10, %arg2, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<32 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<32 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<32 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<32 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r16 = rocdl.mfma.f32.16x16x2bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r17 = rocdl.mfma.f32.4x4x2bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> // CHECK: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <16 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r18 = rocdl.mfma.f32.32x32x4bf16 %arg10, %arg10, %arg4, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<16 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<16 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<16 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<16 x float> // CHECK: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <4 x float> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) %r19 = rocdl.mfma.f32.16x16x8bf16 %arg10, %arg10, %arg5, %arg3, %arg3, %arg3 : - (!llvm<"<2 x i16>">, !llvm<"<2 x i16>">, !llvm<"<4 x float>">, - !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm<"<4 x float>"> + (!llvm.vec<2 x i16>, !llvm.vec<2 x i16>, !llvm.vec<4 x float>, + !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.vec<4 x float> - llvm.return %r0 : !llvm<"<32 x float>"> + llvm.return %r0 : !llvm.vec<32 x float> } -llvm.func @rocdl.mubuf(%rsrc : !llvm<"<4 x i32>">, %vindex : !llvm.i32, +llvm.func @rocdl.mubuf(%rsrc : !llvm.vec<4 x i32>, %vindex : !llvm.i32, %offset : !llvm.i32, %glc : !llvm.i1, - %slc : !llvm.i1, %vdata1 : !llvm<"<1 x float>">, - %vdata2 : !llvm<"<2 x float>">, %vdata4 : !llvm<"<4 x float>">) { + %slc : !llvm.i1, %vdata1 : !llvm.vec<1 x float>, + %vdata2 : !llvm.vec<2 x float>, %vdata4 : !llvm.vec<4 x float>) { // CHECK-LABEL: rocdl.mubuf // CHECK: call <1 x float> 
@llvm.amdgcn.buffer.load.v1f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<1 x float>">
+ %r1 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x float>
 // CHECK: call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<2 x float>">
+ %r2 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x float>
 // CHECK: call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<4 x float>">
+ %r4 = rocdl.buffer.load %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x float>
 // CHECK: call void @llvm.amdgcn.buffer.store.v1f32(<1 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<1 x float>">
+ rocdl.buffer.store %vdata1, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<1 x float>
 // CHECK: call void @llvm.amdgcn.buffer.store.v2f32(<2 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<2 x float>">
+ rocdl.buffer.store %vdata2, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<2 x float>
 // CHECK: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 %{{.*}}, i1 %{{.*}})
- rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm<"<4 x float>">
+ rocdl.buffer.store %vdata4, %rsrc, %vindex, %offset, %glc, %slc : !llvm.vec<4 x float>
 llvm.return
 }
diff --git a/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp b/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp
index 8ac1ef0a8c170..873ed16169093 100644
--- a/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp
+++ b/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp
@@ -11,25 +11,25 @@ class LLVMDialectNewTypes : public Dialect {
 public:
 LLVMDialectNewTypes(MLIRContext *ctx) : Dialect(getDialectNamespace(), ctx) {
 // clang-format off
- addTypes();
+ // addTypes();
 // clang-format on
 }
 static StringRef getDialectNamespace() { return "llvm2"; }
@@ -38,7 +38,7 @@ class LLVMDialectNewTypes : public Dialect {
 return detail::parseType(parser);
 }
 void printType(Type type, DialectAsmPrinter &printer) const override {
- detail::printType(type.cast<LLVMTypeNew>(), printer);
+ detail::printType(type.cast<LLVMType>(), printer);
 }
 };
 } // namespace
diff --git a/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp b/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp
index b76ac2a13344b..e51734308aa66 100644
--- a/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp
+++ b/mlir/test/lib/Target/TestLLVMTypeTranslation.cpp
@@ -34,7 +34,7 @@ class TestLLVMTypeTranslation : public LLVM::ModuleTranslation {
 if (op.getName().getStringRef() == "llvm.test_introduce_func") {
 auto attr = op.getAttrOfType<TypeAttr>("type");
 assert(attr && "expected 'type' attribute");
- auto type = attr.getValue().cast<LLVM::LLVMTypeNew>();
+ auto type = attr.getValue().cast<LLVM::LLVMType>();
 auto nameAttr = op.getAttrOfType<StringAttr>("name");
 assert(nameAttr && "expected 'name' attributes");
@@ -49,7 +49,7 @@ class TestLLVMTypeTranslation : public LLVM::ModuleTranslation {
 module->getOrInsertFunction(nameAttr.getValue(), translated);
 std::string roundtripName = (Twine(nameAttr.getValue()) + "_round").str();
"_round").str(); - LLVM::LLVMTypeNew translatedBack = + LLVM::LLVMType translatedBack = LLVM::translateTypeFromLLVMIR(translated, *op.getContext()); llvm::Type *translatedBackAndForth = LLVM::translateTypeToLLVMIR(translatedBack, builder.getContext()); diff --git a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir index b48b523165bd7..5f5ff6f1fbb9f 100644 --- a/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare_ptr_call_conv.mlir @@ -26,8 +26,8 @@ func @simple_add1_add2_test(%arg0: memref<2xf32>, %arg1: memref<2xf32>) { } // External declarations. -llvm.func @malloc(!llvm.i64) -> !llvm<"i8*"> -llvm.func @free(!llvm<"i8*">) +llvm.func @malloc(!llvm.i64) -> !llvm.ptr +llvm.func @free(!llvm.ptr) func @print_f32(%arg0: f32) func @print_comma() func @print_newline() @@ -79,105 +79,105 @@ llvm.func @main() { %3 = llvm.mlir.constant(1.000000e+00 : f32) : !llvm.float %4 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float %5 = llvm.mlir.constant(2 : index) : !llvm.i64 - %6 = llvm.mlir.null : !llvm<"float*"> + %6 = llvm.mlir.null : !llvm.ptr %7 = llvm.mlir.constant(1 : index) : !llvm.i64 - %8 = llvm.getelementptr %6[%7] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %9 = llvm.ptrtoint %8 : !llvm<"float*"> to !llvm.i64 + %8 = llvm.getelementptr %6[%7] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %9 = llvm.ptrtoint %8 : !llvm.ptr to !llvm.i64 %10 = llvm.mul %5, %9 : !llvm.i64 - %11 = llvm.call @malloc(%10) : (!llvm.i64) -> !llvm<"i8*"> - %12 = llvm.bitcast %11 : !llvm<"i8*"> to !llvm<"float*"> - %13 = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %14 = llvm.insertvalue %12, %13[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %15 = llvm.insertvalue %12, %14[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> + %11 = llvm.call @malloc(%10) : (!llvm.i64) -> !llvm.ptr + %12 = llvm.bitcast %11 : !llvm.ptr to !llvm.ptr + %13 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %14 = llvm.insertvalue %12, %13[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %15 = llvm.insertvalue %12, %14[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %16 = llvm.mlir.constant(0 : index) : !llvm.i64 - %17 = llvm.insertvalue %16, %15[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> + %17 = llvm.insertvalue %16, %15[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %18 = llvm.mlir.constant(1 : index) : !llvm.i64 - %19 = llvm.insertvalue %5, %17[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> - %20 = llvm.insertvalue %18, %19[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }"> + %19 = llvm.insertvalue %5, %17[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %20 = llvm.insertvalue %18, %19[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> %21 = llvm.mlir.constant(2 : index) : !llvm.i64 - %22 = llvm.mlir.null : !llvm<"float*"> + %22 = llvm.mlir.null : !llvm.ptr %23 = llvm.mlir.constant(1 : index) : !llvm.i64 - %24 = llvm.getelementptr %22[%23] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %25 = llvm.ptrtoint %24 : !llvm<"float*"> to !llvm.i64 + %24 = llvm.getelementptr %22[%23] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %25 = llvm.ptrtoint %24 : !llvm.ptr to !llvm.i64 %26 = llvm.mul %21, %25 : !llvm.i64 - %27 = llvm.call @malloc(%26) : (!llvm.i64) -> !llvm<"i8*"> - %28 = llvm.bitcast %27 : !llvm<"i8*"> to !llvm<"float*"> - 
- %30 = llvm.insertvalue %28, %29[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- %31 = llvm.insertvalue %28, %30[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %27 = llvm.call @malloc(%26) : (!llvm.i64) -> !llvm.ptr<i8>
+ %28 = llvm.bitcast %27 : !llvm.ptr<i8> to !llvm.ptr<float>
+ %29 = llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %30 = llvm.insertvalue %28, %29[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %31 = llvm.insertvalue %28, %30[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %32 = llvm.mlir.constant(0 : index) : !llvm.i64
- %33 = llvm.insertvalue %32, %31[2] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %33 = llvm.insertvalue %32, %31[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %34 = llvm.mlir.constant(1 : index) : !llvm.i64
- %35 = llvm.insertvalue %21, %33[3, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- %36 = llvm.insertvalue %34, %35[4, 0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %35 = llvm.insertvalue %21, %33[3, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %36 = llvm.insertvalue %34, %35[4, 0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 llvm.br ^bb1(%1 : !llvm.i64)
 ^bb1(%37: !llvm.i64): // 2 preds: ^bb0, ^bb2
 %38 = llvm.icmp "slt" %37, %0 : !llvm.i64
 llvm.cond_br %38, ^bb2, ^bb3
 ^bb2: // pred: ^bb1
- %39 = llvm.extractvalue %20[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %39 = llvm.extractvalue %20[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %40 = llvm.mlir.constant(0 : index) : !llvm.i64
 %41 = llvm.mlir.constant(1 : index) : !llvm.i64
 %42 = llvm.mul %37, %41 : !llvm.i64
 %43 = llvm.add %40, %42 : !llvm.i64
- %44 = llvm.getelementptr %39[%43] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- llvm.store %3, %44 : !llvm<"float*">
- %45 = llvm.extractvalue %36[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %44 = llvm.getelementptr %39[%43] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ llvm.store %3, %44 : !llvm.ptr<float>
+ %45 = llvm.extractvalue %36[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %46 = llvm.mlir.constant(0 : index) : !llvm.i64
 %47 = llvm.mlir.constant(1 : index) : !llvm.i64
 %48 = llvm.mul %37, %47 : !llvm.i64
 %49 = llvm.add %46, %48 : !llvm.i64
- %50 = llvm.getelementptr %45[%49] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- llvm.store %3, %50 : !llvm<"float*">
+ %50 = llvm.getelementptr %45[%49] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ llvm.store %3, %50 : !llvm.ptr<float>
 %51 = llvm.add %37, %2 : !llvm.i64
 llvm.br ^bb1(%51 : !llvm.i64)
 ^bb3: // pred: ^bb1
 %52 = llvm.mlir.constant(1 : index) : !llvm.i64
 %53 = llvm.mlir.constant(1 : index) : !llvm.i64
- %54 = llvm.extractvalue %20[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- %55 = llvm.extractvalue %36[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- llvm.call @simple_add1_add2_test(%54, %55) : (!llvm<"float*">, !llvm<"float*">) -> ()
- %56 = llvm.extractvalue %20[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %54 = llvm.extractvalue %20[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %55 = llvm.extractvalue %36[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ llvm.call @simple_add1_add2_test(%54, %55) : (!llvm.ptr<float>, !llvm.ptr<float>) -> ()
+ %56 = llvm.extractvalue %20[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %57 = llvm.mlir.constant(0 : index) : !llvm.i64
 %58 = llvm.mlir.constant(1 : index) : !llvm.i64
 %59 = llvm.mul %1, %58 : !llvm.i64
 %60 = llvm.add %57, %59 : !llvm.i64
- %61 = llvm.getelementptr %56[%60] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- %62 = llvm.load %61 : !llvm<"float*">
+ %61 = llvm.getelementptr %56[%60] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ %62 = llvm.load %61 : !llvm.ptr<float>
 llvm.call @print_f32(%62) : (!llvm.float) -> ()
 llvm.call @print_comma() : () -> ()
- %63 = llvm.extractvalue %20[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %63 = llvm.extractvalue %20[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %64 = llvm.mlir.constant(0 : index) : !llvm.i64
 %65 = llvm.mlir.constant(1 : index) : !llvm.i64
 %66 = llvm.mul %2, %65 : !llvm.i64
 %67 = llvm.add %64, %66 : !llvm.i64
- %68 = llvm.getelementptr %63[%67] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- %69 = llvm.load %68 : !llvm<"float*">
+ %68 = llvm.getelementptr %63[%67] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ %69 = llvm.load %68 : !llvm.ptr<float>
 llvm.call @print_f32(%69) : (!llvm.float) -> ()
 llvm.call @print_newline() : () -> ()
- %70 = llvm.extractvalue %36[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %70 = llvm.extractvalue %36[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %71 = llvm.mlir.constant(0 : index) : !llvm.i64
 %72 = llvm.mlir.constant(1 : index) : !llvm.i64
 %73 = llvm.mul %1, %72 : !llvm.i64
 %74 = llvm.add %71, %73 : !llvm.i64
- %75 = llvm.getelementptr %70[%74] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- %76 = llvm.load %75 : !llvm<"float*">
+ %75 = llvm.getelementptr %70[%74] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ %76 = llvm.load %75 : !llvm.ptr<float>
 llvm.call @print_f32(%76) : (!llvm.float) -> ()
 llvm.call @print_comma() : () -> ()
- %77 = llvm.extractvalue %36[1] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
+ %77 = llvm.extractvalue %36[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
 %78 = llvm.mlir.constant(0 : index) : !llvm.i64
 %79 = llvm.mlir.constant(1 : index) : !llvm.i64
 %80 = llvm.mul %2, %79 : !llvm.i64
 %81 = llvm.add %78, %80 : !llvm.i64
- %82 = llvm.getelementptr %77[%81] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*">
- %83 = llvm.load %82 : !llvm<"float*">
+ %82 = llvm.getelementptr %77[%81] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
+ %83 = llvm.load %82 : !llvm.ptr<float>
 llvm.call @print_f32(%83) : (!llvm.float) -> ()
 llvm.call @print_newline() : () -> ()
- %84 = llvm.extractvalue %20[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- %85 = llvm.bitcast %84 : !llvm<"float*"> to !llvm<"i8*">
- llvm.call @free(%85) : (!llvm<"i8*">) -> ()
- %86 = llvm.extractvalue %36[0] : !llvm<"{ float*, float*, i64, [1 x i64], [1 x i64] }">
- %87 = llvm.bitcast %86 : !llvm<"float*"> to !llvm<"i8*">
- llvm.call @free(%87) : (!llvm<"i8*">) -> ()
+ %84 = llvm.extractvalue %20[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %85 = llvm.bitcast %84 : !llvm.ptr<float> to !llvm.ptr<i8>
+ llvm.call @free(%85) : (!llvm.ptr<i8>) -> ()
+ %86 = llvm.extractvalue %36[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
+ %87 = llvm.bitcast %86 : !llvm.ptr<float> to !llvm.ptr<i8>
+ llvm.call @free(%87) : (!llvm.ptr<i8>) -> ()
 llvm.return
 }
diff --git a/mlir/test/mlir-cpu-runner/simple.mlir b/mlir/test/mlir-cpu-runner/simple.mlir
index a1b6cf62dab67..e75d98aa8bd3c 100644
--- a/mlir/test/mlir-cpu-runner/simple.mlir
+++ b/mlir/test/mlir-cpu-runner/simple.mlir
b/mlir/test/mlir-cpu-runner/simple.mlir @@ -15,8 +15,8 @@ // Declarations of C library functions. llvm.func @fabsf(!llvm.float) -> !llvm.float -llvm.func @malloc(!llvm.i64) -> !llvm<"i8*"> -llvm.func @free(!llvm<"i8*">) +llvm.func @malloc(!llvm.i64) -> !llvm.ptr +llvm.func @free(!llvm.ptr) // Check that a simple function with a nested call works. llvm.func @main() -> !llvm.float { @@ -27,29 +27,29 @@ llvm.func @main() -> !llvm.float { // CHECK: 4.200000e+02 // Helper typed functions wrapping calls to "malloc" and "free". -llvm.func @allocation() -> !llvm<"float*"> { +llvm.func @allocation() -> !llvm.ptr { %0 = llvm.mlir.constant(4 : index) : !llvm.i64 - %1 = llvm.call @malloc(%0) : (!llvm.i64) -> !llvm<"i8*"> - %2 = llvm.bitcast %1 : !llvm<"i8*"> to !llvm<"float*"> - llvm.return %2 : !llvm<"float*"> + %1 = llvm.call @malloc(%0) : (!llvm.i64) -> !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr + llvm.return %2 : !llvm.ptr } -llvm.func @deallocation(%arg0: !llvm<"float*">) { - %0 = llvm.bitcast %arg0 : !llvm<"float*"> to !llvm<"i8*"> - llvm.call @free(%0) : (!llvm<"i8*">) -> () +llvm.func @deallocation(%arg0: !llvm.ptr) { + %0 = llvm.bitcast %arg0 : !llvm.ptr to !llvm.ptr + llvm.call @free(%0) : (!llvm.ptr) -> () llvm.return } // Check that allocation and deallocation works, and that a custom entry point // works. llvm.func @foo() -> !llvm.float { - %0 = llvm.call @allocation() : () -> !llvm<"float*"> + %0 = llvm.call @allocation() : () -> !llvm.ptr %1 = llvm.mlir.constant(0 : index) : !llvm.i64 %2 = llvm.mlir.constant(1.234000e+03 : f32) : !llvm.float - %3 = llvm.getelementptr %0[%1] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - llvm.store %2, %3 : !llvm<"float*"> - %4 = llvm.getelementptr %0[%1] : (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> - %5 = llvm.load %4 : !llvm<"float*"> - llvm.call @deallocation(%0) : (!llvm<"float*">) -> () + %3 = llvm.getelementptr %0[%1] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %2, %3 : !llvm.ptr + %4 = llvm.getelementptr %0[%1] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %5 = llvm.load %4 : !llvm.ptr + llvm.call @deallocation(%0) : (!llvm.ptr) -> () llvm.return %5 : !llvm.float } // NOMAIN: 1.234000e+03 From cb9f9df5f8239e291a62934b0f64eb795b26d84a Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 4 Aug 2020 14:42:17 +0200 Subject: [PATCH 303/600] [mlir] Fix GCC5 compilation problem in MLIR->LLVM type translation GCC5 seems to dislike generic lambdas calling a method of the class containing the lambda without explicit `this`. 
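For illustration, here is a minimal standalone sketch (not the LLVM code itself) of the construct GCC 5 rejects: a generic lambda calling a member function of the enclosing class without an explicit `this->`. The `Translator`/`translate` names are hypothetical stand-ins.

```cpp
struct Translator {
  template <typename T> int translate(T) { return 0; }

  int dispatch() {
    // GCC 5 may fail to resolve the implicit member call inside a generic
    // (auto-parameter) lambda:
    //   auto bad = [this](auto x) { return translate(x); };
    // Spelling the call with an explicit `this->`, as the patch below does,
    // is portable:
    auto good = [this](auto x) { return this->translate(x); };
    return good(42);
  }
};

int main() { return Translator{}.dispatch(); }
```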
--- mlir/lib/Target/LLVMIR/TypeTranslation.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp index 15e0f158007a5..b327e9ed8d2c4 100644 --- a/mlir/lib/Target/LLVMIR/TypeTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/TypeTranslation.cpp @@ -72,7 +72,7 @@ class TypeToLLVMIRTranslator { LLVM::LLVMFunctionType, LLVM::LLVMPointerType, LLVM::LLVMStructType, LLVM::LLVMFixedVectorType, LLVM::LLVMScalableVectorType>( - [this](auto array) { return translate(array); }) + [this](auto type) { return this->translate(type); }) .Default([](LLVM::LLVMType t) -> llvm::Type * { llvm_unreachable("unknown LLVM dialect type"); }); @@ -187,7 +187,7 @@ class TypeFromLLVMIRTranslator { .Case( - [this](auto *type) { return translate(type); }) + [this](auto *type) { return this->translate(type); }) .Default([this](llvm::Type *type) { return translatePrimitiveType(type); }); From 04e45ae1c6d2fdbf3fd4242df69d1511df757d48 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 4 Aug 2020 08:28:44 -0400 Subject: [PATCH 304/600] [InstSimplify] fold nested min/max intrinsics with constant operands This is based on the existing code for the non-intrinsic idioms in InstCombine. The vector constant constraint is non-obvious: undefs should be ok in the outer call, but they can't propagate safely from the inner call in all cases. Example: https://alive2.llvm.org/ce/z/-2bVbM define <2 x i8> @src(<2 x i8> %x) { %0: %m = umin <2 x i8> %x, { 7, undef } %m2 = umin <2 x i8> { 9, 9 }, %m ret <2 x i8> %m2 } => define <2 x i8> @tgt(<2 x i8> %x) { %0: %m = umin <2 x i8> %x, { 7, undef } ret <2 x i8> %m } Transformation doesn't verify! ERROR: Value mismatch Example: <2 x i8> %x = < undef, undef > Source: <2 x i8> %m = < #x00 (0) [based on undef value], #x00 (0) > <2 x i8> %m2 = < #x00 (0), #x00 (0) > Target: <2 x i8> %m = < #x07 (7), #x10 (16) > Source value: < #x00 (0), #x00 (0) > Target value: < #x07 (7), #x10 (16) > --- llvm/lib/Analysis/InstructionSimplify.cpp | 15 ++++++ .../InstSimplify/maxmin_intrinsics.ll | 52 +++++++------------ 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 2119ddcc7649b..6b8f8e3acc179 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5285,6 +5285,21 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // umin(i8 %x, i8 255) --> %x if (*C == getMaxMinLimit(getMaxMinOpposite(IID), BitWidth)) return Op0; + + // Remove nested call if constant operands allow it. Example: + // max (max X, 7), 5 -> max X, 7 + auto *MinMax0 = dyn_cast(Op0); + if (MinMax0 && MinMax0->getIntrinsicID() == IID) { + // TODO: loosen undef/splat restrictions for vector constants. 
+ Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1); + const APInt *InnerC; + if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) && + ((IID == Intrinsic::smax && InnerC->sge(*C)) || + (IID == Intrinsic::smin && InnerC->sle(*C)) || + (IID == Intrinsic::umax && InnerC->uge(*C)) || + (IID == Intrinsic::umin && InnerC->ule(*C)))) + return Op0; + } } break; diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 3aa19e91e0e38..6b10853dd78f2 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -627,8 +627,7 @@ define i8 @umin_smin(i8 %x, i8 %y) { define i8 @umax_umax_constants(i8 %x) { ; CHECK-LABEL: @umax_umax_constants( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 9) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 7, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 9) %m2 = call i8 @llvm.umax.i8(i8 7, i8 %m) @@ -638,8 +637,7 @@ define i8 @umax_umax_constants(i8 %x) { define i8 @umax_umax_constants_commute1(i8 %x) { ; CHECK-LABEL: @umax_umax_constants_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 -128, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 7, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 128, i8 %x) %m2 = call i8 @llvm.umax.i8(i8 7, i8 %m) @@ -649,8 +647,7 @@ define i8 @umax_umax_constants_commute1(i8 %x) { define i8 @umax_umax_constants_commute2(i8 %x) { ; CHECK-LABEL: @umax_umax_constants_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 -56) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umax.i8(i8 [[M]], i8 127) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umax.i8(i8 %x, i8 200) %m2 = call i8 @llvm.umax.i8(i8 %m, i8 127) @@ -660,8 +657,7 @@ define i8 @umax_umax_constants_commute2(i8 %x) { define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) { ; CHECK-LABEL: @umax_umax_constants_commute3( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[M]], <2 x i8> ) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> , <2 x i8> %x) %m2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %m, <2 x i8> ) @@ -671,8 +667,7 @@ define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) { define i8 @umin_umin_constants(i8 %x) { ; CHECK-LABEL: @umin_umin_constants( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 7) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 9, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 %x, i8 7) %m2 = call i8 @llvm.umin.i8(i8 9, i8 %m) @@ -682,8 +677,7 @@ define i8 @umin_umin_constants(i8 %x) { define i8 @umin_umin_constants_commute1(i8 %x) { ; CHECK-LABEL: @umin_umin_constants_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 7, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 -128, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 7, i8 %x) %m2 = call i8 @llvm.umin.i8(i8 128, i8 %m) @@ -693,8 +687,7 @@ define i8 @umin_umin_constants_commute1(i8 %x) { define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_commute2( ; 
CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[M]], <2 x i8> ) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> ) %m2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %m, <2 x i8> ) @@ -704,8 +697,7 @@ define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) { define i8 @umin_umin_constants_commute3(i8 %x) { ; CHECK-LABEL: @umin_umin_constants_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 -128, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.umin.i8(i8 [[M]], i8 -2) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.umin.i8(i8 128, i8 %x) %m2 = call i8 @llvm.umin.i8(i8 %m, i8 254) @@ -715,8 +707,7 @@ define i8 @umin_umin_constants_commute3(i8 %x) { define i8 @smax_smax_constants(i8 %x) { ; CHECK-LABEL: @smax_smax_constants( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 9) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 7, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 9) %m2 = call i8 @llvm.smax.i8(i8 7, i8 %m) @@ -726,8 +717,7 @@ define i8 @smax_smax_constants(i8 %x) { define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_commute1( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %x) %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %m) @@ -737,8 +727,7 @@ define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) { define i8 @smax_smax_constants_commute2(i8 %x) { ; CHECK-LABEL: @smax_smax_constants_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 0) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 -1) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 %x, i8 0) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 -1) @@ -748,8 +737,7 @@ define i8 @smax_smax_constants_commute2(i8 %x) { define i8 @smax_smax_constants_commute3(i8 %x) { ; CHECK-LABEL: @smax_smax_constants_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 -1, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smax.i8(i8 [[M]], i8 -127) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smax.i8(i8 -1, i8 %x) %m2 = call i8 @llvm.smax.i8(i8 %m, i8 -127) @@ -759,8 +747,7 @@ define i8 @smax_smax_constants_commute3(i8 %x) { define <2 x i8> @smin_smin_constants(<2 x i8> %x) { ; CHECK-LABEL: @smin_smin_constants( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) -; CHECK-NEXT: [[M2:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> , <2 x i8> [[M]]) -; CHECK-NEXT: ret <2 x i8> [[M2]] +; CHECK-NEXT: ret <2 x i8> [[M]] ; %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> ) %m2 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> , <2 x i8> %m) @@ -770,8 +757,7 @@ define <2 x i8> @smin_smin_constants(<2 x i8> %x) { define i8 @smin_smin_constants_commute1(i8 %x) { ; CHECK-LABEL: @smin_smin_constants_commute1( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 -127, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 7, i8 [[M]]) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = 
call i8 @llvm.smin.i8(i8 -127, i8 %x) %m2 = call i8 @llvm.smin.i8(i8 7, i8 %m) @@ -781,8 +767,7 @@ define i8 @smin_smin_constants_commute1(i8 %x) { define i8 @smin_smin_constants_commute2(i8 %x) { ; CHECK-LABEL: @smin_smin_constants_commute2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 -1) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 0) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smin.i8(i8 %x, i8 -1) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 0) @@ -792,14 +777,15 @@ define i8 @smin_smin_constants_commute2(i8 %x) { define i8 @smin_smin_constants_commute3(i8 %x) { ; CHECK-LABEL: @smin_smin_constants_commute3( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 -127, i8 [[X:%.*]]) -; CHECK-NEXT: [[M2:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 -1) -; CHECK-NEXT: ret i8 [[M2]] +; CHECK-NEXT: ret i8 [[M]] ; %m = call i8 @llvm.smin.i8(i8 -127, i8 %x) %m2 = call i8 @llvm.smin.i8(i8 %m, i8 -1) ret i8 %m2 } +; Negative test - undef in inner constant must not propagate. + define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @umin_umin_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) @@ -811,6 +797,8 @@ define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) { ret <2 x i8> %m2 } +; Negative test - undef in inner constant must not propagate. + define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) { ; CHECK-LABEL: @smax_smax_constants_partial_undef( ; CHECK-NEXT: [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> ) From 1a4263d394c1a93757613bde4b1c2cf8d6a7bbb9 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 3 Aug 2020 05:34:07 -0400 Subject: [PATCH 305/600] [mlir][Vector] Add linalg.copy-based pattern for splitting vector.transfer_read into full and partial copies. This revision adds a transformation and a pattern that rewrites a "maybe masked" `vector.transfer_read %view[...], %pad `into a pattern resembling: ``` %1:3 = scf.if (%inBounds) { scf.yield %view : memref, index, index } else { %2 = linalg.fill(%extra_alloc, %pad) %3 = subview %view [...][...][...] linalg.copy(%3, %alloc) memref_cast %extra_alloc: memref to memref scf.yield %4 : memref, index, index } %res= vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} ``` where `extra_alloc` is a top of the function alloca'ed buffer of one vector. This rewrite makes it possible to realize the "always full tile" abstraction where vector.transfer_read operations are guaranteed to read from a padded full buffer. The extra work only occurs on the boundary tiles. --- mlir/include/mlir/Dialect/Vector/VectorOps.h | 32 ++- .../mlir/Dialect/Vector/VectorTransforms.h | 42 ++-- mlir/lib/Dialect/Vector/CMakeLists.txt | 1 + mlir/lib/Dialect/Vector/VectorTransforms.cpp | 238 ++++++++++++++---- .../vector-transfer-full-partial-split.mlir | 94 ++++++- .../lib/Transforms/TestVectorTransforms.cpp | 15 +- 6 files changed, 348 insertions(+), 74 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index edf9557df389b..562e07f98774d 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -56,22 +56,48 @@ enum class VectorContractLowering { }; /// Enum to control the lowering of `vector.transpose` operations. enum class VectorTransposeLowering { - // Lower transpose into element-wise extract and inserts. 
+ /// Lower transpose into element-wise extract and inserts. EltWise = 0, /// Lower 2-D transpose to `vector.flat_transpose`, maps 1-1 to LLVM matrix /// intrinsics. Flat = 1, }; +/// Enum to control the splitting of `vector.transfer` operations into masked +/// and unmasked variants. +enum class VectorTransferSplit { + /// Do not split vector transfer operations. + None = 0, + /// Split using masked + unmasked vector.transfer operations. + VectorTransfer = 1, + /// Split using a unmasked vector.transfer + linalg.fill + linalg.copy + /// operations. + LinalgCopy = 2, + /// Do not split vector transfer operation but instead mark it as "unmasked". + ForceUnmasked = 3 +}; /// Structure to control the behavior of vector transform patterns. struct VectorTransformsOptions { + /// Option to control the lowering of vector.contract. VectorContractLowering vectorContractLowering = VectorContractLowering::Dot; - VectorTransposeLowering vectorTransposeLowering = - VectorTransposeLowering::EltWise; VectorTransformsOptions & setVectorTransformsOptions(VectorContractLowering opt) { vectorContractLowering = opt; return *this; } + /// Option to control the lowering of vector.transpose. + VectorTransposeLowering vectorTransposeLowering = + VectorTransposeLowering::EltWise; + VectorTransformsOptions & + setVectorTransposeLowering(VectorTransposeLowering opt) { + vectorTransposeLowering = opt; + return *this; + } + /// Option to control the splitting of vector transfers. + VectorTransferSplit vectorTransferSplit = VectorTransferSplit::None; + VectorTransformsOptions &setVectorTransferSplit(VectorTransferSplit opt) { + vectorTransferSplit = opt; + return *this; + } }; /// Collect a set of transformation patterns that are related to contracting diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h index 835ad18a79ad2..e6c7b7abebd53 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h @@ -109,13 +109,13 @@ struct UnrollVectorPattern : public OpRewritePattern { FilterConstraintType filter; }; -/// Split a vector.transfer operation into an unmasked fastpath vector.transfer -/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result -/// is `success, the `ifOp` points to the newly created conditional upon -/// function return. To accomodate for the fact that the original -/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0] -/// in the temporary buffer, the scf.if op returns a view and values of type -/// index. At this time, only vector.transfer_read is implemented. +/// Split a vector.transfer operation into an unmasked fastpath and a slowpath. +/// If `ifOp` is not null and the result is `success, the `ifOp` points to the +/// newly created conditional upon function return. +/// To accomodate for the fact that the original vector.transfer indexing may be +/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the +/// scf.if op returns a view and values of type index. +/// At this time, only vector.transfer_read case is implemented. 
/// /// Example (a 2-D vector.transfer_read): /// ``` @@ -124,17 +124,17 @@ struct UnrollVectorPattern : public OpRewritePattern { /// is transformed into: /// ``` /// %1:3 = scf.if (%inBounds) { -/// scf.yield %0 : memref, index, index -/// } else { -/// %2 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// %3 = vector.type_cast %extra_alloc : memref<...> to -/// memref> store %2, %3[] : memref> %4 = -/// memref_cast %extra_alloc: memref to memref scf.yield %4 : -/// memref, index, index +/// // fastpath, direct cast +/// memref_cast %A: memref to compatibleMemRefType +/// scf.yield %view : compatibleMemRefType, index, index +/// } else { +/// // slowpath, masked vector.transfer or linalg.copy. +/// memref_cast %alloc: memref to compatibleMemRefType +/// scf.yield %4 : compatibleMemRefType, index, index // } /// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} /// ``` -/// where `extra_alloc` is a top of the function alloca'ed buffer of one vector. +/// where `alloc` is a top of the function alloca'ed buffer of one vector. /// /// Preconditions: /// 1. `xferOp.permutation_map()` must be a minor identity map @@ -143,9 +143,10 @@ struct UnrollVectorPattern : public OpRewritePattern { /// rank-reducing subviews. LogicalResult splitFullAndPartialTransferPrecondition(VectorTransferOpInterface xferOp); -LogicalResult splitFullAndPartialTransfer(OpBuilder &b, - VectorTransferOpInterface xferOp, - scf::IfOp *ifOp = nullptr); +LogicalResult splitFullAndPartialTransfer( + OpBuilder &b, VectorTransferOpInterface xferOp, + VectorTransformsOptions options = VectorTransformsOptions(), + scf::IfOp *ifOp = nullptr); /// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern /// may take an extra filter to perform selection at a finer granularity. @@ -155,16 +156,19 @@ struct VectorTransferFullPartialRewriter : public RewritePattern { explicit VectorTransferFullPartialRewriter( MLIRContext *context, + VectorTransformsOptions options = VectorTransformsOptions(), FilterConstraintType filter = [](VectorTransferOpInterface op) { return success(); }, PatternBenefit benefit = 1) - : RewritePattern(benefit, MatchAnyOpTypeTag()), filter(filter) {} + : RewritePattern(benefit, MatchAnyOpTypeTag()), options(options), + filter(filter) {} /// Performs the rewrite. 
LogicalResult matchAndRewrite(Operation *op, PatternRewriter &rewriter) const override; private: + VectorTransformsOptions options; FilterConstraintType filter; }; diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt index 13dbf6da73fa2..1087feba7fbdb 100644 --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -16,6 +16,7 @@ add_mlir_dialect_library(MLIRVector MLIRIR MLIRStandardOps MLIRAffineOps + MLIRLinalgOps MLIRSCF MLIRLoopAnalysis MLIRSideEffectInterfaces diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index 573b822503f3a..3c23c5a6d869d 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/SCF/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" @@ -2056,7 +2057,16 @@ LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition( return success(); } -MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { +/// Given two MemRefTypes `aT` and `bT`, return a MemRefType to which both can +/// be cast. If the MemRefTypes don't have the same rank or are not strided, +/// return null; otherwise: +/// 1. if `aT` and `bT` are cast-compatible, return `aT`. +/// 2. else return a new MemRefType obtained by iterating over the shape and +/// strides and: +/// a. keeping the ones that are static and equal across `aT` and `bT`. +/// b. using a dynamic shape and/or stride for the dimeniosns that don't +/// agree. +static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { if (MemRefCastOp::areCastCompatible(aT, bT)) return aT; if (aT.getRank() != bT.getRank()) @@ -2086,13 +2096,154 @@ MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { makeStridedLinearLayoutMap(resStrides, resOffset, aT.getContext())); } -/// Split a vector.transfer operation into an unmasked fastpath vector.transfer -/// and a slowpath masked vector.transfer. If `ifOp` is not null and the result -/// is `success, the `ifOp` points to the newly created conditional upon -/// function return. To accomodate for the fact that the original -/// vector.transfer indexing may be arbitrary and the slow path indexes @[0...0] -/// in the temporary buffer, the scf.if op returns a view and values of type -/// index. At this time, only vector.transfer_read is implemented. +/// Operates under a scoped context to build the intersection between the +/// view `xferOp.memref()` @ `xferOp.indices()` and the view `alloc`. +// TODO: view intersection/union/differences should be a proper std op. +static Value createScopedSubViewIntersection(VectorTransferOpInterface xferOp, + Value alloc) { + using namespace edsc::intrinsics; + int64_t memrefRank = xferOp.getMemRefType().getRank(); + // TODO: relax this precondition, will require rank-reducing subviews. 
+ assert(memrefRank == alloc.getType().cast().getRank() && + "Expected memref rank to match the alloc rank"); + Value one = std_constant_index(1); + ValueRange leadingIndices = + xferOp.indices().take_front(xferOp.getLeadingMemRefRank()); + SmallVector sizes; + sizes.append(leadingIndices.begin(), leadingIndices.end()); + xferOp.zipResultAndIndexing([&](int64_t resultIdx, int64_t indicesIdx) { + using MapList = ArrayRef>; + Value dimMemRef = std_dim(xferOp.memref(), indicesIdx); + Value dimAlloc = std_dim(alloc, resultIdx); + Value index = xferOp.indices()[indicesIdx]; + AffineExpr i, j, k; + bindDims(xferOp.getContext(), i, j, k); + SmallVector maps = + AffineMap::inferFromExprList(MapList{{i - j, k}}); + // affine_min(%dimMemRef - %index, %dimAlloc) + Value affineMin = affine_min(index.getType(), maps[0], + ValueRange{dimMemRef, index, dimAlloc}); + sizes.push_back(affineMin); + }); + return std_sub_view(xferOp.memref(), xferOp.indices(), sizes, + SmallVector(memrefRank, one)); +} + +/// Given an `xferOp` for which: +/// 1. `inBoundsCond` and a `compatibleMemRefType` have been computed. +/// 2. a memref of single vector `alloc` has been allocated. +/// Produce IR resembling: +/// ``` +/// %1:3 = scf.if (%inBounds) { +/// memref_cast %A: memref to compatibleMemRefType +/// scf.yield %view, ... : compatibleMemRefType, index, index +/// } else { +/// %2 = linalg.fill(%alloc, %pad) +/// %3 = subview %view [...][...][...] +/// linalg.copy(%3, %alloc) +/// memref_cast %alloc: memref to compatibleMemRefType +/// scf.yield %4, ... : compatibleMemRefType, index, index +/// } +/// ``` +/// Return the produced scf::IfOp. +static scf::IfOp createScopedFullPartialLinalgCopy( + vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond, + MemRefType compatibleMemRefType, Value alloc) { + using namespace edsc; + using namespace edsc::intrinsics; + scf::IfOp fullPartialIfOp; + Value zero = std_constant_index(0); + Value memref = xferOp.memref(); + conditionBuilder( + returnTypes, inBoundsCond, + [&]() -> scf::ValueVector { + Value res = memref; + if (compatibleMemRefType != xferOp.getMemRefType()) + res = std_memref_cast(memref, compatibleMemRefType); + scf::ValueVector viewAndIndices{res}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), + xferOp.indices().end()); + return viewAndIndices; + }, + [&]() -> scf::ValueVector { + linalg_fill(alloc, xferOp.padding()); + // Take partial subview of memref which guarantees no dimension + // overflows. + Value memRefSubView = createScopedSubViewIntersection( + cast(xferOp.getOperation()), alloc); + linalg_copy(memRefSubView, alloc); + Value casted = std_memref_cast(alloc, compatibleMemRefType); + scf::ValueVector viewAndIndices{casted}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), + zero); + return viewAndIndices; + }, + &fullPartialIfOp); + return fullPartialIfOp; +} + +/// Given an `xferOp` for which: +/// 1. `inBoundsCond` and a `compatibleMemRefType` have been computed. +/// 2. a memref of single vector `alloc` has been allocated. +/// Produce IR resembling: +/// ``` +/// %1:3 = scf.if (%inBounds) { +/// memref_cast %A: memref to compatibleMemRefType +/// scf.yield %view, ... : compatibleMemRefType, index, index +/// } else { +/// %2 = vector.transfer_read %view[...], %pad : memref, vector<...> +/// %3 = vector.type_cast %extra_alloc : +/// memref<...> to memref> +/// store %2, %3[] : memref> +/// %4 = memref_cast %alloc: memref to compatibleMemRefType +/// scf.yield %4, ... 
: compatibleMemRefType, index, index +/// } +/// ``` +/// Return the produced scf::IfOp. +static scf::IfOp createScopedFullPartialVectorTransferRead( + vector::TransferReadOp xferOp, TypeRange returnTypes, Value inBoundsCond, + MemRefType compatibleMemRefType, Value alloc) { + using namespace edsc; + using namespace edsc::intrinsics; + scf::IfOp fullPartialIfOp; + Value zero = std_constant_index(0); + Value memref = xferOp.memref(); + conditionBuilder( + returnTypes, inBoundsCond, + [&]() -> scf::ValueVector { + Value res = memref; + if (compatibleMemRefType != xferOp.getMemRefType()) + res = std_memref_cast(memref, compatibleMemRefType); + scf::ValueVector viewAndIndices{res}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), + xferOp.indices().end()); + return viewAndIndices; + }, + [&]() -> scf::ValueVector { + Operation *newXfer = + ScopedContext::getBuilderRef().clone(*xferOp.getOperation()); + Value vector = cast(newXfer).vector(); + std_store(vector, vector_type_cast( + MemRefType::get({}, vector.getType()), alloc)); + + Value casted = std_memref_cast(alloc, compatibleMemRefType); + scf::ValueVector viewAndIndices{casted}; + viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), + zero); + + return viewAndIndices; + }, + &fullPartialIfOp); + return fullPartialIfOp; +} + +/// Split a vector.transfer operation into an unmasked fastpath and a slowpath. +/// If `ifOp` is not null and the result is `success, the `ifOp` points to the +/// newly created conditional upon function return. +/// To accomodate for the fact that the original vector.transfer indexing may be +/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the +/// scf.if op returns a view and values of type index. +/// At this time, only vector.transfer_read case is implemented. /// /// Example (a 2-D vector.transfer_read): /// ``` @@ -2101,17 +2252,17 @@ MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { /// is transformed into: /// ``` /// %1:3 = scf.if (%inBounds) { -/// scf.yield %0 : memref, index, index -/// } else { -/// %2 = vector.transfer_read %0[...], %pad : memref, vector<...> -/// %3 = vector.type_cast %extra_alloc : memref<...> to -/// memref> store %2, %3[] : memref> %4 = -/// memref_cast %extra_alloc: memref to memref scf.yield %4 : -/// memref, index, index +/// // fastpath, direct cast +/// memref_cast %A: memref to compatibleMemRefType +/// scf.yield %view : compatibleMemRefType, index, index +/// } else { +/// // slowpath, masked vector.transfer or linalg.copy. +/// memref_cast %alloc: memref to compatibleMemRefType +/// scf.yield %4 : compatibleMemRefType, index, index // } /// %0 = vector.transfer_read %1#0[%1#1, %1#2] {masked = [false ... false]} /// ``` -/// where `extra_alloc` is a top of the function alloca'ed buffer of one vector. +/// where `alloc` is a top of the function alloca'ed buffer of one vector. /// /// Preconditions: /// 1. `xferOp.permutation_map()` must be a minor identity map @@ -2119,10 +2270,21 @@ MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { /// must be equal. This will be relaxed in the future but requires /// rank-reducing subviews. 
LogicalResult mlir::vector::splitFullAndPartialTransfer( - OpBuilder &b, VectorTransferOpInterface xferOp, scf::IfOp *ifOp) { + OpBuilder &b, VectorTransferOpInterface xferOp, + VectorTransformsOptions options, scf::IfOp *ifOp) { using namespace edsc; using namespace edsc::intrinsics; + if (options.vectorTransferSplit == VectorTransferSplit::None) + return failure(); + + SmallVector bools(xferOp.getTransferRank(), false); + auto unmaskedAttr = b.getBoolArrayAttr(bools); + if (options.vectorTransferSplit == VectorTransferSplit::ForceUnmasked) { + xferOp.setAttr(vector::TransferReadOp::getMaskedAttrName(), unmaskedAttr); + return success(); + } + assert(succeeded(splitFullAndPartialTransferPrecondition(xferOp)) && "Expected splitFullAndPartialTransferPrecondition to hold"); auto xferReadOp = dyn_cast(xferOp.getOperation()); @@ -2154,45 +2316,21 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( b.getI64IntegerAttr(32)); } - Value memref = xferOp.memref(); - SmallVector bools(xferOp.getTransferRank(), false); - auto unmaskedAttr = b.getBoolArrayAttr(bools); - MemRefType compatibleMemRefType = getCastCompatibleMemRefType( xferOp.getMemRefType(), alloc.getType().cast()); // Read case: full fill + partial copy -> unmasked vector.xfer_read. - Value zero = std_constant_index(0); SmallVector returnTypes(1 + xferOp.getTransferRank(), b.getIndexType()); returnTypes[0] = compatibleMemRefType; - scf::IfOp fullPartialIfOp; - conditionBuilder( - returnTypes, inBoundsCond, - [&]() -> scf::ValueVector { - Value res = memref; - if (compatibleMemRefType != xferOp.getMemRefType()) - res = std_memref_cast(memref, compatibleMemRefType); - scf::ValueVector viewAndIndices{res}; - viewAndIndices.insert(viewAndIndices.end(), xferOp.indices().begin(), - xferOp.indices().end()); - return viewAndIndices; - }, - [&]() -> scf::ValueVector { - Operation *newXfer = - ScopedContext::getBuilderRef().clone(*xferOp.getOperation()); - Value vector = cast(newXfer).vector(); - std_store(vector, vector_type_cast( - MemRefType::get({}, vector.getType()), alloc)); - - Value casted = std_memref_cast(alloc, compatibleMemRefType); - scf::ValueVector viewAndIndices{casted}; - viewAndIndices.insert(viewAndIndices.end(), xferOp.getTransferRank(), - zero); - - return viewAndIndices; - }, - &fullPartialIfOp); + scf::IfOp fullPartialIfOp = + options.vectorTransferSplit == VectorTransferSplit::VectorTransfer + ? 
createScopedFullPartialVectorTransferRead( + xferReadOp, returnTypes, inBoundsCond, compatibleMemRefType, + alloc) + : createScopedFullPartialLinalgCopy(xferReadOp, returnTypes, + inBoundsCond, + compatibleMemRefType, alloc); if (ifOp) *ifOp = fullPartialIfOp; @@ -2211,7 +2349,7 @@ LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( failed(filter(xferOp))) return failure(); rewriter.startRootUpdate(xferOp); - if (succeeded(splitFullAndPartialTransfer(rewriter, xferOp))) { + if (succeeded(splitFullAndPartialTransfer(rewriter, xferOp, options))) { rewriter.finalizeRootUpdate(xferOp); return success(); } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir index ef76247ee9d4b..e364542039828 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -1,13 +1,26 @@ // RUN: mlir-opt %s -test-vector-transfer-full-partial-split | FileCheck %s +// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-linalg-copy | FileCheck %s --check-prefix=LINALG // CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)> // CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)> // CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// LINALG-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)> +// LINALG-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)> +// LINALG-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> +// LINALG-DAG: #[[$map_2d_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> +// LINALG-DAG: #[[$bounds_map_4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)> +// LINALG-DAG: #[[$bounds_map_8:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)> + // CHECK-LABEL: split_vector_transfer_read_2d( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref // CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index + +// LINALG-LABEL: split_vector_transfer_read_2d( +// LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index +// LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: index) -> vector<4x8xf32> { %c0 = constant 0 : index %f0 = constant 0.0 : f32 @@ -43,9 +56,45 @@ func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: index) - // CHECK: } // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %[[cst]] // CHECK_SAME: {masked = [false, false]} : memref, vector<4x8xf32> + + // LINALG-DAG: %[[c0:.*]] = constant 0 : index + // LINALG-DAG: %[[c1:.*]] = constant 1 : index + // LINALG-DAG: %[[c4:.*]] = constant 4 : index + // LINALG-DAG: %[[c8:.*]] = constant 8 : index + // LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 + // alloca for boundary full tile + // LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // %i + 4 <= dim(%A, 0) + // LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] + // LINALG: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // LINALG: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[d0]] : index + // %j + 8 <= dim(%A, 1) + // LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] + // LINALG: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index + // are both conds true + // LINALG: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 + // LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { + // 
inBounds, just yield %A + // LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index + // LINALG: } else { + // slow path, fill tmp alloc and yield a memref_casted version of it + // LINALG: linalg.fill(%[[alloc]], %[[cst]]) : memref<4x8xf32>, f32 + // LINALG: %[[d0:.*]] = dim %[[A]], %[[c0]] : memref + // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]]) + // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) + // LINALG: %[[sv:.*]] = subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [%[[c1]], %[[c1]]] + // LINALG-SAME: memref to memref + // LINALG: linalg.copy(%[[sv]], %[[alloc]]) : memref, memref<4x8xf32> + // LINALG: %[[yielded:.*]] = memref_cast %[[alloc]] : + // LINALG-SAME: memref<4x8xf32> to memref + // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : + // LINALG-SAME: memref, index, index + // LINALG: } + // LINALG: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %[[cst]] + // LINALG_SAME: {masked = [false, false]} : memref, vector<4x8xf32> %1 = vector.transfer_read %A[%i, %j], %f0 : memref, vector<4x8xf32> - // CHECK: return %[[res]] : vector<4x8xf32> + // LINALG: return %[[res]] : vector<4x8xf32> return %1: vector<4x8xf32> } @@ -53,6 +102,11 @@ func @split_vector_transfer_read_2d(%A: memref, %i: index, %j: index) - // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref // CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index // CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index + +// LINALG-LABEL: split_vector_transfer_read_strided_2d( +// LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref +// LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index +// LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index func @split_vector_transfer_read_strided_2d( %A: memref<7x8xf32, offset:?, strides:[?, 1]>, %i: index, %j: index) -> vector<4x8xf32> { @@ -94,6 +148,44 @@ func @split_vector_transfer_read_strided_2d( // CHECK: } // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {masked = [false, false]} : // CHECK-SAME: memref, vector<4x8xf32> + + // LINALG-DAG: %[[c0:.*]] = constant 0 : index + // LINALG-DAG: %[[c1:.*]] = constant 1 : index + // LINALG-DAG: %[[c4:.*]] = constant 4 : index + // LINALG-DAG: %[[c7:.*]] = constant 7 : index + // LINALG-DAG: %[[c8:.*]] = constant 8 : index + // LINALG-DAG: %[[cst:.*]] = constant 0.000000e+00 : f32 + // alloca for boundary full tile + // LINALG: %[[alloc:.*]] = alloca() {alignment = 32 : i64} : memref<4x8xf32> + // %i + 4 <= dim(%A, 0) + // LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]] + // LINALG: %[[cmp0:.*]] = cmpi "sle", %[[idx0]], %[[c7]] : index + // %j + 8 <= dim(%A, 1) + // LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]] + // LINALG: %[[cmp1:.*]] = cmpi "sle", %[[idx1]], %[[c8]] : index + // are both conds true + // LINALG: %[[cond:.*]] = and %[[cmp0]], %[[cmp1]] : i1 + // LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref, index, index) { + // inBounds but not cast-compatible: yield a memref_casted form of %A + // LINALG: %[[casted:.*]] = memref_cast %arg0 : + // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref + // LINALG: scf.yield %[[casted]], %[[i]], %[[j]] : + // LINALG-SAME: memref, index, index + // LINALG: } else { + // slow path, fill tmp alloc and yield a memref_casted version of it + // LINALG: linalg.fill(%[[alloc]], %[[cst]]) : memref<4x8xf32>, f32 + // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]]) + // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) + // LINALG: %[[sv:.*]] = subview %[[A]][%[[i]], %[[j]]] 
[%[[sv0]], %[[sv1]]] [%[[c1]], %[[c1]]] + // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref + // LINALG: linalg.copy(%[[sv]], %[[alloc]]) : memref, memref<4x8xf32> + // LINALG: %[[yielded:.*]] = memref_cast %[[alloc]] : + // LINALG-SAME: memref<4x8xf32> to memref + // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] : + // LINALG-SAME: memref, index, index + // LINALG: } + // LINALG: %[[res:.*]] = vector.transfer_read {{.*}} {masked = [false, false]} : + // LINALG-SAME: memref, vector<4x8xf32> %1 = vector.transfer_read %A[%i, %j], %f0 : memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32> diff --git a/mlir/test/lib/Transforms/TestVectorTransforms.cpp b/mlir/test/lib/Transforms/TestVectorTransforms.cpp index 0bba74e76385e..9da3156d53593 100644 --- a/mlir/test/lib/Transforms/TestVectorTransforms.cpp +++ b/mlir/test/lib/Transforms/TestVectorTransforms.cpp @@ -125,10 +125,23 @@ struct TestVectorUnrollingPatterns struct TestVectorTransferFullPartialSplitPatterns : public PassWrapper { + TestVectorTransferFullPartialSplitPatterns() = default; + TestVectorTransferFullPartialSplitPatterns( + const TestVectorTransferFullPartialSplitPatterns &pass) {} + Option useLinalgOps{ + *this, "use-linalg-copy", + llvm::cl::desc("Split using a unmasked vector.transfer + linalg.fill + " + "linalg.copy operations."), + llvm::cl::init(false)}; void runOnFunction() override { MLIRContext *ctx = &getContext(); OwningRewritePatternList patterns; - patterns.insert(ctx); + VectorTransformsOptions options; + if (useLinalgOps) + options.setVectorTransferSplit(VectorTransferSplit::LinalgCopy); + else + options.setVectorTransferSplit(VectorTransferSplit::VectorTransfer); + patterns.insert(ctx, options); applyPatternsAndFoldGreedily(getFunction(), patterns); } }; From 98827feddb90b8d8bfeb3c85f7801ee411bab2cd Mon Sep 17 00:00:00 2001 From: Russell Gallop Date: Tue, 4 Aug 2020 10:14:31 +0100 Subject: [PATCH 306/600] [lit] Add --time-trace-output to lit This produces a chrome://tracing compatible trace file in the same way as -ftime-trace. This can be useful in optimising test time where one long test is causing long overall test time on a wide machine. This also helped in finding tests which have side effects on others (e.g. https://reviews.llvm.org/D84885). Differential Revision: https://reviews.llvm.org/D84931 --- llvm/utils/lit/lit/Test.py | 2 ++ llvm/utils/lit/lit/cl_arguments.py | 5 ++++- llvm/utils/lit/lit/reports.py | 32 ++++++++++++++++++++++++++++++ llvm/utils/lit/lit/worker.py | 3 +++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py index a38ea4e7717a3..59fefbc7f0891 100644 --- a/llvm/utils/lit/lit/Test.py +++ b/llvm/utils/lit/lit/Test.py @@ -150,6 +150,8 @@ def __init__(self, code, output='', elapsed=None): self.output = output # The wall timing to execute the test, if timing. self.elapsed = elapsed + self.start = None + self.pid = None # The metrics reported by this test. self.metrics = {} # The micro-test results reported by this test. 
diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index baeb3635298f5..69166e00aba8c 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -109,6 +109,9 @@ def parse_args(): execution_group.add_argument("--xunit-xml-output", type=lit.reports.XunitReport, help="Write XUnit-compatible XML test reports to the specified file") + execution_group.add_argument("--time-trace-output", + type=lit.reports.TimeTraceReport, + help="Write Chrome tracing compatible JSON to the specified file") execution_group.add_argument("--timeout", dest="maxIndividualTestTime", help="Maximum time to spend running a single test (in seconds). " @@ -195,7 +198,7 @@ def parse_args(): else: opts.shard = None - opts.reports = filter(None, [opts.output, opts.xunit_xml_output]) + opts.reports = filter(None, [opts.output, opts.xunit_xml_output, opts.time_trace_output]) return opts diff --git a/llvm/utils/lit/lit/reports.py b/llvm/utils/lit/lit/reports.py index 3ce961b44029e..b43f77911673c 100755 --- a/llvm/utils/lit/lit/reports.py +++ b/llvm/utils/lit/lit/reports.py @@ -136,3 +136,35 @@ def _get_skip_reason(self, test): if features: return 'Missing required feature(s): ' + ', '.join(features) return 'Unsupported configuration' + + +class TimeTraceReport(object): + def __init__(self, output_file): + self.output_file = output_file + self.skipped_codes = {lit.Test.EXCLUDED, + lit.Test.SKIPPED, lit.Test.UNSUPPORTED} + + def write_results(self, tests, elapsed): + # Find when first test started so we can make start times relative. + first_start_time = min([t.result.start for t in tests]) + events = [self._get_test_event( + x, first_start_time) for x in tests if x.result.code not in self.skipped_codes] + + json_data = {'traceEvents': events} + + with open(self.output_file, "w") as time_trace_file: + json.dump(json_data, time_trace_file, indent=2, sort_keys=True) + + def _get_test_event(self, test, first_start_time): + test_name = test.getFullName() + elapsed_time = test.result.elapsed or 0.0 + start_time = test.result.start - first_start_time if test.result.start else 0.0 + pid = test.result.pid or 0 + return { + 'pid': pid, + 'tid': 1, + 'ph': 'X', + 'ts': int(start_time * 1000000.), + 'dur': int(elapsed_time * 1000000.), + 'name': test_name, + } diff --git a/llvm/utils/lit/lit/worker.py b/llvm/utils/lit/lit/worker.py index 04fc77239e025..ba9b919f50ebb 100644 --- a/llvm/utils/lit/lit/worker.py +++ b/llvm/utils/lit/lit/worker.py @@ -6,6 +6,7 @@ and store it in global variables. This reduces the cost of each task. """ import contextlib +import os import signal import time import traceback @@ -65,6 +66,8 @@ def _execute(test, lit_config): start = time.time() result = _execute_test_handle_errors(test, lit_config) result.elapsed = time.time() - start + result.start = start + result.pid = os.getpid() return result From 36750ba5bd0e9e72120dbfaab4166baafd89e98a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 12:35:46 +0100 Subject: [PATCH 307/600] [X86][AVX] isHorizontalBinOp - relax lane-crossing limits for AVX1-only targets. Permit lane-crossing post shuffles on AVX1 targets as long as every element comes from the same source lane, which for v8f32/v4f64 cases can be efficiently lowered with the LowerShuffleAsLanePermuteAnd* style methods. 
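To make the relaxed condition concrete, here is a hedged standalone sketch (not the LLVM implementation) of the per-lane single-source check that `isMultiLaneShuffleMask` performs in the hunk below, with worked v8f32 masks (128-bit lanes, so 4 elements per lane):

```cpp
#include <cassert>
#include <vector>

// Returns true if any lane of the mask draws elements from more than one
// source lane; single-source-per-lane masks (even lane-crossing ones) pass.
static bool mixesSourceLanes(const std::vector<int> &Mask, int EltsPerLane) {
  int NumElts = static_cast<int>(Mask.size());
  for (int Lane = 0; Lane * EltsPerLane < NumElts; ++Lane) {
    int SrcLane = -1;
    for (int J = 0; J != EltsPerLane; ++J) {
      int M = Mask[Lane * EltsPerLane + J];
      if (M < 0)
        continue; // undef element, ignore
      int L = (M % NumElts) / EltsPerLane; // fold two-input indices
      if (SrcLane >= 0 && SrcLane != L)
        return true; // this lane mixes two source lanes
      SrcLane = L;
    }
  }
  return false;
}

int main() {
  // In-lane shuffle: each lane reads only from itself.
  assert(!mixesSourceLanes({1, 0, 3, 2, 5, 4, 7, 6}, 4));
  // Lane swap: crosses lanes, but each lane has a single source lane, so it
  // is now permitted on AVX1 (lane permute + in-lane shuffle).
  assert(!mixesSourceLanes({4, 5, 6, 7, 0, 1, 2, 3}, 4));
  // Interleave: lane 0 mixes sources from lanes 0 and 1, still rejected.
  assert(mixesSourceLanes({0, 4, 1, 5, 2, 6, 3, 7}, 4));
  return 0;
}
```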
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 36 +++++++++++++++++++++---- llvm/test/CodeGen/X86/haddsub-4.ll | 6 ++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 79047c90ff999..1f59cd820ad7e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10661,6 +10661,35 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef Mask) { return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask); } +/// Test whether elements in each LaneSizeInBits lane in this shuffle mask come +/// from multiple lanes - this is different to isLaneCrossingShuffleMask to +/// better support 'repeated mask + lane permute' style shuffles. +static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits, + unsigned ScalarSizeInBits, + ArrayRef Mask) { + assert(LaneSizeInBits && ScalarSizeInBits && + (LaneSizeInBits % ScalarSizeInBits) == 0 && + "Illegal shuffle lane size"); + int NumElts = Mask.size(); + int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits; + int NumLanes = NumElts / NumEltsPerLane; + if (NumLanes > 1) { + for (int i = 0; i != NumLanes; ++i) { + int SrcLane = -1; + for (int j = 0; j != NumEltsPerLane; ++j) { + int M = Mask[(i * NumEltsPerLane) + j]; + if (M < 0) + continue; + int Lane = (M % NumElts) / NumEltsPerLane; + if (SrcLane >= 0 && SrcLane != Lane) + return true; + SrcLane = Lane; + } + } + } + return false; +} + /// Test whether a shuffle mask is equivalent within each sub-lane. /// /// This checks a shuffle mask to see if it is performing the same @@ -44598,12 +44627,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG, if (IsIdentityPostShuffle) PostShuffleMask.clear(); - // Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split), unless - // the shuffle can widen to shuffle entire lanes, which should still be quick. + // Avoid 128-bit multi lane shuffles if pre-AVX2 and FP (integer will split). 
if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() && - isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), - PostShuffleMask) && - !canScaleShuffleElements(PostShuffleMask, 2)) + isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask)) return false; // Assume a SingleSource HOP if we only shuffle one input and don't need to diff --git a/llvm/test/CodeGen/X86/haddsub-4.ll b/llvm/test/CodeGen/X86/haddsub-4.ll index 720b63431a24c..baa03d2591882 100644 --- a/llvm/test/CodeGen/X86/haddsub-4.ll +++ b/llvm/test/CodeGen/X86/haddsub-4.ll @@ -64,11 +64,9 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a0, <8 x float> %a1) { ; ; AVX1-LABEL: hadd_reverse_v8f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm0[3,1],ymm1[3,1],ymm0[7,5],ymm1[7,5] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[2,3,0,1] -; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0],ymm1[2,0],ymm0[6,4],ymm1[6,4] +; AVX1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6] ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: hadd_reverse_v8f32: From 051f293b7881116c0d4aae6e2ee0470b3ca58886 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 14:51:23 +0100 Subject: [PATCH 308/600] [X86] Remove unused canScaleShuffleElements helper The only use was removed at rG36750ba5bd0e9e72 Thanks to @nemanjai for the heads up --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1f59cd820ad7e..df88bdad1d47f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5614,11 +5614,6 @@ static bool scaleShuffleElements(ArrayRef Mask, unsigned NumDstElts, return false; } -static bool canScaleShuffleElements(ArrayRef Mask, unsigned NumDstElts) { - SmallVector WidenedMask; - return scaleShuffleElements(Mask, NumDstElts, WidenedMask); -} - /// Returns true if Elt is a constant zero or a floating point constant +0.0. 
bool X86::isZeroNode(SDValue Elt) { return isNullConstant(Elt) || isNullFPConstant(Elt); From 1c0a0dfa0236514fd1fbb1bccd8ad29f9d64e915 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 4 Aug 2020 15:58:12 +0200 Subject: [PATCH 309/600] [Concepts] Include the found concept decl when dumping the ConceptSpecializationExpr Differential Revision: https://reviews.llvm.org/D85124 --- clang/include/clang/AST/TextNodeDumper.h | 1 + clang/lib/AST/TextNodeDumper.cpp | 6 ++++++ clang/test/AST/ast-dump-concepts.cpp | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 8e8084aec3c18..f68a5dbfc2a0d 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -295,6 +295,7 @@ class TextNodeDumper void VisitObjCIvarRefExpr(const ObjCIvarRefExpr *Node); void VisitObjCBoolLiteralExpr(const ObjCBoolLiteralExpr *Node); void VisitOMPIteratorExpr(const OMPIteratorExpr *Node); + void VisitConceptSpecializationExpr(const ConceptSpecializationExpr *Node); void VisitRValueReferenceType(const ReferenceType *T); void VisitArrayType(const ArrayType *T); diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 5b6c6085e02cf..3d47d5cb66d2e 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1340,6 +1340,12 @@ void TextNodeDumper::VisitOMPIteratorExpr(const OMPIteratorExpr *Node) { } } +void TextNodeDumper::VisitConceptSpecializationExpr( + const ConceptSpecializationExpr *Node) { + OS << " "; + dumpBareDeclRef(Node->getFoundDecl()); +} + void TextNodeDumper::VisitRValueReferenceType(const ReferenceType *T) { if (T->isSpelledAsLValue()) OS << " written as lvalue reference"; diff --git a/clang/test/AST/ast-dump-concepts.cpp b/clang/test/AST/ast-dump-concepts.cpp index 7050ee0fb4492..630a953976fc0 100644 --- a/clang/test/AST/ast-dump-concepts.cpp +++ b/clang/test/AST/ast-dump-concepts.cpp @@ -15,7 +15,7 @@ concept binary_concept = true; template struct Foo { // CHECK: TemplateTypeParmDecl {{.*}} referenced Concept {{.*}} 'binary_concept' - // CHECK-NEXT: |-ConceptSpecializationExpr {{.*}} 'bool' + // CHECK-NEXT: |-ConceptSpecializationExpr {{.*}} 'bool' Concept {{.*}} 'binary_concept' // CHECK-NEXT: `-TemplateArgument {{.*}} type 'int' template R> Foo(R); From 62a933b72c5b060bcb2c7332d05082f002d6c65a Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 4 Aug 2020 08:30:51 -0500 Subject: [PATCH 310/600] [Support][PPC] Fix bot failures due to cd53ded557c3 Commit https://reviews.llvm.org/rGcd53ded557c3 attempts to fix the computation in computeHostNumPhysicalCores() to respect Affinity. However, the GLIBC wrapper of the affinity system call fails with a default size of cpu_set_t on systems that have more than 1024 CPUs. This just fixes the computation on such large machines. 
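For context, a minimal standalone sketch of the glibc pattern the fix relies on (assumptions: Linux with glibc and `_GNU_SOURCE`; the 2048-CPU bound mirrors the patch's choice). A fixed `cpu_set_t` only covers 1024 CPUs, so on larger machines `sched_getaffinity` fails with the default size and a dynamically allocated set is needed:

```cpp
#define _GNU_SOURCE
#include <sched.h>
#include <cstdio>

static int countAllowedCPUs() {
  cpu_set_t Fixed;
  // Fast path: sufficient on systems with <= 1024 CPUs.
  if (sched_getaffinity(0, sizeof(Fixed), &Fixed) == 0)
    return CPU_COUNT(&Fixed);

  // Fallback: allocate a set large enough for 2048 CPUs.
  cpu_set_t *Dyn = CPU_ALLOC(2048);
  if (!Dyn)
    return -1;
  size_t SetSize = CPU_ALLOC_SIZE(2048);
  int N = -1;
  if (sched_getaffinity(0, SetSize, Dyn) == 0)
    N = CPU_COUNT_S(SetSize, Dyn); // size-aware count for allocated sets
  CPU_FREE(Dyn);
  return N;
}

int main() { std::printf("allowed CPUs: %d\n", countAllowedCPUs()); }
```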
--- llvm/lib/Support/Host.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index d3b255ae0f2ec..890b1178cbd0a 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1274,9 +1274,21 @@ int computeHostNumPhysicalCores() { #elif defined(__linux__) && defined(__powerpc__) int computeHostNumPhysicalCores() { cpu_set_t Affinity; - if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) - return -1; - return CPU_COUNT(&Affinity); + if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) + return CPU_COUNT(&Affinity); + + // The call to sched_getaffinity() may have failed because the Affinity + // mask is too small for the number of CPU's on the system (i.e. the + // system has more than 1024 CPUs). Allocate a mask large enough for + // twice as many CPUs. + cpu_set_t *DynAffinity; + DynAffinity = CPU_ALLOC(2048); + if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { + int NumCPUs = CPU_COUNT(DynAffinity); + CPU_FREE(DynAffinity); + return NumCPUs; + } + return -1; } #elif defined(__linux__) && defined(__s390x__) int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } From 860cbbdd6b84017e6d37e1752b0358a05da6b115 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 23 Jul 2020 09:20:06 +0000 Subject: [PATCH 311/600] [SyntaxTree] Add support for `LiteralExpression` We use inheritance to model the grammar's disjunction rule: literal: integer-literal character-literal floating-point-literal string-literal boolean-literal pointer-literal user-defined-literal Differential Revision: https://reviews.llvm.org/D85186 --- clang/include/clang/Tooling/Syntax/Nodes.h | 57 ++++++++++++++-------- clang/lib/Tooling/Syntax/Nodes.cpp | 32 +----------- 2 files changed, 37 insertions(+), 52 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index d97b127638bba..8a873f9d5273b 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -267,66 +267,82 @@ class ParenExpression final : public Expression { syntax::Leaf *closeParen(); }; +/// Expression for literals. C++ [lex.literal] +class LiteralExpression : public Expression { +public: + LiteralExpression(NodeKind K) : Expression(K) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IntegerLiteralExpression || + N->kind() == NodeKind::CharacterLiteralExpression || + N->kind() == NodeKind::FloatingLiteralExpression || + N->kind() == NodeKind::StringLiteralExpression || + N->kind() == NodeKind::BoolLiteralExpression || + N->kind() == NodeKind::CxxNullPtrExpression || + N->kind() == NodeKind::IntegerUserDefinedLiteralExpression || + N->kind() == NodeKind::FloatUserDefinedLiteralExpression || + N->kind() == NodeKind::CharUserDefinedLiteralExpression || + N->kind() == NodeKind::StringUserDefinedLiteralExpression; + } + syntax::Leaf *literalToken(); +}; + /// Expression for integer literals. C++ [lex.icon] -class IntegerLiteralExpression final : public Expression { +class IntegerLiteralExpression final : public LiteralExpression { public: - IntegerLiteralExpression() : Expression(NodeKind::IntegerLiteralExpression) {} + IntegerLiteralExpression() + : LiteralExpression(NodeKind::IntegerLiteralExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::IntegerLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for character literals. 
C++ [lex.ccon] -class CharacterLiteralExpression final : public Expression { +class CharacterLiteralExpression final : public LiteralExpression { public: CharacterLiteralExpression() - : Expression(NodeKind::CharacterLiteralExpression) {} + : LiteralExpression(NodeKind::CharacterLiteralExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::CharacterLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for floating-point literals. C++ [lex.fcon] -class FloatingLiteralExpression final : public Expression { +class FloatingLiteralExpression final : public LiteralExpression { public: FloatingLiteralExpression() - : Expression(NodeKind::FloatingLiteralExpression) {} + : LiteralExpression(NodeKind::FloatingLiteralExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::FloatingLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for string-literals. C++ [lex.string] -class StringLiteralExpression final : public Expression { +class StringLiteralExpression final : public LiteralExpression { public: - StringLiteralExpression() : Expression(NodeKind::StringLiteralExpression) {} + StringLiteralExpression() + : LiteralExpression(NodeKind::StringLiteralExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::StringLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for boolean literals. C++ [lex.bool] -class BoolLiteralExpression final : public Expression { +class BoolLiteralExpression final : public LiteralExpression { public: - BoolLiteralExpression() : Expression(NodeKind::BoolLiteralExpression) {} + BoolLiteralExpression() + : LiteralExpression(NodeKind::BoolLiteralExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::BoolLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for the `nullptr` literal. C++ [lex.nullptr] -class CxxNullPtrExpression final : public Expression { +class CxxNullPtrExpression final : public LiteralExpression { public: - CxxNullPtrExpression() : Expression(NodeKind::CxxNullPtrExpression) {} + CxxNullPtrExpression() : LiteralExpression(NodeKind::CxxNullPtrExpression) {} static bool classof(const Node *N) { return N->kind() == NodeKind::CxxNullPtrExpression; } - syntax::Leaf *nullPtrKeyword(); }; /// Expression for user-defined literal. C++ [lex.ext] @@ -335,16 +351,15 @@ class CxxNullPtrExpression final : public Expression { /// user-defined-floating-point-literal /// user-defined-string-literal /// user-defined-character-literal -class UserDefinedLiteralExpression : public Expression { +class UserDefinedLiteralExpression : public LiteralExpression { public: - UserDefinedLiteralExpression(NodeKind K) : Expression(K) {} + UserDefinedLiteralExpression(NodeKind K) : LiteralExpression(K) {} static bool classof(const Node *N) { return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression || N->kind() == NodeKind::FloatUserDefinedLiteralExpression || N->kind() == NodeKind::CharUserDefinedLiteralExpression || N->kind() == NodeKind::StringUserDefinedLiteralExpression; } - syntax::Leaf *literalToken(); }; /// Expression for user-defined-integer-literal. 
C++ [lex.ext]
diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 2435ae0a91dd6..eced68fa2443f 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -230,37 +230,7 @@ syntax::Leaf *syntax::ParenExpression::closeParen() {
       findChild(syntax::NodeRole::CloseParen));
 }
 
-syntax::Leaf *syntax::IntegerLiteralExpression::literalToken() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::CharacterLiteralExpression::literalToken() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::FloatingLiteralExpression::literalToken() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::StringLiteralExpression::literalToken() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::BoolLiteralExpression::literalToken() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() {
-  return llvm::cast_or_null<syntax::Leaf>(
-      findChild(syntax::NodeRole::LiteralToken));
-}
-
-syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() {
+syntax::Leaf *syntax::LiteralExpression::literalToken() {
   return llvm::cast_or_null<syntax::Leaf>(
       findChild(syntax::NodeRole::LiteralToken));
 }

From 2d0b05969bc01a2fda14b8dc3e8c26c81efe9c6f Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Tue, 4 Aug 2020 09:49:32 -0400
Subject: [PATCH 312/600] [mlir][Vector] Relax condition for `splitFullAndPartialTransferPrecondition`

The `splitFullAndPartialTransferPrecondition` has a restrictive condition to
prevent the pattern from being applied recursively when it is nested under an
scf.IfOp. Relaxing the condition so that only the immediate parent op must
not be an scf.IfOp lets the pattern apply more generally while still
preventing recursion.

Differential Revision: https://reviews.llvm.org/D85209
---
 mlir/lib/Dialect/Vector/VectorTransforms.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
index 3c23c5a6d869d..33fbed65ace60 100644
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -2049,10 +2049,10 @@ LogicalResult mlir::vector::splitFullAndPartialTransferPrecondition(
   // Must have some masked dimension to be a candidate for splitting.
   if (!xferOp.hasMaskedDim())
     return failure();
-  // Don't split transfer operations under IfOp, this avoids applying the
-  // pattern recursively.
-  // TODO: improve the condition to make it more applicable.
-  if (xferOp.getParentOfType<scf::IfOp>())
+  // Don't split transfer operations directly under IfOp, this avoids applying
+  // the pattern recursively.
+  // TODO: improve the filtering condition to make it more applicable.
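+  // Checking only the immediate parent is enough to stop self-recursion:
+  // the split rewrite materializes an scf.IfOp directly above the transfer
+  // ops it creates, so a rewritten op is rejected on the next application.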
+  if (isa<scf::IfOp>(xferOp.getOperation()->getParentOp()))
     return failure();
   return success();
 }

From 8ce15f7eeb122c0bba4b676d797217359dd57c30 Mon Sep 17 00:00:00 2001
From: Eduardo Caldas
Date: Mon, 3 Aug 2020 18:29:12 +0000
Subject: [PATCH 313/600] [SyntaxTree] Fix crash on pointer to member function

Differential Revision: https://reviews.llvm.org/D85146
---
 clang/lib/Tooling/Syntax/BuildTree.cpp      | 12 +++
 clang/unittests/Tooling/Syntax/TreeTest.cpp | 93 +++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 1f192180ec451..15b7c8fab1982 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -939,6 +939,8 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     return true;
   }
 
+  // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test
+  // results. Find test coverage or remove it.
   bool TraverseParenTypeLoc(ParenTypeLoc L) {
     // We reverse order of traversal to get the proper syntax structure.
     if (!WalkUpFromParenTypeLoc(L))
@@ -987,6 +989,16 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
     return WalkUpFromFunctionTypeLoc(L);
   }
 
+  bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) {
+    // In the source code "void (Y::*mp)()", `MemberPointerTypeLoc` corresponds
+    // to "Y::*", but it points to a `ParenTypeLoc` that corresponds to
+    // "(Y::*mp)". We thus reverse the order of traversal to get the proper
+    // syntax structure.
+    if (!WalkUpFromMemberPointerTypeLoc(L))
+      return false;
+    return TraverseTypeLoc(L.getPointeeLoc());
+  }
+
   bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {
     auto SR = L.getLocalSourceRange();
     Builder.foldNode(Builder.getRange(SR),
diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index a722ca2b1a45a..3ccfabb95da90 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -4074,6 +4074,99 @@ const int X::* b;
 )txt"));
 }
 
+TEST_P(SyntaxTreeTest, MemberFunctionPointer) {
+  if (!GetParam().isCXX()) {
+    return;
+  }
+  EXPECT_TRUE(treeDumpEqual(
+      R"cpp(
+struct X {
+  struct Y {};
+};
+void (X::*xp)();
+void (X::**xpp)(const int*);
+// FIXME: Generate the right syntax tree for this type,
+// i.e.
create a syntax node for the outer member pointer +void (X::Y::*xyp)(const int*, char); +)cpp", + R"txt( +*: TranslationUnit +|-SimpleDeclaration +| |-struct +| |-X +| |-{ +| |-SimpleDeclaration +| | |-struct +| | |-Y +| | |-{ +| | |-} +| | `-; +| |-} +| `-; +|-SimpleDeclaration +| |-void +| |-SimpleDeclarator +| | |-ParenDeclarator +| | | |-( +| | | |-MemberPointer +| | | | |-X +| | | | |-:: +| | | | `-* +| | | |-xp +| | | `-) +| | `-ParametersAndQualifiers +| | |-( +| | `-) +| `-; +|-SimpleDeclaration +| |-void +| |-SimpleDeclarator +| | |-ParenDeclarator +| | | |-( +| | | |-MemberPointer +| | | | |-X +| | | | |-:: +| | | | `-* +| | | |-* +| | | |-xpp +| | | `-) +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-const +| | | |-int +| | | `-SimpleDeclarator +| | | `-* +| | `-) +| `-; +`-SimpleDeclaration + |-void + |-SimpleDeclarator + | |-ParenDeclarator + | | |-( + | | |-X + | | |-:: + | | |-MemberPointer + | | | |-Y + | | | |-:: + | | | `-* + | | |-xyp + | | `-) + | `-ParametersAndQualifiers + | |-( + | |-SimpleDeclaration + | | |-const + | | |-int + | | `-SimpleDeclarator + | | `-* + | |-, + | |-SimpleDeclaration + | | `-char + | `-) + `-; +)txt")); +} + TEST_P(SyntaxTreeTest, ComplexDeclarator) { EXPECT_TRUE(treeDumpEqual( R"cpp( From 961da69d7eafe44411d5ac9719209653d196f9e2 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Mon, 3 Aug 2020 10:54:50 -0700 Subject: [PATCH 314/600] Improve diagnostics for disallowed attributes used with multiversioning Since we permit using SOME attributes (at the moment, just 1) with multiversioning, we should improve the message as it still implies that no attributes should be combined with multiversioning. --- .../clang/Basic/DiagnosticSemaKinds.td | 4 +-- clang/lib/Sema/SemaDecl.cpp | 36 ++++++++++++------- clang/test/Sema/attr-cpuspecific.c | 4 +-- clang/test/Sema/attr-target-mv.c | 6 ++-- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 91112860a2d02..288e8232ca744 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10751,9 +10751,9 @@ def err_multiversion_duplicate : Error< "multiversioned function redeclarations require identical target attributes">; def err_multiversion_noproto : Error< "multiversioned function must have a prototype">; -def err_multiversion_no_other_attrs : Error< +def err_multiversion_disallowed_other_attr : Error< "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined" - " with other attributes">; + " with attribute %1">; def err_multiversion_diff : Error< "multiversioned function declaration has a different %select{calling convention" "|return type|constexpr specification|inline specification|storage class|" diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 531c2801bf929..ba05b0d32cf4c 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10037,23 +10037,37 @@ static bool AttrCompatibleWithMultiVersion(attr::Kind Kind, } } -static bool HasNonMultiVersionAttributes(const FunctionDecl *FD, - MultiVersionKind MVType) { +static bool checkNonMultiVersionCompatAttributes(Sema &S, + const FunctionDecl *FD, + const FunctionDecl *CausedFD, + MultiVersionKind MVType) { + bool IsCPUSpecificCPUDispatchMVType = + MVType == MultiVersionKind::CPUDispatch || + MVType == MultiVersionKind::CPUSpecific; + const auto Diagnose = [FD, CausedFD, 
IsCPUSpecificCPUDispatchMVType]( + Sema &S, const Attr *A) { + S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr) + << IsCPUSpecificCPUDispatchMVType << A; + if (CausedFD) + S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here); + return true; + }; + for (const Attr *A : FD->attrs()) { switch (A->getKind()) { case attr::CPUDispatch: case attr::CPUSpecific: if (MVType != MultiVersionKind::CPUDispatch && MVType != MultiVersionKind::CPUSpecific) - return true; + return Diagnose(S, A); break; case attr::Target: if (MVType != MultiVersionKind::Target) - return true; + return Diagnose(S, A); break; default: if (!AttrCompatibleWithMultiVersion(A->getKind(), MVType)) - return true; + return Diagnose(S, A); break; } } @@ -10189,16 +10203,12 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD, // For now, disallow all other attributes. These should be opt-in, but // an analysis of all of them is a future FIXME. - if (CausesMV && OldFD && HasNonMultiVersionAttributes(OldFD, MVType)) { - S.Diag(OldFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; - S.Diag(NewFD->getLocation(), diag::note_multiversioning_caused_here); + if (CausesMV && OldFD && + checkNonMultiVersionCompatAttributes(S, OldFD, NewFD, MVType)) return true; - } - if (HasNonMultiVersionAttributes(NewFD, MVType)) - return S.Diag(NewFD->getLocation(), diag::err_multiversion_no_other_attrs) - << IsCPUSpecificCPUDispatchMVType; + if (checkNonMultiVersionCompatAttributes(S, NewFD, nullptr, MVType)) + return true; // Only allow transition to MultiVersion if it hasn't been used. if (OldFD && CausesMV && OldFD->isUsed(false)) diff --git a/clang/test/Sema/attr-cpuspecific.c b/clang/test/Sema/attr-cpuspecific.c index ae86742ca0810..e32c7a22894d6 100644 --- a/clang/test/Sema/attr-cpuspecific.c +++ b/clang/test/Sema/attr-cpuspecific.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsyntax-only -verify %s -Wnonnull void __attribute__((cpu_specific(ivybridge))) no_default(void); void __attribute__((cpu_specific(sandybridge))) no_default(void); @@ -80,7 +80,7 @@ int __attribute((cpu_dispatch())) no_dispatch(void) {} // expected-error@+1 {{'cpu_specific' attribute takes at least 1 argument}} int __attribute((cpu_specific())) no_specific(void) {} -//expected-error@+1 {{attribute 'cpu_specific' multiversioning cannot be combined}} +//expected-error@+1 {{attribute 'cpu_specific' multiversioning cannot be combined with attribute 'used'}} void __attribute__((used,cpu_specific(sandybridge))) addtl_attrs(void); void __attribute__((target("default"))) addtl_attrs2(void); diff --git a/clang/test/Sema/attr-target-mv.c b/clang/test/Sema/attr-target-mv.c index e9156a6c73e73..33a2c4fa54ebf 100644 --- a/clang/test/Sema/attr-target-mv.c +++ b/clang/test/Sema/attr-target-mv.c @@ -77,14 +77,14 @@ int prev_no_target2(void); int __attribute__((target("arch=ivybridge"))) prev_no_target2(void); void __attribute__((target("sse4.2"))) addtl_attrs(void); -//expected-error@+2 {{attribute 'target' multiversioning cannot be combined}} +//expected-error@+2 {{attribute 'target' multiversioning cannot be combined with attribute 'no_caller_saved_registers'}} void __attribute__((no_caller_saved_registers,target("arch=sandybridge"))) addtl_attrs(void); -//expected-error@+1 {{attribute 'target' multiversioning cannot be combined}} +//expected-error@+1 {{attribute 'target' multiversioning 
cannot be combined with attribute 'no_caller_saved_registers'}}
 void __attribute__((target("default"), no_caller_saved_registers)) addtl_attrs2(void);
-//expected-error@+2 {{attribute 'target' multiversioning cannot be combined}}
+//expected-error@+2 {{attribute 'target' multiversioning cannot be combined with attribute 'no_caller_saved_registers'}}
 //expected-note@+2 {{function multiversioning caused by this declaration}}
 void __attribute__((no_caller_saved_registers,target("sse4.2"))) addtl_attrs3(void);
 void __attribute__((target("arch=sandybridge"))) addtl_attrs3(void);

From 0a8ac91a084504929b1ef4ec1fee693455bd796d Mon Sep 17 00:00:00 2001
From: Erich Keane
Date: Tue, 4 Aug 2020 06:28:29 -0700
Subject: [PATCH 315/600] Permit nothrow and nonnull with multiversioning.

Some shipped versions of stdlib.h use nonnull and nothrow with function
multiversioning. Support these, as they are generally harmless.
---
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 +++
 clang/lib/Sema/SemaDecl.cpp                      | 7 +++++--
 clang/test/Sema/attr-cpuspecific.c               | 2 ++
 clang/test/Sema/attr-target-mv.c                 | 7 +++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 288e8232ca744..054b81c4a72b5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10754,6 +10754,9 @@ def err_multiversion_noproto : Error<
   "multiversioned function must have a prototype">;
 def err_multiversion_disallowed_other_attr : Error<
   "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined"
   " with attribute %1">;
+def err_multiversion_mismatched_attrs
+    : Error<"attributes on multiversioned functions must all match, attribute "
+            "%0 %select{is missing|has different arguments}1">;
 def err_multiversion_diff : Error<
   "multiversioned function declaration has a different %select{calling convention"
   "|return type|constexpr specification|inline specification|storage class|"
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index ba05b0d32cf4c..77e15f187e538 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -10029,11 +10029,16 @@ static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) {
 // multiversion functions.
 static bool AttrCompatibleWithMultiVersion(attr::Kind Kind,
                                            MultiVersionKind MVType) {
+  // Note: this list/diagnosis must match the list in
+  // checkMultiversionAttributesAllSame.
   switch (Kind) {
   default:
     return false;
   case attr::Used:
     return MVType == MultiVersionKind::Target;
+  case attr::NonNull:
+  case attr::NoThrow:
+    return true;
   }
 }
 
@@ -10201,8 +10206,6 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
       MVType == MultiVersionKind::CPUDispatch ||
       MVType == MultiVersionKind::CPUSpecific;
 
-  // For now, disallow all other attributes. These should be opt-in, but
-  // an analysis of all of them is a future FIXME.
   if (CausesMV && OldFD &&
       checkNonMultiVersionCompatAttributes(S, OldFD, NewFD, MVType))
     return true;
diff --git a/clang/test/Sema/attr-cpuspecific.c b/clang/test/Sema/attr-cpuspecific.c
index e32c7a22894d6..9cfeef8a23562 100644
--- a/clang/test/Sema/attr-cpuspecific.c
+++ b/clang/test/Sema/attr-cpuspecific.c
@@ -115,3 +115,5 @@ int use3(void) {
 // expected-warning@+1 {{CPU list contains duplicate entries; attribute ignored}}
 int __attribute__((cpu_dispatch(pentium_iii, pentium_iii_no_xmm_regs))) dupe_p3(void);
+
+void __attribute__((cpu_specific(atom), nothrow, nonnull(1))) addtl_attrs(int*);
diff --git a/clang/test/Sema/attr-target-mv.c b/clang/test/Sema/attr-target-mv.c
index 33a2c4fa54ebf..3f072b19083f3 100644
--- a/clang/test/Sema/attr-target-mv.c
+++ b/clang/test/Sema/attr-target-mv.c
@@ -101,3 +101,10 @@ __vectorcall int __attribute__((target("arch=sandybridge"))) diff_cc(void);
 int __attribute__((target("sse4.2"))) diff_ret(void);
 // expected-error@+1 {{multiversioned function declaration has a different return type}}
 short __attribute__((target("arch=sandybridge"))) diff_ret(void);
+
+void __attribute__((target("sse4.2"), nothrow, used, nonnull(1))) addtl_attrs5(int*);
+void __attribute__((target("arch=sandybridge"))) addtl_attrs5(int*);
+
+void __attribute__((target("sse4.2"))) addtl_attrs6(int*);
+void __attribute__((target("arch=sandybridge"), nothrow, used, nonnull)) addtl_attrs6(int*);
+

From feb9d8bd8e60100b29f8a065995e14a16c871914 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 4 Aug 2020 15:52:09 +0100
Subject: [PATCH 316/600] Fix sphinx indentation warning.

Don't double indent and make it clear we're referring to the latency mode.
---
 llvm/docs/CommandGuide/llvm-exegesis.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst
index 8cc1a237e9969..c5756f652bee5 100644
--- a/llvm/docs/CommandGuide/llvm-exegesis.rst
+++ b/llvm/docs/CommandGuide/llvm-exegesis.rst
@@ -196,7 +196,7 @@ OPTIONS
   `latency` mode can be make use of either RDTSC or LBR.
   `latency[LBR]` is only available on X86 (at least `Skylake`).
-  To run in this mode, a positive value must be specified for `x86-lbr-sample-period` and `--repetition-mode=loop`
+  To run in `latency` mode, a positive value must be specified for `x86-lbr-sample-period` and `--repetition-mode=loop`.
 
 In `analysis` mode, you also need to specify at least one of the
 `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`.

From 6f0da46d538ec3003584cc6ea4419f4403f19771 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 4 Aug 2020 15:57:31 +0100
Subject: [PATCH 317/600] [X86] getFauxShuffleMask - drop unnecessary
 computeKnownBits OR(X,Y) shuffle decoding.

Now that rG47cea9e82dda941e lets us aggressively decode multi-use shuffles
for the OR(SHUFFLE(),SHUFFLE()) case, we don't need the computeKnownBits
variant any more.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 34 -------------------------
 1 file changed, 34 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index df88bdad1d47f..25bd7609fc54c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7360,40 +7360,6 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
     return true;
   }
   case ISD::OR: {
-    // Inspect each operand at the byte level. We can merge these into a
-    // blend shuffle mask if for each byte at least one is masked out (zero).
- KnownBits Known0 = - DAG.computeKnownBits(N.getOperand(0), DemandedElts, Depth + 1); - KnownBits Known1 = - DAG.computeKnownBits(N.getOperand(1), DemandedElts, Depth + 1); - if (Known0.One.isNullValue() && Known1.One.isNullValue()) { - bool IsByteMask = true; - APInt ZeroMask = APInt::getNullValue(NumBytesPerElt); - APInt SelectMask = APInt::getNullValue(NumBytesPerElt); - for (unsigned i = 0; i != NumBytesPerElt && IsByteMask; ++i) { - unsigned LHS = Known0.Zero.extractBits(8, i * 8).getZExtValue(); - unsigned RHS = Known1.Zero.extractBits(8, i * 8).getZExtValue(); - if (LHS == 255 && RHS == 0) - SelectMask.setBit(i); - else if (LHS == 255 && RHS == 255) - ZeroMask.setBit(i); - else if (!(LHS == 0 && RHS == 255)) - IsByteMask = false; - } - if (IsByteMask) { - for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) { - for (unsigned j = 0; j != NumBytesPerElt; ++j) { - unsigned Ofs = (SelectMask[j] ? NumSizeInBytes : 0); - int Idx = (ZeroMask[j] ? (int)SM_SentinelZero : (i + j + Ofs)); - Mask.push_back(Idx); - } - } - Ops.push_back(N.getOperand(0)); - Ops.push_back(N.getOperand(1)); - return true; - } - } - // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other // is a valid shuffle index. SDValue N0 = peekThroughBitcasts(N.getOperand(0)); From 6e727551b9db76dd209a34e312dd44faae3843b6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 16:12:50 +0100 Subject: [PATCH 318/600] Fix sphinx indentation warning to stop newline in byref section html output. --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index af93a6ed5c56e..4624e8042e16f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1087,7 +1087,7 @@ Currently, only the following parameter attributes are defined: ``byval``. If the alignment is not specified, then the code generator makes a target-specific assumption. - This is intended for representing ABI constraints, and is not + This is intended for representing ABI constraints, and is not intended to be inferred for optimization use. .. 
_attr_preallocated: From ee75cf36bb1790a51cd1fd7c022b0ece101eb248 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 24 Jul 2020 11:41:30 +0100 Subject: [PATCH 319/600] [AMDGPU] Generate frem test checks Differential Revision: https://reviews.llvm.org/D84515 --- llvm/test/CodeGen/AMDGPU/frem.ll | 1022 ++++++++++++++++++++++++++++-- 1 file changed, 980 insertions(+), 42 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 445b726293089..aef979f7d618d 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -1,21 +1,114 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=+mad-mac-f32-insts -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mattr=+mad-mac-f32-insts -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s -; FUNC-LABEL: {{^}}frem_f32: -; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}} -; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 -; GCN: v_div_scale_f32 - -; GCN: v_rcp_f32_e32 -; GCN: v_fma_f32 -; GCN: v_mul_f32_e32 -; GCN: v_div_fmas_f32 -; GCN: v_div_fixup_f32 -; GCN: v_trunc_f32_e32 v{{[0-9]+}}, v{{[0-9]+}} -; GCN: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GCN: s_endpgm define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, +; SI-LABEL: frem_f32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:16 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v4, v3 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; SI-NEXT: v_fma_f32 v4, v5, v4, v4 +; SI-NEXT: v_mul_f32_e32 v5, v2, v4 +; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; SI-NEXT: v_fma_f32 v5, v6, v4, v5 +; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: 
frem_f32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 +; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v4, v3 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; CI-NEXT: v_fma_f32 v4, v5, v4, v4 +; CI-NEXT: v_mul_f32_e32 v5, v2, v4 +; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; CI-NEXT: v_fma_f32 v5, v6, v4, v5 +; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_f32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 16 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_dword v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f32 v5, s[0:1], v2, v2, v4 +; VI-NEXT: v_div_scale_f32 v3, vcc, v4, v2, v4 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; VI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; VI-NEXT: v_fma_f32 v6, v7, v6, v6 +; VI-NEXT: v_mul_f32_e32 v7, v3, v6 +; VI-NEXT: v_fma_f32 v8, -v5, v7, v3 +; VI-NEXT: v_fma_f32 v7, v8, v6, v7 +; VI-NEXT: v_fma_f32 v3, -v5, v7, v3 +; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; VI-NEXT: v_div_fmas_f32 v3, v3, v6, v7 +; VI-NEXT: v_div_fixup_f32 v3, v3, v2, v4 +; VI-NEXT: v_trunc_f32_e32 v3, v3 +; VI-NEXT: v_mad_f32 v2, -v3, v2, v4 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm float addrspace(1)* %in2) #0 { %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 %r0 = load float, float addrspace(1)* %in1, align 4 @@ -25,15 +118,79 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ret void } -; FUNC-LABEL: {{^}}unsafe_frem_f32: -; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16 -; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}} -; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]] -; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[X]], [[INVY]] -; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]] -; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] -; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, +; SI-LABEL: unsafe_frem_f32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 
s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:16 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_rcp_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: unsafe_frem_f32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_rcp_f32_e32 v2, v1 +; CI-NEXT: v_mul_f32_e32 v2, v0, v2 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: unsafe_frem_f32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 16 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_dword v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_rcp_f32_e32 v3, v2 +; VI-NEXT: v_mul_f32_e32 v3, v4, v3 +; VI-NEXT: v_trunc_f32_e32 v3, v3 +; VI-NEXT: v_mad_f32 v2, -v3, v2, v4 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm float addrspace(1)* %in2) #1 { %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 %r0 = load float, float addrspace(1)* %in1, align 4 @@ -43,18 +200,129 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ret void } -; FUNC-LABEL: {{^}}frem_f64: -; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 -; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 -; GCN-DAG: v_div_fmas_f64 -; GCN-DAG: v_div_scale_f64 -; GCN-DAG: v_mul_f64 -; CI: v_trunc_f64_e32 -; CI: v_mul_f64 -; GCN: v_add_f64 -; GCN: buffer_store_dwordx2 -; GCN: s_endpgm define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +; SI-LABEL: frem_f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s8 +; SI-NEXT: s_mov_b32 s5, s9 +; SI-NEXT: s_mov_b32 s0, s10 +; SI-NEXT: s_mov_b32 s1, s11 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: s_mov_b32 s14, s6 +; SI-NEXT: s_mov_b32 s15, s7 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[12:15], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f64 v[4:5], s[0:1], v[2:3], v[2:3], v[0:1] +; SI-NEXT: v_rcp_f64_e32 
v[6:7], v[4:5] +; SI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; SI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; SI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; SI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; SI-NEXT: v_div_scale_f64 v[8:9], s[0:1], v[0:1], v[2:3], v[0:1] +; SI-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] +; SI-NEXT: v_fma_f64 v[12:13], -v[4:5], v[10:11], v[8:9] +; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9 +; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc +; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 0 +; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11] +; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 +; SI-NEXT: v_and_b32_e32 v6, v4, v6 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] +; SI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] +; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_f64: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f64 v[4:5], s[0:1], v[2:3], v[2:3], v[0:1] +; CI-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] +; CI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; CI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; CI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; CI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; CI-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] +; CI-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] +; CI-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] +; CI-NEXT: s_nop 1 +; CI-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] +; CI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] +; CI-NEXT: v_trunc_f64_e32 v[4:5], v[4:5] +; CI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] +; CI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[2:3] +; VI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] +; 
VI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; VI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; VI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; VI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; VI-NEXT: v_div_scale_f64 v[10:11], vcc, v[2:3], v[4:5], v[2:3] +; VI-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] +; VI-NEXT: v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11] +; VI-NEXT: s_nop 1 +; VI-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] +; VI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[2:3] +; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] +; VI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] +; VI-NEXT: v_add_f64 v[2:3], v[2:3], -v[4:5] +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm double addrspace(1)* %in2) #0 { %r0 = load double, double addrspace(1)* %in1, align 8 %r1 = load double, double addrspace(1)* %in2, align 8 @@ -63,14 +331,95 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ret void } -; FUNC-LABEL: {{^}}unsafe_frem_f64: -; GCN: v_rcp_f64_e32 -; GCN: v_mul_f64 -; SI: v_bfe_u32 -; CI: v_trunc_f64_e32 -; GCN: v_fma_f64 -; GCN: s_endpgm define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +; SI-LABEL: unsafe_frem_f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s15, 0xf000 +; SI-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s4 +; SI-NEXT: s_mov_b32 s13, s5 +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: s_mov_b32 s1, s7 +; SI-NEXT: s_mov_b32 s2, s14 +; SI-NEXT: s_mov_b32 s3, s15 +; SI-NEXT: s_mov_b32 s10, s14 +; SI-NEXT: s_mov_b32 s11, s15 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] +; SI-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, s14 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 +; SI-NEXT: v_and_b32_e32 v6, v4, v6 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] +; SI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] +; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: unsafe_frem_f64: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] +; CI-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] +; CI-NEXT: v_trunc_f64_e32 v[4:5], v[4:5] +; CI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] +; 
CI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: unsafe_frem_f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] +; VI-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7] +; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] +; VI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] +; VI-NEXT: v_add_f64 v[2:3], v[2:3], -v[4:5] +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm double addrspace(1)* %in2) #1 { %r0 = load double, double addrspace(1)* %in1, align 8 %r1 = load double, double addrspace(1)* %in2, align 8 @@ -80,6 +429,162 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add } define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, +; SI-LABEL: frem_v2f32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 offset:32 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f32 v4, vcc, v1, v3, v1 +; SI-NEXT: v_div_scale_f32 v5, s[4:5], v3, v3, v1 +; SI-NEXT: v_rcp_f32_e32 v6, v5 +; SI-NEXT: s_mov_b32 s6, 3 +; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; SI-NEXT: v_fma_f32 v6, v7, v6, v6 +; SI-NEXT: v_mul_f32_e32 v7, v4, v6 +; SI-NEXT: v_fma_f32 v8, -v5, v7, v4 +; SI-NEXT: v_fma_f32 v7, v8, v6, v7 +; SI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 +; SI-NEXT: v_div_fixup_f32 v4, v4, v3, v1 +; SI-NEXT: v_trunc_f32_e32 v4, v4 +; SI-NEXT: v_mad_f32 v1, -v4, v3, v1 +; SI-NEXT: v_div_scale_f32 v3, vcc, v0, v2, v0 +; SI-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 +; SI-NEXT: v_rcp_f32_e32 v5, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v6, -v4, v5, 1.0 +; SI-NEXT: v_fma_f32 v5, v6, v5, v5 +; SI-NEXT: v_mul_f32_e32 v6, v3, v5 +; SI-NEXT: v_fma_f32 v7, -v4, v6, v3 +; SI-NEXT: v_fma_f32 v6, v7, v5, v6 +; SI-NEXT: v_fma_f32 v3, -v4, v6, v3 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v3, v3, v5, v6 +; SI-NEXT: v_div_fixup_f32 v3, v3, v2, v0 +; SI-NEXT: v_trunc_f32_e32 v3, v3 +; SI-NEXT: v_mad_f32 v0, -v3, v2, v0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_v2f32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s10, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s0, s4 +; 
CI-NEXT: s_mov_b32 s1, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s2 +; CI-NEXT: s_mov_b32 s7, s3 +; CI-NEXT: s_mov_b32 s11, s3 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[8:11], 0 offset:32 +; CI-NEXT: s_mov_b32 s6, 3 +; CI-NEXT: s_mov_b32 s7, 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f32 v5, s[4:5], v3, v3, v1 +; CI-NEXT: v_div_scale_f32 v4, vcc, v1, v3, v1 +; CI-NEXT: v_rcp_f32_e32 v6, v5 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; CI-NEXT: v_fma_f32 v6, v7, v6, v6 +; CI-NEXT: v_mul_f32_e32 v7, v4, v6 +; CI-NEXT: v_fma_f32 v8, -v5, v7, v4 +; CI-NEXT: v_fma_f32 v7, v8, v6, v7 +; CI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 +; CI-NEXT: v_div_fixup_f32 v4, v4, v3, v1 +; CI-NEXT: v_trunc_f32_e32 v4, v4 +; CI-NEXT: v_mad_f32 v1, -v4, v3, v1 +; CI-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 +; CI-NEXT: v_div_scale_f32 v3, vcc, v0, v2, v0 +; CI-NEXT: v_rcp_f32_e32 v5, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v6, -v4, v5, 1.0 +; CI-NEXT: v_fma_f32 v5, v6, v5, v5 +; CI-NEXT: v_mul_f32_e32 v6, v3, v5 +; CI-NEXT: v_fma_f32 v7, -v4, v6, v3 +; CI-NEXT: v_fma_f32 v6, v7, v5, v6 +; CI-NEXT: v_fma_f32 v3, -v4, v6, v3 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v3, v3, v5, v6 +; CI-NEXT: v_div_fixup_f32 v3, v3, v2, v0 +; CI-NEXT: v_trunc_f32_e32 v3, v3 +; CI-NEXT: v_mad_f32 v0, -v3, v2, v0 +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_v2f32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s2, 3 +; VI-NEXT: s_mov_b32 s3, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 32 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f32 v7, s[0:1], v5, v5, v3 +; VI-NEXT: v_div_scale_f32 v6, vcc, v3, v5, v3 +; VI-NEXT: v_rcp_f32_e32 v8, v7 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v9, -v7, v8, 1.0 +; VI-NEXT: v_fma_f32 v8, v9, v8, v8 +; VI-NEXT: v_mul_f32_e32 v9, v6, v8 +; VI-NEXT: v_fma_f32 v10, -v7, v9, v6 +; VI-NEXT: v_fma_f32 v9, v10, v8, v9 +; VI-NEXT: v_fma_f32 v6, -v7, v9, v6 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 +; VI-NEXT: v_div_fixup_f32 v6, v6, v5, v3 +; VI-NEXT: v_trunc_f32_e32 v6, v6 +; VI-NEXT: v_mad_f32 v3, -v6, v5, v3 +; VI-NEXT: v_div_scale_f32 v6, s[0:1], v4, v4, v2 +; VI-NEXT: v_div_scale_f32 v5, vcc, v2, v4, v2 +; VI-NEXT: v_rcp_f32_e32 v7, v6 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v8, -v6, v7, 1.0 +; VI-NEXT: v_fma_f32 v7, v8, v7, v7 +; VI-NEXT: v_mul_f32_e32 v8, v5, v7 +; VI-NEXT: v_fma_f32 v9, -v6, v8, v5 +; VI-NEXT: v_fma_f32 v8, v9, v7, v8 +; VI-NEXT: v_fma_f32 v5, -v6, v8, v5 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 +; VI-NEXT: v_div_fixup_f32 v5, v5, v4, v2 +; VI-NEXT: v_trunc_f32_e32 v5, v5 +; VI-NEXT: 
v_mad_f32 v2, -v5, v4, v2 +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm <2 x float> addrspace(1)* %in2) #0 { %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4 %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8 @@ -90,6 +595,252 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float } define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, +; SI-LABEL: frem_v4f32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:64 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f32 v8, vcc, v3, v7, v3 +; SI-NEXT: v_div_scale_f32 v9, s[4:5], v7, v7, v3 +; SI-NEXT: v_rcp_f32_e32 v10, v9 +; SI-NEXT: s_mov_b32 s6, 3 +; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v11, -v9, v10, 1.0 +; SI-NEXT: v_fma_f32 v10, v11, v10, v10 +; SI-NEXT: v_mul_f32_e32 v11, v8, v10 +; SI-NEXT: v_fma_f32 v12, -v9, v11, v8 +; SI-NEXT: v_fma_f32 v11, v12, v10, v11 +; SI-NEXT: v_fma_f32 v8, -v9, v11, v8 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 +; SI-NEXT: v_div_fixup_f32 v8, v8, v7, v3 +; SI-NEXT: v_trunc_f32_e32 v8, v8 +; SI-NEXT: v_mad_f32 v3, -v8, v7, v3 +; SI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 +; SI-NEXT: v_div_scale_f32 v8, s[4:5], v6, v6, v2 +; SI-NEXT: v_rcp_f32_e32 v9, v8 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v10, -v8, v9, 1.0 +; SI-NEXT: v_fma_f32 v9, v10, v9, v9 +; SI-NEXT: v_mul_f32_e32 v10, v7, v9 +; SI-NEXT: v_fma_f32 v11, -v8, v10, v7 +; SI-NEXT: v_fma_f32 v10, v11, v9, v10 +; SI-NEXT: v_fma_f32 v7, -v8, v10, v7 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v7, v7, v9, v10 +; SI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 +; SI-NEXT: v_trunc_f32_e32 v7, v7 +; SI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; SI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 +; SI-NEXT: v_div_scale_f32 v7, s[4:5], v5, v5, v1 +; SI-NEXT: v_rcp_f32_e32 v8, v7 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v9, -v7, v8, 1.0 +; SI-NEXT: v_fma_f32 v8, v9, v8, v8 +; SI-NEXT: v_mul_f32_e32 v9, v6, v8 +; SI-NEXT: v_fma_f32 v10, -v7, v9, v6 +; SI-NEXT: v_fma_f32 v9, v10, v8, v9 +; SI-NEXT: v_fma_f32 v6, -v7, v9, v6 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 +; SI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 +; SI-NEXT: v_trunc_f32_e32 v6, v6 +; SI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; SI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 +; SI-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, v0 +; SI-NEXT: v_rcp_f32_e32 v7, v6 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v8, -v6, v7, 1.0 +; SI-NEXT: v_fma_f32 v7, v8, v7, v7 +; SI-NEXT: v_mul_f32_e32 v8, v5, v7 +; SI-NEXT: v_fma_f32 v9, -v6, v8, v5 +; SI-NEXT: v_fma_f32 v8, v9, v7, v8 +; SI-NEXT: v_fma_f32 v5, -v6, v8, v5 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 
+; SI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 +; SI-NEXT: v_trunc_f32_e32 v5, v5 +; SI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_v4f32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s10, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s0, s4 +; CI-NEXT: s_mov_b32 s1, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s2 +; CI-NEXT: s_mov_b32 s7, s3 +; CI-NEXT: s_mov_b32 s11, s3 +; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:64 +; CI-NEXT: s_mov_b32 s6, 3 +; CI-NEXT: s_mov_b32 s7, 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f32 v9, s[4:5], v7, v7, v3 +; CI-NEXT: v_div_scale_f32 v8, vcc, v3, v7, v3 +; CI-NEXT: v_rcp_f32_e32 v10, v9 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v11, -v9, v10, 1.0 +; CI-NEXT: v_fma_f32 v10, v11, v10, v10 +; CI-NEXT: v_mul_f32_e32 v11, v8, v10 +; CI-NEXT: v_fma_f32 v12, -v9, v11, v8 +; CI-NEXT: v_fma_f32 v11, v12, v10, v11 +; CI-NEXT: v_fma_f32 v8, -v9, v11, v8 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 +; CI-NEXT: v_div_fixup_f32 v8, v8, v7, v3 +; CI-NEXT: v_trunc_f32_e32 v8, v8 +; CI-NEXT: v_mad_f32 v3, -v8, v7, v3 +; CI-NEXT: v_div_scale_f32 v8, s[4:5], v6, v6, v2 +; CI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 +; CI-NEXT: v_rcp_f32_e32 v9, v8 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v10, -v8, v9, 1.0 +; CI-NEXT: v_fma_f32 v9, v10, v9, v9 +; CI-NEXT: v_mul_f32_e32 v10, v7, v9 +; CI-NEXT: v_fma_f32 v11, -v8, v10, v7 +; CI-NEXT: v_fma_f32 v10, v11, v9, v10 +; CI-NEXT: v_fma_f32 v7, -v8, v10, v7 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v7, v7, v9, v10 +; CI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 +; CI-NEXT: v_trunc_f32_e32 v7, v7 +; CI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; CI-NEXT: v_div_scale_f32 v7, s[4:5], v5, v5, v1 +; CI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 +; CI-NEXT: v_rcp_f32_e32 v8, v7 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v9, -v7, v8, 1.0 +; CI-NEXT: v_fma_f32 v8, v9, v8, v8 +; CI-NEXT: v_mul_f32_e32 v9, v6, v8 +; CI-NEXT: v_fma_f32 v10, -v7, v9, v6 +; CI-NEXT: v_fma_f32 v9, v10, v8, v9 +; CI-NEXT: v_fma_f32 v6, -v7, v9, v6 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 +; CI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 +; CI-NEXT: v_trunc_f32_e32 v6, v6 +; CI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; CI-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, v0 +; CI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 +; CI-NEXT: v_rcp_f32_e32 v7, v6 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v8, -v6, v7, 1.0 +; CI-NEXT: v_fma_f32 v7, v8, v7, v7 +; CI-NEXT: v_mul_f32_e32 v8, v5, v7 +; CI-NEXT: v_fma_f32 v9, -v6, v8, v5 +; CI-NEXT: v_fma_f32 v8, v9, v7, v8 +; CI-NEXT: v_fma_f32 v5, -v6, v8, v5 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 +; CI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 +; CI-NEXT: v_trunc_f32_e32 v5, v5 +; CI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_v4f32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], 
s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s2, 3 +; VI-NEXT: s_mov_b32 s3, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s6 +; VI-NEXT: s_add_u32 s0, s0, 64 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v1, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; VI-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; VI-NEXT: v_mov_b32_e32 v8, s4 +; VI-NEXT: v_mov_b32_e32 v9, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f32 v11, s[0:1], v7, v7, v3 +; VI-NEXT: v_div_scale_f32 v10, vcc, v3, v7, v3 +; VI-NEXT: v_rcp_f32_e32 v12, v11 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v13, -v11, v12, 1.0 +; VI-NEXT: v_fma_f32 v12, v13, v12, v12 +; VI-NEXT: v_mul_f32_e32 v13, v10, v12 +; VI-NEXT: v_fma_f32 v14, -v11, v13, v10 +; VI-NEXT: v_fma_f32 v13, v14, v12, v13 +; VI-NEXT: v_fma_f32 v10, -v11, v13, v10 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v10, v10, v12, v13 +; VI-NEXT: v_div_fixup_f32 v10, v10, v7, v3 +; VI-NEXT: v_trunc_f32_e32 v10, v10 +; VI-NEXT: v_mad_f32 v3, -v10, v7, v3 +; VI-NEXT: v_div_scale_f32 v10, s[0:1], v6, v6, v2 +; VI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 +; VI-NEXT: v_rcp_f32_e32 v11, v10 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v12, -v10, v11, 1.0 +; VI-NEXT: v_fma_f32 v11, v12, v11, v11 +; VI-NEXT: v_mul_f32_e32 v12, v7, v11 +; VI-NEXT: v_fma_f32 v13, -v10, v12, v7 +; VI-NEXT: v_fma_f32 v12, v13, v11, v12 +; VI-NEXT: v_fma_f32 v7, -v10, v12, v7 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v7, v7, v11, v12 +; VI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 +; VI-NEXT: v_trunc_f32_e32 v7, v7 +; VI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; VI-NEXT: v_div_scale_f32 v7, s[0:1], v5, v5, v1 +; VI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 +; VI-NEXT: v_rcp_f32_e32 v10, v7 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v11, -v7, v10, 1.0 +; VI-NEXT: v_fma_f32 v10, v11, v10, v10 +; VI-NEXT: v_mul_f32_e32 v11, v6, v10 +; VI-NEXT: v_fma_f32 v12, -v7, v11, v6 +; VI-NEXT: v_fma_f32 v11, v12, v10, v11 +; VI-NEXT: v_fma_f32 v6, -v7, v11, v6 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v6, v6, v10, v11 +; VI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 +; VI-NEXT: v_trunc_f32_e32 v6, v6 +; VI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; VI-NEXT: v_div_scale_f32 v6, s[0:1], v4, v4, v0 +; VI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 +; VI-NEXT: v_rcp_f32_e32 v7, v6 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s2 +; VI-NEXT: v_fma_f32 v10, -v6, v7, 1.0 +; VI-NEXT: v_fma_f32 v7, v10, v7, v7 +; VI-NEXT: v_mul_f32_e32 v10, v5, v7 +; VI-NEXT: v_fma_f32 v11, -v6, v10, v5 +; VI-NEXT: v_fma_f32 v10, v11, v7, v10 +; VI-NEXT: v_fma_f32 v5, -v6, v10, v5 +; VI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s3 +; VI-NEXT: v_div_fmas_f32 v5, v5, v7, v10 +; VI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 +; VI-NEXT: v_trunc_f32_e32 v5, v5 +; VI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; VI-NEXT: s_endpgm <4 x float> addrspace(1)* %in2) #0 { %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4 %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16 @@ -100,6 +851,193 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float } define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x 
double> addrspace(1)* %in1, +; SI-LABEL: frem_v2f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s8 +; SI-NEXT: s_mov_b32 s5, s9 +; SI-NEXT: s_mov_b32 s0, s10 +; SI-NEXT: s_mov_b32 s1, s11 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: s_mov_b32 s14, s6 +; SI-NEXT: s_mov_b32 s15, s7 +; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[12:15], 0 offset:64 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f64 v[8:9], s[0:1], v[6:7], v[6:7], v[2:3] +; SI-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] +; SI-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 +; SI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; SI-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 +; SI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; SI-NEXT: v_div_scale_f64 v[12:13], s[0:1], v[2:3], v[6:7], v[2:3] +; SI-NEXT: v_mul_f64 v[14:15], v[12:13], v[10:11] +; SI-NEXT: v_fma_f64 v[16:17], -v[8:9], v[14:15], v[12:13] +; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9 +; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13 +; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc +; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 0 +; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15] +; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] +; SI-NEXT: v_bfe_u32 v10, v9, 20, 11 +; SI-NEXT: s_movk_i32 s8, 0xfc01 +; SI-NEXT: v_add_i32_e32 v12, vcc, s8, v10 +; SI-NEXT: s_mov_b32 s3, 0xfffff +; SI-NEXT: v_lshr_b64 v[10:11], s[2:3], v12 +; SI-NEXT: v_not_b32_e32 v10, v10 +; SI-NEXT: v_and_b32_e32 v10, v8, v10 +; SI-NEXT: v_not_b32_e32 v11, v11 +; SI-NEXT: v_and_b32_e32 v11, v9, v11 +; SI-NEXT: s_brev_b32 s9, 1 +; SI-NEXT: v_and_b32_e32 v13, s9, v9 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v12 +; SI-NEXT: v_cndmask_b32_e32 v11, v11, v13, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v12 +; SI-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[0:1] +; SI-NEXT: v_mul_f64 v[6:7], v[8:9], v[6:7] +; SI-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; SI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1] +; SI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] +; SI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; SI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; SI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; SI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; SI-NEXT: v_div_scale_f64 v[10:11], s[0:1], v[0:1], v[4:5], v[0:1] +; SI-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] +; SI-NEXT: v_fma_f64 v[14:15], -v[6:7], v[12:13], v[10:11] +; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 +; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11 +; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc +; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 0 +; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13] +; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1] +; SI-NEXT: v_bfe_u32 v8, v7, 20, 11 +; SI-NEXT: v_add_i32_e32 v10, vcc, s8, v8 +; SI-NEXT: v_lshr_b64 v[8:9], s[2:3], v10 +; SI-NEXT: v_not_b32_e32 v8, v8 +; SI-NEXT: v_and_b32_e32 v8, v6, v8 +; SI-NEXT: v_not_b32_e32 v9, v9 +; SI-NEXT: v_and_b32_e32 v9, v7, v9 +; SI-NEXT: v_and_b32_e32 v11, s9, v7 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v10 +; SI-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v10 +; SI-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, 
vcc +; SI-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] +; SI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] +; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_v2f64: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s10, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s0, s4 +; CI-NEXT: s_mov_b32 s1, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s2 +; CI-NEXT: s_mov_b32 s7, s3 +; CI-NEXT: s_mov_b32 s11, s3 +; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:64 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[6:7], v[6:7], v[2:3] +; CI-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] +; CI-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 +; CI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; CI-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 +; CI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; CI-NEXT: v_div_scale_f64 v[12:13], vcc, v[2:3], v[6:7], v[2:3] +; CI-NEXT: v_mul_f64 v[14:15], v[12:13], v[10:11] +; CI-NEXT: v_fma_f64 v[8:9], -v[8:9], v[14:15], v[12:13] +; CI-NEXT: s_nop 1 +; CI-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[10:11], v[14:15] +; CI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] +; CI-NEXT: v_trunc_f64_e32 v[8:9], v[8:9] +; CI-NEXT: v_mul_f64 v[6:7], v[8:9], v[6:7] +; CI-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; CI-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], v[0:1] +; CI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] +; CI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; CI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; CI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; CI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; CI-NEXT: v_div_scale_f64 v[10:11], vcc, v[0:1], v[4:5], v[0:1] +; CI-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] +; CI-NEXT: v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11] +; CI-NEXT: s_nop 1 +; CI-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] +; CI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1] +; CI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] +; CI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] +; CI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] +; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_v2f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s6 +; VI-NEXT: s_add_u32 s0, s0, 64 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v1, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] +; VI-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; VI-NEXT: v_mov_b32_e32 v8, s4 +; VI-NEXT: v_mov_b32_e32 v9, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f64 v[10:11], s[0:1], v[6:7], v[6:7], v[2:3] +; VI-NEXT: v_rcp_f64_e32 v[12:13], v[10:11] +; VI-NEXT: v_fma_f64 v[14:15], -v[10:11], v[12:13], 1.0 +; VI-NEXT: v_fma_f64 v[12:13], v[12:13], v[14:15], v[12:13] +; VI-NEXT: v_fma_f64 v[14:15], -v[10:11], v[12:13], 1.0 +; VI-NEXT: v_fma_f64 v[12:13], v[12:13], v[14:15], v[12:13] +; VI-NEXT: v_div_scale_f64 v[14:15], vcc, v[2:3], v[6:7], v[2:3] +; VI-NEXT: v_mul_f64 v[16:17], v[14:15], v[12:13] +; VI-NEXT: v_fma_f64 v[10:11], 
-v[10:11], v[16:17], v[14:15] +; VI-NEXT: s_nop 1 +; VI-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[12:13], v[16:17] +; VI-NEXT: v_div_fixup_f64 v[10:11], v[10:11], v[6:7], v[2:3] +; VI-NEXT: v_trunc_f64_e32 v[10:11], v[10:11] +; VI-NEXT: v_mul_f64 v[6:7], v[10:11], v[6:7] +; VI-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; VI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1] +; VI-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] +; VI-NEXT: v_fma_f64 v[12:13], -v[6:7], v[10:11], 1.0 +; VI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; VI-NEXT: v_fma_f64 v[12:13], -v[6:7], v[10:11], 1.0 +; VI-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] +; VI-NEXT: v_div_scale_f64 v[12:13], vcc, v[0:1], v[4:5], v[0:1] +; VI-NEXT: v_mul_f64 v[14:15], v[12:13], v[10:11] +; VI-NEXT: v_fma_f64 v[6:7], -v[6:7], v[14:15], v[12:13] +; VI-NEXT: s_nop 1 +; VI-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[14:15] +; VI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1] +; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] +; VI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] +; VI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] +; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3] +; VI-NEXT: s_endpgm <2 x double> addrspace(1)* %in2) #0 { %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4 %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16 From 8ec8ad868d9b970245e827b14306fbd11d11a9b2 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 24 Jul 2020 11:41:57 +0100 Subject: [PATCH 320/600] [AMDGPU] Use fma for lowering frem This gives shorter f64 code and perhaps better accuracy. Differential Revision: https://reviews.llvm.org/D84516 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 +-- llvm/test/CodeGen/AMDGPU/frem.ll | 84 ++++++++----------- 2 files changed, 41 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 1f5d83d379495..a697df5553b73 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2079,7 +2079,7 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, return DAG.getMergeValues(Res, DL); } -// (frem x, y) -> (fsub x, (fmul (ftrunc (fdiv x, y)), y)) +// (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x) SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); EVT VT = Op.getValueType(); @@ -2089,10 +2089,10 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { // TODO: Should this propagate fast-math-flags? SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); - SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); - - return DAG.getNode(ISD::FSUB, SL, VT, X, Mul); + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div); + SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc); + // TODO: For f32 use FMAD instead if !hasFastFMA32? 
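+  // Editorial worked example, not part of the original patch: for x = 7.5,
+  // y = 2.0 this computes Div = 3.75, Trunc = 3.0, Neg = -3.0, and the FMA
+  // below yields fma(-3.0, 2.0, 7.5) = 1.5, which equals fmod(7.5, 2.0).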
+ return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X); } SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index aef979f7d618d..0414384dabe49 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -36,7 +36,7 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 ; SI-NEXT: v_trunc_f32_e32 v2, v2 -; SI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -72,7 +72,7 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 ; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 ; CI-NEXT: v_trunc_f32_e32 v2, v2 -; CI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; CI-NEXT: s_endpgm ; @@ -106,7 +106,7 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; VI-NEXT: v_div_fmas_f32 v3, v3, v6, v7 ; VI-NEXT: v_div_fixup_f32 v3, v3, v2, v4 ; VI-NEXT: v_trunc_f32_e32 v3, v3 -; VI-NEXT: v_mad_f32 v2, -v3, v2, v4 +; VI-NEXT: v_fma_f32 v2, -v3, v2, v4 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm float addrspace(1)* %in2) #0 { @@ -140,7 +140,7 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; SI-NEXT: v_rcp_f32_e32 v2, v1 ; SI-NEXT: v_mul_f32_e32 v2, v0, v2 ; SI-NEXT: v_trunc_f32_e32 v2, v2 -; SI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -165,7 +165,7 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; CI-NEXT: v_rcp_f32_e32 v2, v1 ; CI-NEXT: v_mul_f32_e32 v2, v0, v2 ; CI-NEXT: v_trunc_f32_e32 v2, v2 -; CI-NEXT: v_mad_f32 v0, -v2, v1, v0 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; CI-NEXT: s_endpgm ; @@ -188,7 +188,7 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; VI-NEXT: v_rcp_f32_e32 v3, v2 ; VI-NEXT: v_mul_f32_e32 v3, v4, v3 ; VI-NEXT: v_trunc_f32_e32 v3, v3 -; VI-NEXT: v_mad_f32 v2, -v3, v2, v4 +; VI-NEXT: v_fma_f32 v2, -v3, v2, v4 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm float addrspace(1)* %in2) #1 { @@ -251,8 +251,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc ; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] -; SI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] -; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -287,8 +286,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; CI-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] ; CI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] ; CI-NEXT: v_trunc_f64_e32 v[4:5], v[4:5] -; CI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] -; CI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; CI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; CI-NEXT: s_endpgm ; @@ -319,8 +317,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double 
addrspace( ; VI-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] ; VI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[2:3] ; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] -; VI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] -; VI-NEXT: v_add_f64 v[2:3], v[2:3], -v[4:5] +; VI-NEXT: v_fma_f64 v[2:3], -v[6:7], v[4:5], v[2:3] ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm double addrspace(1)* %in2) #0 { @@ -368,8 +365,7 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc ; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] -; SI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] -; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0 ; SI-NEXT: s_endpgm ; @@ -394,8 +390,7 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; CI-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; CI-NEXT: v_mul_f64 v[4:5], v[0:1], v[4:5] ; CI-NEXT: v_trunc_f64_e32 v[4:5], v[4:5] -; CI-NEXT: v_mul_f64 v[2:3], v[4:5], v[2:3] -; CI-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] +; CI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 ; CI-NEXT: s_endpgm ; @@ -416,8 +411,7 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; VI-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; VI-NEXT: v_mul_f64 v[6:7], v[2:3], v[6:7] ; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] -; VI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] -; VI-NEXT: v_add_f64 v[2:3], v[2:3], -v[4:5] +; VI-NEXT: v_fma_f64 v[2:3], -v[6:7], v[4:5], v[2:3] ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm double addrspace(1)* %in2) #1 { @@ -463,7 +457,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; SI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 ; SI-NEXT: v_div_fixup_f32 v4, v4, v3, v1 ; SI-NEXT: v_trunc_f32_e32 v4, v4 -; SI-NEXT: v_mad_f32 v1, -v4, v3, v1 +; SI-NEXT: v_fma_f32 v1, -v4, v3, v1 ; SI-NEXT: v_div_scale_f32 v3, vcc, v0, v2, v0 ; SI-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 ; SI-NEXT: v_rcp_f32_e32 v5, v4 @@ -478,7 +472,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; SI-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; SI-NEXT: v_div_fixup_f32 v3, v3, v2, v0 ; SI-NEXT: v_trunc_f32_e32 v3, v3 -; SI-NEXT: v_mad_f32 v0, -v3, v2, v0 +; SI-NEXT: v_fma_f32 v0, -v3, v2, v0 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -516,7 +510,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; CI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 ; CI-NEXT: v_div_fixup_f32 v4, v4, v3, v1 ; CI-NEXT: v_trunc_f32_e32 v4, v4 -; CI-NEXT: v_mad_f32 v1, -v4, v3, v1 +; CI-NEXT: v_fma_f32 v1, -v4, v3, v1 ; CI-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 ; CI-NEXT: v_div_scale_f32 v3, vcc, v0, v2, v0 ; CI-NEXT: v_rcp_f32_e32 v5, v4 @@ -531,7 +525,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; CI-NEXT: v_div_fmas_f32 v3, v3, v5, v6 ; CI-NEXT: v_div_fixup_f32 v3, v3, v2, v0 ; CI-NEXT: v_trunc_f32_e32 v3, v3 -; CI-NEXT: v_mad_f32 v0, -v3, v2, v0 +; CI-NEXT: v_fma_f32 v0, -v3, v2, v0 ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; CI-NEXT: s_endpgm ; @@ -567,7 +561,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; VI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 ; VI-NEXT: v_div_fixup_f32 
v6, v6, v5, v3 ; VI-NEXT: v_trunc_f32_e32 v6, v6 -; VI-NEXT: v_mad_f32 v3, -v6, v5, v3 +; VI-NEXT: v_fma_f32 v3, -v6, v5, v3 ; VI-NEXT: v_div_scale_f32 v6, s[0:1], v4, v4, v2 ; VI-NEXT: v_div_scale_f32 v5, vcc, v2, v4, v2 ; VI-NEXT: v_rcp_f32_e32 v7, v6 @@ -582,7 +576,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; VI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 ; VI-NEXT: v_div_fixup_f32 v5, v5, v4, v2 ; VI-NEXT: v_trunc_f32_e32 v5, v5 -; VI-NEXT: v_mad_f32 v2, -v5, v4, v2 +; VI-NEXT: v_fma_f32 v2, -v5, v4, v2 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm <2 x float> addrspace(1)* %in2) #0 { @@ -629,7 +623,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; SI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 ; SI-NEXT: v_div_fixup_f32 v8, v8, v7, v3 ; SI-NEXT: v_trunc_f32_e32 v8, v8 -; SI-NEXT: v_mad_f32 v3, -v8, v7, v3 +; SI-NEXT: v_fma_f32 v3, -v8, v7, v3 ; SI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 ; SI-NEXT: v_div_scale_f32 v8, s[4:5], v6, v6, v2 ; SI-NEXT: v_rcp_f32_e32 v9, v8 @@ -644,7 +638,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; SI-NEXT: v_div_fmas_f32 v7, v7, v9, v10 ; SI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 ; SI-NEXT: v_trunc_f32_e32 v7, v7 -; SI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; SI-NEXT: v_fma_f32 v2, -v7, v6, v2 ; SI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 ; SI-NEXT: v_div_scale_f32 v7, s[4:5], v5, v5, v1 ; SI-NEXT: v_rcp_f32_e32 v8, v7 @@ -659,7 +653,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; SI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 ; SI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 ; SI-NEXT: v_trunc_f32_e32 v6, v6 -; SI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; SI-NEXT: v_fma_f32 v1, -v6, v5, v1 ; SI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 ; SI-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, v0 ; SI-NEXT: v_rcp_f32_e32 v7, v6 @@ -674,7 +668,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; SI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 ; SI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 ; SI-NEXT: v_trunc_f32_e32 v5, v5 -; SI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; SI-NEXT: v_fma_f32 v0, -v5, v4, v0 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -712,7 +706,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; CI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 ; CI-NEXT: v_div_fixup_f32 v8, v8, v7, v3 ; CI-NEXT: v_trunc_f32_e32 v8, v8 -; CI-NEXT: v_mad_f32 v3, -v8, v7, v3 +; CI-NEXT: v_fma_f32 v3, -v8, v7, v3 ; CI-NEXT: v_div_scale_f32 v8, s[4:5], v6, v6, v2 ; CI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 ; CI-NEXT: v_rcp_f32_e32 v9, v8 @@ -727,7 +721,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; CI-NEXT: v_div_fmas_f32 v7, v7, v9, v10 ; CI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 ; CI-NEXT: v_trunc_f32_e32 v7, v7 -; CI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; CI-NEXT: v_fma_f32 v2, -v7, v6, v2 ; CI-NEXT: v_div_scale_f32 v7, s[4:5], v5, v5, v1 ; CI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 ; CI-NEXT: v_rcp_f32_e32 v8, v7 @@ -742,7 +736,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; CI-NEXT: v_div_fmas_f32 v6, v6, v8, v9 ; CI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 ; CI-NEXT: v_trunc_f32_e32 v6, v6 -; CI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; CI-NEXT: v_fma_f32 v1, -v6, v5, v1 ; CI-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, v0 ; CI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 ; CI-NEXT: 
v_rcp_f32_e32 v7, v6 @@ -757,7 +751,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; CI-NEXT: v_div_fmas_f32 v5, v5, v7, v8 ; CI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 ; CI-NEXT: v_trunc_f32_e32 v5, v5 -; CI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; CI-NEXT: v_fma_f32 v0, -v5, v4, v0 ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_endpgm ; @@ -793,7 +787,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_div_fmas_f32 v10, v10, v12, v13 ; VI-NEXT: v_div_fixup_f32 v10, v10, v7, v3 ; VI-NEXT: v_trunc_f32_e32 v10, v10 -; VI-NEXT: v_mad_f32 v3, -v10, v7, v3 +; VI-NEXT: v_fma_f32 v3, -v10, v7, v3 ; VI-NEXT: v_div_scale_f32 v10, s[0:1], v6, v6, v2 ; VI-NEXT: v_div_scale_f32 v7, vcc, v2, v6, v2 ; VI-NEXT: v_rcp_f32_e32 v11, v10 @@ -808,7 +802,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_div_fmas_f32 v7, v7, v11, v12 ; VI-NEXT: v_div_fixup_f32 v7, v7, v6, v2 ; VI-NEXT: v_trunc_f32_e32 v7, v7 -; VI-NEXT: v_mad_f32 v2, -v7, v6, v2 +; VI-NEXT: v_fma_f32 v2, -v7, v6, v2 ; VI-NEXT: v_div_scale_f32 v7, s[0:1], v5, v5, v1 ; VI-NEXT: v_div_scale_f32 v6, vcc, v1, v5, v1 ; VI-NEXT: v_rcp_f32_e32 v10, v7 @@ -823,7 +817,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_div_fmas_f32 v6, v6, v10, v11 ; VI-NEXT: v_div_fixup_f32 v6, v6, v5, v1 ; VI-NEXT: v_trunc_f32_e32 v6, v6 -; VI-NEXT: v_mad_f32 v1, -v6, v5, v1 +; VI-NEXT: v_fma_f32 v1, -v6, v5, v1 ; VI-NEXT: v_div_scale_f32 v6, s[0:1], v4, v4, v0 ; VI-NEXT: v_div_scale_f32 v5, vcc, v0, v4, v0 ; VI-NEXT: v_rcp_f32_e32 v7, v6 @@ -838,7 +832,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_div_fmas_f32 v5, v5, v7, v10 ; VI-NEXT: v_div_fixup_f32 v5, v5, v4, v0 ; VI-NEXT: v_trunc_f32_e32 v5, v5 -; VI-NEXT: v_mad_f32 v0, -v5, v4, v0 +; VI-NEXT: v_fma_f32 v0, -v5, v4, v0 ; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3] ; VI-NEXT: s_endpgm <4 x float> addrspace(1)* %in2) #0 { @@ -902,8 +896,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc ; SI-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[0:1] -; SI-NEXT: v_mul_f64 v[6:7], v[8:9], v[6:7] -; SI-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; SI-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3] ; SI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1] ; SI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] ; SI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 @@ -934,8 +927,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; SI-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[0:1] ; SI-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc ; SI-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[0:1] -; SI-NEXT: v_mul_f64 v[4:5], v[6:7], v[4:5] -; SI-NEXT: v_add_f64 v[0:1], v[0:1], -v[4:5] +; SI-NEXT: v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1] ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; @@ -970,8 +962,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; CI-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[10:11], v[14:15] ; CI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] ; CI-NEXT: v_trunc_f64_e32 v[8:9], v[8:9] -; CI-NEXT: v_mul_f64 v[6:7], v[8:9], v[6:7] -; CI-NEXT: v_add_f64 v[2:3], v[2:3], -v[6:7] +; CI-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3] ; CI-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 
v[0:1]
 ; CI-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
 ; CI-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
@@ -985,8 +976,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; CI-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13]
 ; CI-NEXT:    v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
 ; CI-NEXT:    v_trunc_f64_e32 v[6:7], v[6:7]
-; CI-NEXT:    v_mul_f64 v[4:5], v[6:7], v[4:5]
-; CI-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
+; CI-NEXT:    v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1]
 ; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
 ;
@@ -1019,8 +1009,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; VI-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[12:13], v[16:17]
 ; VI-NEXT:    v_div_fixup_f64 v[10:11], v[10:11], v[6:7], v[2:3]
 ; VI-NEXT:    v_trunc_f64_e32 v[10:11], v[10:11]
-; VI-NEXT:    v_mul_f64 v[6:7], v[10:11], v[6:7]
-; VI-NEXT:    v_add_f64 v[2:3], v[2:3], -v[6:7]
+; VI-NEXT:    v_fma_f64 v[2:3], -v[10:11], v[6:7], v[2:3]
 ; VI-NEXT:    v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[0:1]
 ; VI-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
 ; VI-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[10:11], 1.0
@@ -1034,8 +1023,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; VI-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[14:15]
 ; VI-NEXT:    v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
 ; VI-NEXT:    v_trunc_f64_e32 v[6:7], v[6:7]
-; VI-NEXT:    v_mul_f64 v[4:5], v[6:7], v[4:5]
-; VI-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
+; VI-NEXT:    v_fma_f64 v[0:1], -v[6:7], v[4:5], v[0:1]
 ; VI-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
 ; VI-NEXT:    s_endpgm
                          <2 x double> addrspace(1)* %in2) #0 {

From 817b3a6fe3a4452eb61a2503c8beaa7267ca0351 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht
Date: Mon, 3 Aug 2020 17:33:37 -0700
Subject: [PATCH 321/600] [test] Use abspath instead of realpath sometimes

In these two cases, use of `os.path.realpath` is problematic:
- The name of the compiler is significant [1]. For testing purposes, we
  might provide a compiler called "clang" which is actually a symlink to
  some build script (which does some flag processing before invoking the
  real clang). The destination of the symlink may not be called "clang",
  but we still want it to be treated as such.
- When using a build system that puts build artifacts in an arbitrary
  build location, and later creates a symlink for it (e.g. creates a
  "/lldbsuite/test/dotest.py" symlink that points to
  "/build/artifact//dotest.py"), looking at the realpath will not match
  the "test" convention required here.

[1] See `Makefile.rules` in the lldb tree, e.g.
we use different flags if the compiler is named "clang" Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D85175 --- lldb/packages/Python/lldbsuite/test/dotest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index 3fb802f1c1aa5..f43685c069e44 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -241,7 +241,7 @@ def parseOptionsAndInitTestdirs(): do_help = True if args.compiler: - configuration.compiler = os.path.realpath(args.compiler) + configuration.compiler = os.path.abspath(args.compiler) if not is_exe(configuration.compiler): configuration.compiler = which(args.compiler) if not is_exe(configuration.compiler): @@ -461,7 +461,7 @@ def setupSysPath(): if "DOTEST_PROFILE" in os.environ and "DOTEST_SCRIPT_DIR" in os.environ: scriptPath = os.environ["DOTEST_SCRIPT_DIR"] else: - scriptPath = os.path.dirname(os.path.realpath(__file__)) + scriptPath = os.path.dirname(os.path.abspath(__file__)) if not scriptPath.endswith('test'): print("This script expects to reside in lldb's test directory.") sys.exit(-1) From 28e322ea9393e6b3841886006dd170ddd810fd9b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 16 Jul 2020 13:10:12 +0100 Subject: [PATCH 322/600] [PowerPC] Custom lowering for funnel shifts The custom lowering saves an instruction over the generic expansion, by taking advantage of the fact that PowerPC shift instructions are well defined in the shift-by-bitwidth case. Differential Revision: https://reviews.llvm.org/D83948 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 12 +++--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 37 +++++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/funnel-shift.ll | 28 ++++++-------- llvm/test/CodeGen/PowerPC/pr44183.ll | 21 +++++++---- 6 files changed, 72 insertions(+), 29 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9c1517ea74140..5e27d2db63aec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6254,12 +6254,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Zero = DAG.getConstant(0, sdl, VT); SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); - auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; - if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { - setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); - return; - } - // When X == Y, this is rotate. If the data type has a power-of-2 size, we // avoid the select that is necessary in the general case to filter out // the 0-shift possibility that leads to UB. @@ -6289,6 +6283,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } + auto FunnelOpcode = IsFSHL ? 
ISD::FSHL : ISD::FSHR; + if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); + return; + } + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3e218e14d8d44..51ff0da6c0912 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1253,6 +1253,7 @@ class BitPermutationSelector { } break; case ISD::SHL: + case PPCISD::SHL: if (isa(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); @@ -1268,6 +1269,7 @@ class BitPermutationSelector { } break; case ISD::SRL: + case PPCISD::SRL: if (isa(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ac7f4f9c34f92..0ebc8a99b8ea9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -617,6 +617,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + // PowerPC has better expansions for funnel shifts than the generic + // TargetLowering::expandFunnelShift. + if (Subtarget.has64BitSupport()) { + setOperationAction(ISD::FSHL, MVT::i64, Custom); + setOperationAction(ISD::FSHR, MVT::i64, Custom); + } + setOperationAction(ISD::FSHL, MVT::i32, Custom); + setOperationAction(ISD::FSHR, MVT::i32, Custom); + if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); @@ -8626,6 +8635,31 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(OutOps, dl); } +SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + + bool IsFSHL = Op.getOpcode() == ISD::FSHL; + SDValue X = Op.getOperand(0); + SDValue Y = Op.getOperand(1); + SDValue Z = Op.getOperand(2); + EVT AmtVT = Z.getValueType(); + + // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) + // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) + // This is simpler than TargetLowering::expandFunnelShift because we can rely + // on PowerPC shift by BW being well defined. + Z = DAG.getNode(ISD::AND, dl, AmtVT, Z, + DAG.getConstant(BitWidth - 1, dl, AmtVT)); + SDValue SubZ = + DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z); + X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ); + Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z); + return DAG.getNode(ISD::OR, dl, VT, X, Y); +} + //===----------------------------------------------------------------------===// // Vector related lowering. // @@ -10421,6 +10455,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); + case ISD::FSHL: return LowerFunnelShift(Op, DAG); + case ISD::FSHR: return LowerFunnelShift(Op, DAG); + // Vector-related lowering. 
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 80588a1bd4019..7e9915c04b6aa 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1092,6 +1092,7 @@ namespace llvm { SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll index 48a10eda1cf1d..364ab29de3853 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -18,12 +18,11 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshl_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi. 5, 5, 31 +; CHECK-NEXT: clrlwi 5, 5, 27 ; CHECK-NEXT: subfic 6, 5, 32 -; CHECK-NEXT: slw 5, 3, 5 +; CHECK-NEXT: slw 3, 3, 5 ; CHECK-NEXT: srw 4, 4, 6 -; CHECK-NEXT: or 4, 5, 4 -; CHECK-NEXT: iseleq 3, 3, 4 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) ret i32 %f @@ -32,12 +31,11 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: fshl_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: andi. 5, 5, 63 +; CHECK-NEXT: clrlwi 5, 5, 26 ; CHECK-NEXT: subfic 6, 5, 64 -; CHECK-NEXT: sld 5, 3, 5 +; CHECK-NEXT: sld 3, 3, 5 ; CHECK-NEXT: srd 4, 4, 6 -; CHECK-NEXT: or 4, 5, 4 -; CHECK-NEXT: iseleq 3, 3, 4 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z) ret i64 %f @@ -138,12 +136,11 @@ define i8 @fshl_i8_const_fold() { define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshr_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi. 5, 5, 31 +; CHECK-NEXT: clrlwi 5, 5, 27 ; CHECK-NEXT: subfic 6, 5, 32 -; CHECK-NEXT: srw 5, 4, 5 +; CHECK-NEXT: srw 4, 4, 5 ; CHECK-NEXT: slw 3, 3, 6 -; CHECK-NEXT: or 3, 3, 5 -; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) ret i32 %f @@ -152,12 +149,11 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: fshr_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: andi. 
5, 5, 63 +; CHECK-NEXT: clrlwi 5, 5, 26 ; CHECK-NEXT: subfic 6, 5, 64 -; CHECK-NEXT: srd 5, 4, 5 +; CHECK-NEXT: srd 4, 4, 5 ; CHECK-NEXT: sld 3, 3, 6 -; CHECK-NEXT: or 3, 3, 5 -; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: blr %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z) ret i64 %f diff --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll index 6d56cea2402e3..a2cf40521f556 100644 --- a/llvm/test/CodeGen/PowerPC/pr44183.ll +++ b/llvm/test/CodeGen/PowerPC/pr44183.ll @@ -8,14 +8,20 @@ define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind al ; CHECK-LABEL: _ZN1m1nEv: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: ld r4, 8(r30) +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: ld r4, 16(r30) +; CHECK-NEXT: ld r5, 8(r30) +; CHECK-NEXT: subfic r29, r3, 64 +; CHECK-NEXT: rldicl r3, r5, 60, 4 +; CHECK-NEXT: sld r4, r4, r29 ; CHECK-NEXT: lwz r5, 36(r30) -; CHECK-NEXT: rldicl r4, r4, 60, 4 -; CHECK-NEXT: rlwinm r3, r4, 31, 0, 0 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: rlwinm r3, r3, 31, 0, 0 ; CHECK-NEXT: clrlwi r4, r5, 31 ; CHECK-NEXT: or r4, r4, r3 ; CHECK-NEXT: bl _ZN1llsE1d @@ -23,15 +29,16 @@ define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind al ; CHECK-NEXT: ld r3, 16(r30) ; CHECK-NEXT: ld r4, 8(r30) ; CHECK-NEXT: rldicl r4, r4, 60, 4 -; CHECK-NEXT: sldi r3, r3, 60 -; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: sld r3, r3, r29 +; CHECK-NEXT: or r3, r3, r4 ; CHECK-NEXT: sldi r3, r3, 31 ; CHECK-NEXT: clrldi r4, r3, 32 ; CHECK-NEXT: bl _ZN1llsE1d ; CHECK-NEXT: nop -; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr entry: From cc0b670abf93c9c826885ab67125857469a1b8a6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 16:31:13 +0100 Subject: [PATCH 323/600] Fix sphinx "Title underline too short" warning --- llvm/docs/Passes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.rst index 13317ecc966e7..2ff28eb09e54b 100644 --- a/llvm/docs/Passes.rst +++ b/llvm/docs/Passes.rst @@ -525,7 +525,7 @@ redundant stores. .. _passes-function-attrs: ``-function-attrs``: Deduce function attributes ----------------------------------------------- +----------------------------------------------- A simple interprocedural pass which walks the call-graph, looking for functions which do not access or only read non-local memory, and marking them From 5e0a9dc0ad7704b7c49995101629010f5ff98cd2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Aug 2020 16:35:40 +0100 Subject: [PATCH 324/600] Separate code-block tag with a newline to fix code snippet html output --- clang/include/clang/Basic/AttrDocs.td | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index b9fcf9af323b7..76a075a97ee16 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1084,6 +1084,7 @@ not made control-dependent on any additional values, e.g., unrolling a loop executed by all work items. Sample usage: + .. 
code-block:: c void convfunc(void) __attribute__((convergent)); From 14d726acd6041ee8fc595e48ec871b50b40ccc1d Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 4 Aug 2020 10:37:57 -0500 Subject: [PATCH 325/600] [PowerPC] Don't remove single swap between the load and store The swap removal pass looks to remove swaps when a loaded value is swapped, some number of lane-insensitive operations are performed and then the value is swapped again and stored. However, in a situation where we load the value, swap it and then store it without swapping again, the pass erroneously removes the single swap. The reason is that both checks in the same equivalence class: - load feeds a swap - swap feeds a store pass. However, there is no check that the two swaps are actually a single swap. This patch just fixes that. Differential revision: https://reviews.llvm.org/D84785 --- llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 23 +++++++++++++++++++ llvm/test/CodeGen/PowerPC/swaps-le-8.ll | 19 +++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/swaps-le-8.ll diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index c3729da0b07b8..b7ed8ce9f1449 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -689,6 +689,29 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { LLVM_DEBUG(UseMI.dump()); LLVM_DEBUG(dbgs() << "\n"); } + + // It is possible that the load feeds a swap and that swap feeds a + // store. In such a case, the code is actually trying to store a swapped + // vector. We must reject such webs. + if (SwapVector[UseIdx].IsSwap && !SwapVector[UseIdx].IsLoad && + !SwapVector[UseIdx].IsStore) { + Register SwapDefReg = UseMI.getOperand(0).getReg(); + for (MachineInstr &UseOfUseMI : + MRI->use_nodbg_instructions(SwapDefReg)) { + int UseOfUseIdx = SwapMap[&UseOfUseMI]; + if (SwapVector[UseOfUseIdx].IsStore) { + SwapVector[Repr].WebRejected = 1; + LLVM_DEBUG( + dbgs() << format( + "Web %d rejected for load/swap feeding a store\n", Repr)); + LLVM_DEBUG(dbgs() << " def " << EntryIdx << ": "); + LLVM_DEBUG(MI->dump()); + LLVM_DEBUG(dbgs() << " use " << UseIdx << ": "); + LLVM_DEBUG(UseMI.dump()); + LLVM_DEBUG(dbgs() << "\n"); + } + } + } } // Reject webs that contain swapping stores that are fed by something diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-8.ll b/llvm/test/CodeGen/PowerPC/swaps-le-8.ll new file mode 100644 index 0000000000000..81471b8466f9e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/swaps-le-8.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +define dso_local void @test(i64* %Src, i64* nocapture %Tgt) local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvd2x 0, 0, 3 +; CHECK-NEXT: xxswapd 0, 0 +; CHECK-NEXT: stxvd2x 0, 0, 4 +; CHECK-NEXT: blr +entry: + %0 = bitcast i64* %Src to i8* + %1 = tail call <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8* %0) #2 + %2 = bitcast i64* %Tgt to <2 x double>* + store <2 x double> %1, <2 x double>* %2, align 1 + ret void +} + +declare <2 x double> @llvm.ppc.vsx.lxvd2x.be(i8*) #1 From 6d6750696400e7ce988d66a1a00e1d0cb32815f8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 29 Jul 2020 16:54:29 -0700 Subject: [PATCH 326/600] [clang][BPF] support type exist/size and enum exist/value relocations This patch 
added the following additional compile-once run-everywhere (CO-RE)
relocations:
  - existence/size of typedef, struct/union or enum type
  - enum value and enum value existence

These additional relocations will make CO-RE bpf programs more adaptive
for potential kernel internal data structure changes.

For existence/size relocations, the following two code patterns are
supported:
  1. uint32_t __builtin_preserve_type_info(*(<type> *)0, flag);
  2. <type> var;
     uint32_t __builtin_preserve_type_info(var, flag);
flag = 0 for existence relocation and flag = 1 for size relocation.

For enum value existence and enum value relocations, the following code
pattern is supported:
  uint64_t __builtin_preserve_enum_value(*(<enum_type> *)<enum_value>, flag);
flag = 0 means existence relocation and flag = 1 means enum value
relocation. In the above, <enum_type> can be an enum type or a typedef
to an enum type. The <enum_value> needs to be an enumerator value from
the same enum type. The return type is uint64_t to permit potential
64bit enumerator values.

Differential Revision: https://reviews.llvm.org/D83242
---
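[Editorial usage sketch, not part of the patch: the C fragment below mirrors
the patterns exercised by the tests added later in this commit. `enum AA` and
the `__int` typedef are the illustrative types from those tests; the builtins
are BPF-target-only and the code assumes compilation with -g, since they
require debug info.]

    enum AA { VAL1 = 2, VAL2 = 0xffffffff80000000UL };
    typedef int __int;

    unsigned long probe(void) {
      /* flag = 0: does the type exist in the target kernel? */
      unsigned type_exists = __builtin_preserve_type_info(*(enum AA *)0, 0);
      /* flag = 1: size of the type */
      unsigned type_size = __builtin_preserve_type_info(*(__int *)0, 1);
      /* flag = 1: relocatable value of the enumerator VAL1 */
      unsigned long enum_val = __builtin_preserve_enum_value(*(enum AA *)VAL1, 1);
      return type_exists + type_size + enum_val;
    }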
+TARGET_BUILTIN(__builtin_preserve_enum_value, "Li.", "t", "") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 054b81c4a72b5..7bcff3eb4d8c5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10865,6 +10865,14 @@ def err_preserve_field_info_not_const: Error< "__builtin_preserve_field_info argument %0 not a constant">; def err_btf_type_id_not_const: Error< "__builtin_btf_type_id argument %0 not a constant">; +def err_preserve_type_info_invalid : Error< + "__builtin_preserve_type_info argument %0 invalid">; +def err_preserve_type_info_not_const: Error< + "__builtin_preserve_type_info argument %0 not a constant">; +def err_preserve_enum_value_invalid : Error< + "__builtin_preserve_enum_value argument %0 invalid">; +def err_preserve_enum_value_not_const: Error< + "__builtin_preserve_enum_value argument %0 not a constant">; def err_bit_cast_non_trivially_copyable : Error< "__builtin_bit_cast %select{source|destination}0 type must be trivially copyable">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2ef164b8b65ab..18911184aa414 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10921,9 +10921,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { assert((BuiltinID == BPF::BI__builtin_preserve_field_info || - BuiltinID == BPF::BI__builtin_btf_type_id) && + BuiltinID == BPF::BI__builtin_btf_type_id || + BuiltinID == BPF::BI__builtin_preserve_type_info || + BuiltinID == BPF::BI__builtin_preserve_enum_value) && "unexpected BPF builtin"); + // A sequence number, injected into IR builtin functions, to + // prevent CSE given the only difference of the funciton + // may just be the debuginfo metadata. 
+ static uint32_t BuiltinSeqNum; + switch (BuiltinID) { default: llvm_unreachable("Unexpected BPF builtin"); @@ -11016,6 +11023,63 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } + case BPF::BI__builtin_preserve_type_info: { + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin function without -g"); + return nullptr; + } + + const Expr *Arg0 = E->getArg(0); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( + Arg0->getType(), Arg0->getExprLoc()); + + ConstantInt *Flag = cast(EmitScalarExpr(E->getArg(1))); + Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); + Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); + + llvm::Function *FnPreserveTypeInfo = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); + CallInst *Fn = + Builder.CreateCall(FnPreserveTypeInfo, {SeqNumVal, FlagValue}); + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + return Fn; + } + case BPF::BI__builtin_preserve_enum_value: { + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using builtin function without -g"); + return nullptr; + } + + const Expr *Arg0 = E->getArg(0); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( + Arg0->getType(), Arg0->getExprLoc()); + + // Find enumerator + const auto *UO = cast(Arg0->IgnoreParens()); + const auto *CE = cast(UO->getSubExpr()); + const auto *DR = cast(CE->getSubExpr()); + const auto *Enumerator = cast(DR->getDecl()); + + auto &InitVal = Enumerator->getInitVal(); + std::string InitValStr; + if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX)) + InitValStr = std::to_string(InitVal.getSExtValue()); + else + InitValStr = std::to_string(InitVal.getZExtValue()); + std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr; + Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr); + + ConstantInt *Flag = cast(EmitScalarExpr(E->getArg(1))); + Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); + Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); + + llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {}); + CallInst *Fn = + Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue}); + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + return Fn; + } } } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ccdb277dda1a0..7e73c51c7150a 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2557,52 +2557,151 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); } +static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) { + if (Arg->getType()->getAsPlaceholderType()) + return false; + + // The first argument needs to be a record field access. + // If it is an array element access, we delay decision + // to BPF backend to check whether the access is a + // field access or not. + return (Arg->IgnoreParens()->getObjectKind() == OK_BitField || + dyn_cast(Arg->IgnoreParens()) || + dyn_cast(Arg->IgnoreParens())); +} + +static bool isValidBPFPreserveTypeInfoArg(Expr *Arg) { + QualType ArgType = Arg->getType(); + if (ArgType->getAsPlaceholderType()) + return false; + + // for TYPE_EXISTENCE/TYPE_SIZEOF reloc type + // format: + // 1. 
__builtin_preserve_type_info(*(<type> *)0, flag);
+  //   2. <type> var;
+  //      __builtin_preserve_type_info(var, flag);
+  if (!dyn_cast<DeclRefExpr>(Arg->IgnoreParens()) &&
+      !dyn_cast<UnaryOperator>(Arg->IgnoreParens()))
+    return false;
+
+  // Typedef type.
+  if (ArgType->getAs<TypedefType>())
+    return true;
+
+  // Record type or Enum type.
+  const Type *Ty = ArgType->getUnqualifiedDesugaredType();
+  if (const auto *RT = Ty->getAs<RecordType>()) {
+    if (!RT->getDecl()->getDeclName().isEmpty())
+      return true;
+  } else if (const auto *ET = Ty->getAs<EnumType>()) {
+    if (!ET->getDecl()->getDeclName().isEmpty())
+      return true;
+  }
+
+  return false;
+}
+
+static bool isValidBPFPreserveEnumValueArg(Expr *Arg) {
+  QualType ArgType = Arg->getType();
+  if (ArgType->getAsPlaceholderType())
+    return false;
+
+  // for ENUM_VALUE_EXISTENCE/ENUM_VALUE reloc type
+  // format:
+  //   __builtin_preserve_enum_value(*(<enum_type> *)<enum_value>,
+  //                                 flag);
+  const auto *UO = dyn_cast<UnaryOperator>(Arg->IgnoreParens());
+  if (!UO)
+    return false;
+
+  const auto *CE = dyn_cast<CStyleCastExpr>(UO->getSubExpr());
+  if (!CE || CE->getCastKind() != CK_IntegralToPointer)
+    return false;
+
+  // The integer must be from an EnumConstantDecl.
+  const auto *DR = dyn_cast<DeclRefExpr>(CE->getSubExpr());
+  if (!DR)
+    return false;
+
+  const EnumConstantDecl *Enumerator =
+      dyn_cast<EnumConstantDecl>(DR->getDecl());
+  if (!Enumerator)
+    return false;
+
+  // The type must be EnumType.
+  const Type *Ty = ArgType->getUnqualifiedDesugaredType();
+  const auto *ET = Ty->getAs<EnumType>();
+  if (!ET)
+    return false;
+
+  // The enum value must be supported.
+  for (auto *EDI : ET->getDecl()->enumerators()) {
+    if (EDI == Enumerator)
+      return true;
+  }
+
+  return false;
+}
+
 bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID,
                                        CallExpr *TheCall) {
   assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
-          BuiltinID == BPF::BI__builtin_btf_type_id) &&
-         "unexpected ARM builtin");
+          BuiltinID == BPF::BI__builtin_btf_type_id ||
+          BuiltinID == BPF::BI__builtin_preserve_type_info ||
+          BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
+         "unexpected BPF builtin");
 
   if (checkArgCount(*this, TheCall, 2))
     return true;
 
-  Expr *Arg;
-  if (BuiltinID == BPF::BI__builtin_btf_type_id) {
-    // The second argument needs to be a constant int
-    Arg = TheCall->getArg(1);
-    if (!Arg->isIntegerConstantExpr(Context)) {
-      Diag(Arg->getBeginLoc(), diag::err_btf_type_id_not_const)
-          << 2 << Arg->getSourceRange();
-      return true;
-    }
-
-    TheCall->setType(Context.UnsignedIntTy);
-    return false;
+  // The second argument needs to be a constant int
+  Expr *Arg = TheCall->getArg(1);
+  Optional<llvm::APSInt> Value = Arg->getIntegerConstantExpr(Context);
+  diag::kind kind;
+  if (!Value) {
+    if (BuiltinID == BPF::BI__builtin_preserve_field_info)
+      kind = diag::err_preserve_field_info_not_const;
+    else if (BuiltinID == BPF::BI__builtin_btf_type_id)
+      kind = diag::err_btf_type_id_not_const;
+    else if (BuiltinID == BPF::BI__builtin_preserve_type_info)
+      kind = diag::err_preserve_type_info_not_const;
+    else
+      kind = diag::err_preserve_enum_value_not_const;
+    Diag(Arg->getBeginLoc(), kind) << 2 << Arg->getSourceRange();
+    return true;
   }
 
-  // The first argument needs to be a record field access.
-  // If it is an array element access, we delay decision
-  // to BPF backend to check whether the access is a
-  // field access or not.
+ // The first argument Arg = TheCall->getArg(0); - if (Arg->getType()->getAsPlaceholderType() || - (Arg->IgnoreParens()->getObjectKind() != OK_BitField && - !dyn_cast(Arg->IgnoreParens()) && - !dyn_cast(Arg->IgnoreParens()))) { - Diag(Arg->getBeginLoc(), diag::err_preserve_field_info_not_field) - << 1 << Arg->getSourceRange(); - return true; + bool InvalidArg = false; + bool ReturnUnsignedInt = true; + if (BuiltinID == BPF::BI__builtin_preserve_field_info) { + if (!isValidBPFPreserveFieldInfoArg(Arg)) { + InvalidArg = true; + kind = diag::err_preserve_field_info_not_field; + } + } else if (BuiltinID == BPF::BI__builtin_preserve_type_info) { + if (!isValidBPFPreserveTypeInfoArg(Arg)) { + InvalidArg = true; + kind = diag::err_preserve_type_info_invalid; + } + } else if (BuiltinID == BPF::BI__builtin_preserve_enum_value) { + if (!isValidBPFPreserveEnumValueArg(Arg)) { + InvalidArg = true; + kind = diag::err_preserve_enum_value_invalid; + } + ReturnUnsignedInt = false; } - // The second argument needs to be a constant int - Arg = TheCall->getArg(1); - if (!Arg->isIntegerConstantExpr(Context)) { - Diag(Arg->getBeginLoc(), diag::err_preserve_field_info_not_const) - << 2 << Arg->getSourceRange(); + if (InvalidArg) { + Diag(Arg->getBeginLoc(), kind) << 1 << Arg->getSourceRange(); return true; } - TheCall->setType(Context.UnsignedIntTy); + if (ReturnUnsignedInt) + TheCall->setType(Context.UnsignedIntTy); + else + TheCall->setType(Context.UnsignedLongTy); return false; } diff --git a/clang/test/CodeGen/builtins-bpf-preserve-field-info-3.c b/clang/test/CodeGen/builtins-bpf-preserve-field-info-3.c new file mode 100644 index 0000000000000..f59d88b663e44 --- /dev/null +++ b/clang/test/CodeGen/builtins-bpf-preserve-field-info-3.c @@ -0,0 +1,41 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang -target bpf -emit-llvm -S -g %s -o - | FileCheck %s + +#define _(x, y) (__builtin_preserve_type_info((x), (y))) + +struct s { + char a; +}; +typedef int __int; +enum AA { + VAL1 = 1, + VAL2 = 2, +}; + +unsigned unit1() { + struct s v = {}; + return _(v, 0) + _(*(struct s *)0, 0); +} + +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 0, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S:[0-9]+]] +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 1, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S]] + +unsigned unit2() { + __int n; + return _(n, 1) + _(*(__int *)0, 1); +} + +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 2, i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_INT:[0-9]+]] +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 3, i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_INT]] + +unsigned unit3() { + enum AA t; + return _(t, 0) + _(*(enum AA *)0, 1); +} + +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 4, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA:[0-9]+]] +// CHECK: call i32 @llvm.bpf.preserve.type.info(i32 5, i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] + +// CHECK: ![[ENUM_AA]] = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA" +// CHECK: ![[TYPEDEF_INT]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__int" +// CHECK: ![[STRUCT_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s" diff --git a/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c b/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c new file mode 100644 index 0000000000000..390b4f9bc07de --- /dev/null +++ b/clang/test/CodeGen/builtins-bpf-preserve-field-info-4.c @@ -0,0 
+1,32 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang -target bpf -emit-llvm -S -g %s -o - | FileCheck %s + +#define _(x, y) (__builtin_preserve_enum_value((x), (y))) + +enum AA { + VAL1 = 2, + VAL2 = 0xffffffff80000000UL, +}; +typedef enum { VAL10 = -2, VAL11 = 0xffff8000, } __BB; + +unsigned unit1() { + return _(*(enum AA *)VAL1, 0) + _(*(__BB *)VAL10, 1); +} + +unsigned unit2() { + return _(*(enum AA *)VAL2, 0) + _(*(__BB *)VAL11, 1); +} + +// CHECK: @0 = private unnamed_addr constant [7 x i8] c"VAL1:2\00", align 1 +// CHECK: @1 = private unnamed_addr constant [9 x i8] c"VAL10:-2\00", align 1 +// CHECK: @2 = private unnamed_addr constant [17 x i8] c"VAL2:-2147483648\00", align 1 +// CHECK: @3 = private unnamed_addr constant [17 x i8] c"VAL11:4294934528\00", align 1 + +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 0, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @0, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA:[0-9]+]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 1, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @1, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM:[0-9]+]] + +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 2, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @2, i32 0, i32 0), i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[ENUM_AA]] +// CHECK: call i64 @llvm.bpf.preserve.enum.value(i32 3, i8* getelementptr inbounds ([17 x i8], [17 x i8]* @3, i32 0, i32 0), i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_ENUM]] + +// CHECK: ![[ENUM_AA]] = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA" +// CHECK: ![[TYPEDEF_ENUM]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__BB" diff --git a/clang/test/Sema/builtins-bpf.c b/clang/test/Sema/builtins-bpf.c index 8df96976f107d..52cf1c6a6e63d 100644 --- a/clang/test/Sema/builtins-bpf.c +++ b/clang/test/Sema/builtins-bpf.c @@ -1,7 +1,28 @@ // RUN: %clang_cc1 -x c -triple bpf-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s -struct s { int a; int b[4]; int c:1; }; -union u { int a; int b[4]; int c:1; }; +struct s { + int a; + int b[4]; + int c:1; +}; +union u { + int a; + int b[4]; + int c:1; +}; +typedef struct { + int a; + int b; +} __t; +typedef int (*__f)(void); +enum AA { + VAL1 = 10, + VAL2 = 0xffffffff80000000UL, +}; +typedef enum { + VAL10 = 10, + VAL11 = 11, +} __BB; unsigned invalid1(const int *arg) { return __builtin_preserve_field_info(arg, 1); // expected-error {{__builtin_preserve_field_info argument 1 not a field access}} @@ -46,3 +67,38 @@ unsigned invalid10(struct s *arg) { unsigned invalid11(struct s *arg, int info_kind) { return __builtin_preserve_field_info(arg->a, info_kind); // expected-error {{__builtin_preserve_field_info argument 2 not a constant}} } + +unsigned valid12() { + const struct s t; + return __builtin_preserve_type_info(t, 0) + + __builtin_preserve_type_info(*(struct s *)0, 1); +} + +unsigned valid13() { + __t t; + return __builtin_preserve_type_info(t, 1) + + __builtin_preserve_type_info(*(__t *)0, 0); +} + +unsigned valid14() { + enum AA t; + return __builtin_preserve_type_info(t, 0) + + __builtin_preserve_type_info(*(enum AA *)0, 1); +} + +unsigned valid15() { + return __builtin_preserve_enum_value(*(enum AA *)VAL1, 1) + + __builtin_preserve_enum_value(*(enum AA *)VAL2, 1); +} + +unsigned invalid16() { + return __builtin_preserve_enum_value(*(enum AA *)0, 1); // expected-error {{__builtin_preserve_enum_value argument 1 invalid}} +} + +unsigned invalid17() { + 
return __builtin_preserve_enum_value(*(enum AA *)VAL10, 1); // expected-error {{__builtin_preserve_enum_value argument 1 invalid}} +} + +unsigned invalid18(struct s *arg) { + return __builtin_preserve_type_info(arg->a + 2, 0); // expected-error {{__builtin_preserve_type_info argument 1 invalid}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index c4d35b2a0a88c..f25f631c9b147 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -26,4 +26,10 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">, Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty], [IntrNoMem]>; + def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_bpf_preserve_enum_value : GCCBuiltin<"__builtin_bpf_preserve_enum_value">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrNoMem]>; } From 4a04bc8995639e1d333790518e4d42e0961f740e Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Tue, 4 Aug 2020 18:48:25 +0300 Subject: [PATCH 327/600] [OpenMP] Don't use MSVC workaround with MinGW Patch by mati865@gmail.com Differential Revision: https://reviews.llvm.org/D85210 --- openmp/runtime/cmake/LibompExports.cmake | 4 +-- openmp/runtime/cmake/LibompMicroTests.cmake | 2 +- openmp/runtime/src/CMakeLists.txt | 34 ++++++++++++--------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/openmp/runtime/cmake/LibompExports.cmake b/openmp/runtime/cmake/LibompExports.cmake index 96dab9f4a4657..97ecc5d691ff5 100644 --- a/openmp/runtime/cmake/LibompExports.cmake +++ b/openmp/runtime/cmake/LibompExports.cmake @@ -83,11 +83,11 @@ add_custom_command(TARGET omp POST_BUILD # Copy Windows import library into exports/ directory post build if(WIN32) - get_target_property(LIBOMPIMP_OUTPUT_DIRECTORY ompimp ARCHIVE_OUTPUT_DIRECTORY) + get_target_property(LIBOMPIMP_OUTPUT_DIRECTORY ${LIBOMP_IMP_LIB_TARGET} ARCHIVE_OUTPUT_DIRECTORY) if(NOT LIBOMPIMP_OUTPUT_DIRECTORY) set(LIBOMPIMP_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() - add_custom_command(TARGET ompimp POST_BUILD + add_custom_command(TARGET ${LIBOMP_IMP_LIB_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBOMP_EXPORTS_LIB_DIR} COMMAND ${CMAKE_COMMAND} -E copy ${LIBOMPIMP_OUTPUT_DIRECTORY}/${LIBOMP_IMP_LIB_FILE} ${LIBOMP_EXPORTS_LIB_DIR} ) diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index dc44e2c1e2fc8..1ca3412edc8e0 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -40,7 +40,7 @@ # get library location if(WIN32) get_target_property(LIBOMP_OUTPUT_DIRECTORY omp RUNTIME_OUTPUT_DIRECTORY) - get_target_property(LIBOMPIMP_OUTPUT_DIRECTORY ompimp ARCHIVE_OUTPUT_DIRECTORY) + get_target_property(LIBOMPIMP_OUTPUT_DIRECTORY ${LIBOMP_IMP_LIB_TARGET} ARCHIVE_OUTPUT_DIRECTORY) if(NOT LIBOMPIMP_OUTPUT_DIRECTORY) set(LIBOMPIMP_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 1211441876eb8..81275c0483dd4 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -202,21 +202,27 @@ if(WIN32) IMPORT_PREFIX "" IMPORT_SUFFIX "" # control generated import library name when building omp ARCHIVE_OUTPUT_NAME 
${LIBOMP_GENERATED_IMP_LIB_FILENAME} ) - # Get generated import library from creating omp - get_target_property(LIBOMP_IMPORT_LIB_DIRECTORY omp ARCHIVE_OUTPUT_DIRECTORY) - if(LIBOMP_IMPORT_LIB_DIRECTORY) - set(LIBOMP_GENERATED_IMP_LIB ${LIBOMP_IMPORT_LIB_DIRECTORY}/${LIBOMP_GENERATED_IMP_LIB_FILENAME}) + + if(MSVC) + # Get generated import library from creating omp + get_target_property(LIBOMP_IMPORT_LIB_DIRECTORY omp ARCHIVE_OUTPUT_DIRECTORY) + if(LIBOMP_IMPORT_LIB_DIRECTORY) + set(LIBOMP_GENERATED_IMP_LIB ${LIBOMP_IMPORT_LIB_DIRECTORY}/${LIBOMP_GENERATED_IMP_LIB_FILENAME}) + else() + set(LIBOMP_GENERATED_IMP_LIB ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_GENERATED_IMP_LIB_FILENAME}) + endif() + set_source_files_properties(${LIBOMP_GENERATED_IMP_LIB} PROPERTIES GENERATED TRUE EXTERNAL_OBJECT TRUE) + # Create new import library that is just the previously created one + kmp_import.cpp + add_library(ompimp STATIC ${LIBOMP_GENERATED_IMP_LIB} kmp_import.cpp) + set_target_properties(ompimp PROPERTIES + PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_IMP_LIB_FILE}" + LINKER_LANGUAGE C + ) + add_dependencies(ompimp omp) # ensure generated import library is created first + set(LIBOMP_IMP_LIB_TARGET ompimp) else() - set(LIBOMP_GENERATED_IMP_LIB ${CMAKE_CURRENT_BINARY_DIR}/${LIBOMP_GENERATED_IMP_LIB_FILENAME}) + set(LIBOMP_IMP_LIB_TARGET omp) endif() - set_source_files_properties(${LIBOMP_GENERATED_IMP_LIB} PROPERTIES GENERATED TRUE EXTERNAL_OBJECT TRUE) - # Create new import library that is just the previously created one + kmp_import.cpp - add_library(ompimp STATIC ${LIBOMP_GENERATED_IMP_LIB} kmp_import.cpp) - set_target_properties(ompimp PROPERTIES - PREFIX "" SUFFIX "" OUTPUT_NAME "${LIBOMP_IMP_LIB_FILE}" - LINKER_LANGUAGE C - ) - add_dependencies(ompimp omp) # ensure generated import library is created first # Create def file to designate exported functions libomp_get_gdflags(LIBOMP_GDFLAGS) # generate-def.pl flags (Windows only) @@ -290,7 +296,7 @@ else() endif() if(WIN32) install(TARGETS omp RUNTIME DESTINATION bin) - install(TARGETS ompimp ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") + install(TARGETS ${LIBOMP_IMP_LIB_TARGET} ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}") # Create aliases (regular copies) of the library for backwards compatibility set(LIBOMP_ALIASES "libiomp5md") foreach(alias IN LISTS LIBOMP_ALIASES) From 593e19629744d6c8ba45fe4bb78910cf653cd6a7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 08:51:24 -0700 Subject: [PATCH 328/600] [llvm-symbolizer] Switch command line parsing from llvm::cl to OptTable for the advantage outlined by D83639 ([OptTable] Support grouped short options) Some behavior changes: * -i={0,false} is removed. Use --no-inlines instead. * --demangle={0,false} is removed. Use --no-demangle instead * -untag-addresses={0,false} is removed. Use --no-untag-addresses instead Added a higher level API OptTable::parseArgs which handles optional initial options populated from an environment variable, expands response files recursively, and parses options. 
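A minimal sketch of the intended call pattern (parseArgs, setInitialOptionsFromEnvironment, and OPT_UNKNOWN come from this patch; the MyToolOptTable subclass and the MY_TOOL_OPTS variable name are illustrative placeholders):

  BumpPtrAllocator Alloc;
  StringSaver Saver(Alloc);
  MyToolOptTable Tbl;  // OptTable subclass generated from the tool's Opts.td
  Tbl.setInitialOptionsFromEnvironment("MY_TOOL_OPTS");
  opt::InputArgList Args =
      Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [](StringRef Msg) {
        errs() << "error: " << Msg << '\n';
        exit(1);
      });

The rewritten llvm-symbolizer driver below follows this same pattern.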
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D83530 --- llvm/docs/CommandGuide/llvm-symbolizer.rst | 16 +- llvm/include/llvm/Option/OptTable.h | 18 + llvm/include/llvm/Support/CommandLine.h | 8 + llvm/lib/Option/OptTable.cpp | 32 +- llvm/lib/Support/CommandLine.cpp | 16 + llvm/test/DebugInfo/debuglineinfo-path.ll | 6 +- llvm/test/tools/llvm-symbolizer/basic.s | 1 + llvm/test/tools/llvm-symbolizer/help.test | 4 +- .../llvm-symbolizer/output-style-inlined.test | 8 +- .../tools/llvm-symbolizer/split-dwarf.test | 6 +- .../llvm-symbolizer/unknown-argument.test | 12 + .../llvm-symbolizer/untag-addresses.test | 2 +- llvm/tools/llvm-symbolizer/CMakeLists.txt | 7 + llvm/tools/llvm-symbolizer/Opts.td | 60 +++ .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 360 ++++++++---------- 15 files changed, 339 insertions(+), 217 deletions(-) create mode 100644 llvm/test/tools/llvm-symbolizer/unknown-argument.test create mode 100644 llvm/tools/llvm-symbolizer/Opts.td diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index 5c8465af04a7f..5c6a9511353cc 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -220,16 +220,16 @@ OPTIONS Show help and usage for this command. -.. option:: --help-list - - Show help and usage for this command without grouping the options into categories. - .. _llvm-symbolizer-opt-i: .. option:: --inlining, --inlines, -i If a source code location is in an inlined function, prints all the inlined - frames. Defaults to true. + frames. This is the default. + +.. option:: --no-inlines + + Don't print inlined frames. .. option:: --no-demangle @@ -267,17 +267,17 @@ OPTIONS foo() at /tmp/test.cpp:6:3 - $ llvm-symbolizer --output-style=LLVM --obj=inlined.elf 0x4004be 0x400486 -p -i=0 + $ llvm-symbolizer --output-style=LLVM --obj=inlined.elf 0x4004be 0x400486 -p --no-inlines main at /tmp/test.cpp:11:18 foo() at /tmp/test.cpp:6:3 - $ llvm-symbolizer --output-style=GNU --obj=inlined.elf 0x4004be 0x400486 -p -i=0 + $ llvm-symbolizer --output-style=GNU --obj=inlined.elf 0x4004be 0x400486 -p --no-inlines baz() at /tmp/test.cpp:11 foo() at /tmp/test.cpp:6 $ clang -g -fdebug-info-for-profiling test.cpp -o profiling.elf - $ llvm-symbolizer --output-style=GNU --obj=profiling.elf 0x401167 -p -i=0 + $ llvm-symbolizer --output-style=GNU --obj=profiling.elf 0x401167 -p --no-inlines main at /tmp/test.cpp:15 (discriminator 2) .. option:: --pretty-print, -p diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index b9984bed55a7b..1aabff0fd6591 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/OptSpecifier.h" +#include "llvm/Support/StringSaver.h" #include #include #include @@ -20,6 +21,7 @@ namespace llvm { class raw_ostream; +template class function_ref; namespace opt { @@ -60,6 +62,7 @@ class OptTable { std::vector OptionInfos; bool IgnoreCase; bool GroupedShortOptions = false; + const char *EnvVar = nullptr; unsigned TheInputOptionID = 0; unsigned TheUnknownOptionID = 0; @@ -123,6 +126,9 @@ class OptTable { return getInfo(id).MetaVar; } + /// Specify the environment variable where initial options should be read. + void setInitialOptionsFromEnvironment(const char *E) { EnvVar = E; } + /// Support grouped short options. e.g. -ab represents -a -b. 
void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; } @@ -219,6 +225,18 @@ class OptTable { unsigned &MissingArgCount, unsigned FlagsToInclude = 0, unsigned FlagsToExclude = 0) const; + /// A convenience helper which handles optional initial options populated from + /// an environment variable, expands response files recursively and parses + /// options. + /// + /// \param ErrorFn - Called on a formatted error message for missing arguments + /// or unknown options. + /// \return An InputArgList; on error this will contain all the options which + /// could be parsed. + InputArgList parseArgs(int Argc, char *const *Argv, OptSpecifier Unknown, + StringSaver &Saver, + function_ref<void(StringRef)> ErrorFn) const; + /// Render the help text for an option table. /// /// \param OS - The stream to write the help text to. diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 62e44aeefe9cf..38c588080069c 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -2085,6 +2085,14 @@ bool ExpandResponseFiles( llvm::vfs::FileSystem &FS = *llvm::vfs::getRealFileSystem(), llvm::Optional<llvm::StringRef> CurrentDir = llvm::None); +/// A convenience helper which concatenates the options specified by the +/// environment variable EnvVar and command line options, then expands response +/// files recursively. The tokenizer is a predefined GNU or Windows one. +/// \return true if all @files were expanded successfully or there were none. +bool expandResponseFiles(int Argc, const char *const *Argv, const char *EnvVar, + StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv); + /// Mark all options not part of this category as cl::ReallyHidden. /// /// \param Category the category of options to keep displaying diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index 16404d3d81078..2b7fcf55a57cc 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Option/OptTable.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" -#include "llvm/Option/Option.h" #include "llvm/Option/OptSpecifier.h" -#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" // for expandResponseFiles #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -490,6 +491,33 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr, return Args; } +InputArgList OptTable::parseArgs(int Argc, char *const *Argv, + OptSpecifier Unknown, StringSaver &Saver, + function_ref<void(StringRef)> ErrorFn) const { + SmallVector<const char *, 0> NewArgv; + // The environment variable specifies initial options which can be overridden + // by command line options. + cl::expandResponseFiles(Argc, Argv, EnvVar, Saver, NewArgv); + + unsigned MAI, MAC; + opt::InputArgList Args = ParseArgs(makeArrayRef(NewArgv), MAI, MAC); + if (MAC) + ErrorFn((Twine(Args.getArgString(MAI)) + ": missing argument").str()); + + // For each unknown option, call ErrorFn with a formatted error message. The + // message includes a suggested alternative option spelling if available.
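+  // e.g. with a registered --inlines flag, the unknown input "--inline" is
+  // reported as: unknown argument '--inline', did you mean '--inlines'?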
+ std::string Nearest; + for (const opt::Arg *A : Args.filtered(Unknown)) { + std::string Spelling = A->getAsString(Args); + if (findNearest(Spelling, Nearest) > 1) + ErrorFn("unknown argument '" + A->getAsString(Args) + "'"); + else + ErrorFn("unknown argument '" + A->getAsString(Args) + + "', did you mean '" + Nearest + "'?"); + } + return Args; +} + static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { const Option O = Opts.getOption(Id); std::string Name = O.getPrefixedName(); diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 4fba6a9ada2c0..e53421a277f1f 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1251,6 +1251,22 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, return AllExpanded; } +bool cl::expandResponseFiles(int Argc, const char *const *Argv, + const char *EnvVar, StringSaver &Saver, + SmallVectorImpl &NewArgv) { + auto Tokenize = Triple(sys::getProcessTriple()).isOSWindows() + ? cl::TokenizeWindowsCommandLine + : cl::TokenizeGNUCommandLine; + // The environment variable specifies initial options. + if (EnvVar) + if (llvm::Optional EnvValue = sys::Process::GetEnv(EnvVar)) + Tokenize(*EnvValue, Saver, NewArgv, /*MarkEOLs=*/false); + + // Command line options can override the environment variable. + NewArgv.append(Argv + 1, Argv + Argc); + return ExpandResponseFiles(Saver, Tokenize, NewArgv); +} + bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, SmallVectorImpl &Argv) { SmallString<128> AbsPath; diff --git a/llvm/test/DebugInfo/debuglineinfo-path.ll b/llvm/test/DebugInfo/debuglineinfo-path.ll index 4c5f43aa03fc6..ea32aecf5d86a 100644 --- a/llvm/test/DebugInfo/debuglineinfo-path.ll +++ b/llvm/test/DebugInfo/debuglineinfo-path.ll @@ -8,9 +8,9 @@ ; RUN: llvm-nm --radix=o %t | grep posix_absolute_func > %t.posix_absolute_func ; RUN: llvm-nm --radix=o %t | grep posix_relative_func > %t.posix_relative_func ; RUN: llvm-nm --radix=o %t | grep win_func > %t.win_func -; RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false --obj %t < %t.posix_absolute_func | FileCheck %s --check-prefix=POSIX_A -; RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false --obj %t < %t.posix_relative_func | FileCheck %s --check-prefix=POSIX_R -; RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false --obj %t < %t.win_func | FileCheck %s --check-prefix=WIN +; RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle --obj %t < %t.posix_absolute_func | FileCheck %s --check-prefix=POSIX_A +; RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle --obj %t < %t.posix_relative_func | FileCheck %s --check-prefix=POSIX_R +; RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle --obj %t < %t.win_func | FileCheck %s --check-prefix=WIN ;POSIX_A: posix_absolute_func ;POSIX_A: /absolute/posix/path{{[\/]}}posix.c diff --git a/llvm/test/tools/llvm-symbolizer/basic.s b/llvm/test/tools/llvm-symbolizer/basic.s index b9d5c814024fb..1a28f14f3eb4a 100644 --- a/llvm/test/tools/llvm-symbolizer/basic.s +++ b/llvm/test/tools/llvm-symbolizer/basic.s @@ -17,6 +17,7 @@ foo: # Check --obj aliases --exe, -e # RUN: llvm-symbolizer 0xa 0xb --exe=%t.o | FileCheck %s +# RUN: llvm-symbolizer 0xa 0xb --exe %t.o | FileCheck %s # RUN: llvm-symbolizer 0xa 0xb -e %t.o | FileCheck %s # RUN: llvm-symbolizer 0xa 0xb -e=%t.o | FileCheck %s # RUN: llvm-symbolizer 0xa 0xb -e%t.o | FileCheck %s diff --git 
a/llvm/test/tools/llvm-symbolizer/help.test b/llvm/test/tools/llvm-symbolizer/help.test index 12339463631c2..c05760f618669 100644 --- a/llvm/test/tools/llvm-symbolizer/help.test +++ b/llvm/test/tools/llvm-symbolizer/help.test @@ -4,9 +4,9 @@ RUN: llvm-addr2line -h | FileCheck %s --check-prefix=ADDR2LINE RUN: llvm-addr2line --help | FileCheck %s --check-prefix=ADDR2LINE SYMBOLIZER: OVERVIEW: llvm-symbolizer -SYMBOLIZER: USAGE: llvm-symbolizer{{(.exe)?}} [options] ... +SYMBOLIZER: USAGE: llvm-symbolizer{{(.exe)?}} [options] addresses... SYMBOLIZER: @FILE ADDR2LINE: OVERVIEW: llvm-addr2line -ADDR2LINE: USAGE: llvm-addr2line{{(.exe)?}} [options] ... +ADDR2LINE: USAGE: llvm-addr2line{{(.exe)?}} [options] addresses... ADDR2LINE: @FILE diff --git a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test index 7aa9c6b4059b6..daa9584a3f48e 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test @@ -1,16 +1,16 @@ -This test checks that when inlined frames are not shown (-i=0) and the output +This test checks that when inlined frames are not shown (--no-inlines) and the output style is set to GNU (--output-style=GNU) the name of an inlined function is not replaced with the name of the top caller function. At the same time, the current behavior of llvm-symbolizer is preserved with --output-style=LLVM or when the option is not specified. -RUN: llvm-symbolizer -i=0 -e %p/Inputs/addr.exe 0x40054d \ +RUN: llvm-symbolizer --no-inlines -e %p/Inputs/addr.exe 0x40054d \ RUN: | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo -RUN: llvm-symbolizer --output-style=LLVM -i=0 -e %p/Inputs/addr.exe 0x40054d \ +RUN: llvm-symbolizer --output-style=LLVM --no-inlines -e %p/Inputs/addr.exe 0x40054d \ RUN: | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo -RUN: llvm-symbolizer --output-style=GNU -i=0 -e %p/Inputs/addr.exe 0x40054d \ +RUN: llvm-symbolizer --output-style=GNU --no-inlines -e %p/Inputs/addr.exe 0x40054d \ RUN: | FileCheck %s --check-prefix=GNU --implicit-check-not=main RUN: llvm-addr2line -f -e %p/Inputs/addr.exe 0x40054d \ diff --git a/llvm/test/tools/llvm-symbolizer/split-dwarf.test b/llvm/test/tools/llvm-symbolizer/split-dwarf.test index af758acd7e09e..e129d2ede3aea 100644 --- a/llvm/test/tools/llvm-symbolizer/split-dwarf.test +++ b/llvm/test/tools/llvm-symbolizer/split-dwarf.test @@ -4,14 +4,14 @@ RUN: mkdir -p %t RUN: cp %p/Inputs/split-dwarf-test.dwo %t RUN: cd %t -RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ +RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle \ RUN: --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,DWO %s Ensure we get the same results in the absence of gmlt-like data in the executable but the presence of a .dwo file RUN: echo "%p/Inputs/split-dwarf-test-nogmlt 0x400504" >> %t.input RUN: echo "%p/Inputs/split-dwarf-test-nogmlt 0x4004f4" >> %t.input -RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ +RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle \ RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test-nogmlt 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,DWO %s Ensure we get gmlt like results in the absence of a .dwo file but the presence of gmlt-like data in the executable @@ -19,7 +19,7 @@ Ensure we get gmlt like results in the absence of a .dwo file but the presence o RUN: rm 
%t/split-dwarf-test.dwo RUN: echo "%p/Inputs/split-dwarf-test 0x400504" >> %t.input RUN: echo "%p/Inputs/split-dwarf-test 0x4004f4" >> %t.input -RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ +RUN: llvm-symbolizer --functions=linkage --inlining --no-demangle \ RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,NODWO %s DWO: _Z2f2v diff --git a/llvm/test/tools/llvm-symbolizer/unknown-argument.test b/llvm/test/tools/llvm-symbolizer/unknown-argument.test new file mode 100644 index 0000000000000..a697f1a27626e --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/unknown-argument.test @@ -0,0 +1,12 @@ +# RUN: not llvm-symbolizer -x --flag 2>&1 | FileCheck %s + +# CHECK: error: unknown argument '-x'{{$}} +# CHECK-NEXT: error: unknown argument '--flag'{{$}} + +# RUN: not llvm-symbolizer --inline 2>&1 | FileCheck %s --check-prefix=SUGGEST + +# SUGGEST: error: unknown argument '--inline', did you mean '--inlines'? + +# RUN: not llvm-symbolizer -e 2>&1 | FileCheck %s --check-prefix=MISSING + +# MISSING: error: -e: missing argument diff --git a/llvm/test/tools/llvm-symbolizer/untag-addresses.test b/llvm/test/tools/llvm-symbolizer/untag-addresses.test index 3799f306cab05..f37f257d2d218 100644 --- a/llvm/test/tools/llvm-symbolizer/untag-addresses.test +++ b/llvm/test/tools/llvm-symbolizer/untag-addresses.test @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: echo DATA %t.o 0 | llvm-symbolizer | FileCheck --check-prefix=UNTAG %s -# RUN: echo DATA %t.o 0 | llvm-symbolizer -untag-addresses=0 | FileCheck --check-prefix=NOUNTAG %s +# RUN: echo DATA %t.o 0 | llvm-symbolizer --no-untag-addresses | FileCheck --check-prefix=NOUNTAG %s # RUN: echo DATA %t.o 0 | llvm-addr2line | FileCheck --check-prefix=NOUNTAG %s # UNTAG: foo diff --git a/llvm/tools/llvm-symbolizer/CMakeLists.txt b/llvm/tools/llvm-symbolizer/CMakeLists.txt index 13da12fba7b5d..c112e344da7ea 100644 --- a/llvm/tools/llvm-symbolizer/CMakeLists.txt +++ b/llvm/tools/llvm-symbolizer/CMakeLists.txt @@ -3,17 +3,24 @@ # This means that we need LLVM libraries to be compiled for these # targets as well. Currently, there is no support for such a build strategy. 
+set(LLVM_TARGET_DEFINITIONS Opts.td) +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(SymbolizerOptsTableGen) + set(LLVM_LINK_COMPONENTS DebugInfoDWARF DebugInfoPDB Demangle Object + Option Support Symbolize ) add_llvm_tool(llvm-symbolizer llvm-symbolizer.cpp + DEPENDS + SymbolizerOptsTableGen ) add_llvm_tool_symlink(llvm-addr2line llvm-symbolizer) diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td new file mode 100644 index 0000000000000..d83b796635b8e --- /dev/null +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -0,0 +1,60 @@ +include "llvm/Option/OptParser.td" + +multiclass B<string name, string help1, string help2> { + def NAME: Flag<["--", "-"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--", "-"], "no-" # name>, HelpText<help2>; +} + +multiclass Eq<string name, string help> { + def NAME #_EQ : Joined<["--", "-"], name #"=">, + HelpText<help>; + def : Separate<["--", "-"], name>, Alias<!cast<Joined>(NAME #_EQ)>; +} + +class F<string name, string help>: Flag<["--", "-"], name>, HelpText<help>; + +def addresses : F<"addresses", "Show address before line information">; +defm adjust_vma + : Eq<"adjust-vma", "Add specified offset to object file addresses">, + MetaVarName<"<offset>">; +def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">; +defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"<dir>">; +defm default_arch : Eq<"default-arch", "Default architecture (for multi-arch objects)">; +defm demangle : B<"demangle", "Demangle function names", "Don't demangle function names">; +def functions : F<"functions", "Print function name for a given address">; +def functions_EQ : Joined<["--"], "functions=">, HelpText<"Print function name for a given address">, Values<"none,short,linkage">; +def help : F<"help", "Display this help">; +defm dwp : Eq<"dwp", "Path to DWP file to be used for any split CUs">, MetaVarName<"<file>">; +defm dsym_hint : Eq<"dsym-hint", "Path to .dSYM bundles to search for debug info for the object files">, MetaVarName<"<dir>">; +defm fallback_debug_path : Eq<"fallback-debug-path", "Fallback path for debug binaries">, MetaVarName<"<dir>">; +defm inlines : B<"inlines", "Print all inlined frames for a given address", + "Do not print inlined frames">; +defm obj + : Eq<"obj", "Path to object file to be symbolized (if not provided, " + "object file should be specified for each input line)">, MetaVarName<"<file>">; +defm output_style + : Eq<"output-style", "Specify print style.
Supported styles: LLVM, GNU">, + MetaVarName<"style">, + Values<"LLVM,GNU">; +def pretty_print : F<"pretty-print", "Make the output more human friendly">; +defm print_source_context_lines : Eq<"print-source-context-lines", "Print N lines of source file context">; +def relative_address : F<"relative-address", "Interpret addresses as addresses relative to the image base">; +def relativenames : F<"relativenames", "Strip the compilation directory from paths">; +defm untag_addresses : B<"untag-addresses", "", "Remove memory tags from addresses before symbolization">; +def use_native_pdb_reader : F<"use-native-pdb-reader", "Use native PDB functionality">; +def verbose : F<"verbose", "Print verbose line info">; + +def : Flag<["-"], "a">, Alias, HelpText<"Alias for --addresses">; +def : F<"print-address", "Alias for --addresses">, Alias; +def : Flag<["-"], "C">, Alias, HelpText<"Alias for --demangle">; +def : Joined<["--"], "exe=">, Alias, HelpText<"Alias for --obj">, MetaVarName<"">; +def : Separate<["--"], "exe">, Alias, HelpText<"Alias for --obj">, MetaVarName<"">; +def : JoinedOrSeparate<["-"], "e">, Alias, HelpText<"Alias for --obj">, MetaVarName<"">; +def : Joined<["-"], "e=">, Alias, HelpText<"Alias for --obj">, MetaVarName<"">; +def : Flag<["-"], "f">, Alias, HelpText<"Alias for --functions">; +def : Joined<["-"], "f=">, Alias, HelpText<"Alias for --functions=">; +def : Flag<["-"], "h">, Alias; +def : Flag<["-"], "i">, Alias, HelpText<"Alias for --inlines">; +def : F<"inlining", "Alias for --inlines">, Alias; +def : Flag<["-"], "p">, Alias, HelpText<"Alias for --pretty-print">; +def : Flag<["-"], "s">, Alias, HelpText<"Alias for --basenames">; diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 6a702c64a1053..2101d645dffa2 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -14,15 +14,20 @@ // //===----------------------------------------------------------------------===// +#include "Opts.inc" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" #include "llvm/Support/COM.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -32,144 +37,42 @@ using namespace llvm; using namespace symbolize; -static cl::opt -ClUseSymbolTable("use-symbol-table", cl::init(true), - cl::desc("Prefer names in symbol table to names " - "in debug info")); - -static cl::opt ClPrintFunctions( - "functions", cl::init(FunctionNameKind::LinkageName), - cl::desc("Print function name for a given address"), cl::ValueOptional, - cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"), - clEnumValN(FunctionNameKind::ShortName, "short", - "print short function name"), - clEnumValN(FunctionNameKind::LinkageName, "linkage", - "print function linkage name"), - // Sentinel value for unspecified value. 
- clEnumValN(FunctionNameKind::LinkageName, "", ""))); -static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"), - cl::NotHidden, cl::Grouping, - cl::aliasopt(ClPrintFunctions)); - -static cl::opt - ClUseRelativeAddress("relative-address", cl::init(false), - cl::desc("Interpret addresses as relative addresses"), - cl::ReallyHidden); - -static cl::opt ClUntagAddresses( - "untag-addresses", cl::init(true), - cl::desc("Remove memory tags from addresses before symbolization")); - -static cl::opt - ClPrintInlining("inlining", cl::init(true), - cl::desc("Print all inlined frames for a given address")); -static cl::alias - ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"), - cl::NotHidden, cl::aliasopt(ClPrintInlining), - cl::Grouping); -static cl::alias - ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"), - cl::NotHidden, cl::aliasopt(ClPrintInlining)); - -static cl::opt ClBasenames("basenames", cl::init(false), - cl::desc("Strip directory names from paths")); -static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"), - cl::NotHidden, cl::aliasopt(ClBasenames)); - -static cl::opt - ClRelativenames("relativenames", cl::init(false), - cl::desc("Strip the compilation directory from paths")); - -static cl::opt -ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names")); -static cl::alias -ClDemangleShort("C", cl::desc("Alias for -demangle"), - cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping); -static cl::opt -ClNoDemangle("no-demangle", cl::init(false), - cl::desc("Don't demangle function names")); - -static cl::opt ClDefaultArch("default-arch", cl::init(""), - cl::desc("Default architecture " - "(for multi-arch objects)")); - -static cl::opt -ClBinaryName("obj", cl::init(""), - cl::desc("Path to object file to be symbolized (if not provided, " - "object file should be specified for each input line)")); -static cl::alias -ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"), - cl::NotHidden, cl::aliasopt(ClBinaryName)); -static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"), - cl::NotHidden, cl::Grouping, cl::Prefix, - cl::aliasopt(ClBinaryName)); - -static cl::opt - ClDwpName("dwp", cl::init(""), - cl::desc("Path to DWP file to be use for any split CUs")); - -static cl::list -ClDsymHint("dsym-hint", cl::ZeroOrMore, - cl::desc("Path to .dSYM bundles to search for debug info for the " - "object files")); - -static cl::opt -ClPrintAddress("print-address", cl::init(false), - cl::desc("Show address before line information")); -static cl::alias -ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"), - cl::NotHidden, cl::aliasopt(ClPrintAddress)); -static cl::alias -ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"), - cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping); - -static cl::opt - ClPrettyPrint("pretty-print", cl::init(false), - cl::desc("Make the output more human friendly")); -static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"), - cl::NotHidden, - cl::aliasopt(ClPrettyPrint), cl::Grouping); - -static cl::opt ClPrintSourceContextLines( - "print-source-context-lines", cl::init(0), - cl::desc("Print N number of source file context")); +namespace { +enum ID { + OPT_INVALID = 0, // This is not an option ID. 
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; -static cl::opt ClVerbose("verbose", cl::init(false), - cl::desc("Print verbose line info")); +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Opts.inc" +#undef PREFIX + +static const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR, VALUES) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS, VALUES}, +#include "Opts.inc" +#undef OPTION +}; -static cl::opt - ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"), - cl::desc("Add specified offset to object file addresses")); +class SymbolizerOptTable : public opt::OptTable { +public: + SymbolizerOptTable() : OptTable(InfoTable, true) {} +}; +} // namespace static cl::list ClInputAddresses(cl::Positional, cl::desc("..."), cl::ZeroOrMore); -static cl::opt - ClFallbackDebugPath("fallback-debug-path", cl::init(""), - cl::desc("Fallback path for debug binaries.")); - -static cl::list - ClDebugFileDirectory("debug-file-directory", cl::ZeroOrMore, - cl::value_desc("dir"), - cl::desc("Path to directory where to look for debug " - "files.")); - -static cl::opt - ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM), - cl::desc("Specify print style"), - cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM", - "LLVM default style"), - clEnumValN(DIPrinter::OutputStyle::GNU, "GNU", - "GNU addr2line style"))); - -static cl::opt - ClUseNativePDBReader("use-native-pdb-reader", cl::init(0), - cl::desc("Use native PDB functionality")); - -static cl::extrahelp - HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); - template static bool error(Expected &ResOrErr) { if (ResOrErr) @@ -185,7 +88,8 @@ enum class Command { Frame, }; -static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, +static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, + StringRef InputString, Command &Cmd, std::string &ModuleName, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; @@ -201,7 +105,7 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, } const char *Pos = InputString.data(); // Skip delimiters and parse input filename (if needed). - if (ClBinaryName.empty()) { + if (BinaryName.empty()) { Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; @@ -217,7 +121,7 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, Pos += NameLength; } } else { - ModuleName = ClBinaryName; + ModuleName = BinaryName.str(); } // Skip delimiters and parse module offset. Pos += strspn(Pos, kDelimiters); @@ -230,24 +134,26 @@ static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, return !Offset.getAsInteger(IsAddr2Line ? 
16 : 0, ModuleOffset); } -static void symbolizeInput(bool IsAddr2Line, StringRef InputString, - LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { +static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, + bool IsAddr2Line, DIPrinter::OutputStyle OutputStyle, + StringRef InputString, LLVMSymbolizer &Symbolizer, + DIPrinter &Printer) { Command Cmd; std::string ModuleName; uint64_t Offset = 0; - if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName, - Offset)) { + if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, + StringRef(InputString), Cmd, ModuleName, Offset)) { outs() << InputString << "\n"; return; } - if (ClPrintAddress) { + if (Args.hasArg(OPT_addresses)) { outs() << "0x"; outs().write_hex(Offset); - StringRef Delimiter = ClPrettyPrint ? ": " : "\n"; + StringRef Delimiter = Args.hasArg(OPT_pretty_print) ? ": " : "\n"; outs() << Delimiter; } - Offset -= ClAdjustVMA; + Offset -= AdjustVMA; if (Cmd == Command::Data) { auto ResOrErr = Symbolizer.symbolizeData( ModuleName, {Offset, object::SectionedAddress::UndefSection}); @@ -261,13 +167,13 @@ static void symbolizeInput(bool IsAddr2Line, StringRef InputString, if (ResOrErr->empty()) outs() << "??\n"; } - } else if (ClPrintInlining) { + } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) { auto ResOrErr = Symbolizer.symbolizeInlinedCode( ModuleName, {Offset, object::SectionedAddress::UndefSection}); Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get()); - } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) { - // With ClPrintFunctions == FunctionNameKind::LinkageName (default) - // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode() + } else if (OutputStyle == DIPrinter::OutputStyle::GNU) { + // With PrintFunctions == FunctionNameKind::LinkageName (default) + // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode() // may override the name of an inlined function with the name of the topmost // caller function in the inlining chain. This contradicts the existing // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only @@ -280,67 +186,131 @@ static void symbolizeInput(bool IsAddr2Line, StringRef InputString, ModuleName, {Offset, object::SectionedAddress::UndefSection}); Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get()); } - if (ClOutputStyle == DIPrinter::OutputStyle::LLVM) + if (OutputStyle == DIPrinter::OutputStyle::LLVM) outs() << "\n"; } -int main(int argc, char **argv) { - InitLLVM X(argc, argv); +static void printHelp(bool IsAddr2Line, const SymbolizerOptTable &Tbl, + raw_ostream &OS) { + StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer"; + const char HelpText[] = " [options] addresses..."; + Tbl.PrintHelp(OS, (ToolName + HelpText).str().c_str(), + ToolName.str().c_str()); + // TODO Replace this with OptTable API once it adds extrahelp support. + OS << "\nPass @FILE as argument to read options from FILE.\n"; +} - bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line"); +static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line, + StringSaver &Saver, + SymbolizerOptTable &Tbl) { + Tbl.setGroupedShortOptions(true); + // The environment variable specifies initial options which can be overridden + // by command line options. + Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ?
"LLVM_ADDR2LINE_OPTS" + : "LLVM_SYMBOLIZER_OPTS"); + bool HasError = false; + opt::InputArgList Args = + Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { + errs() << ("error: " + Msg + "\n"); + HasError = true; + }); + if (HasError) + exit(1); + if (Args.hasArg(OPT_help)) { + printHelp(IsAddr2Line, Tbl, outs()); + exit(0); + } - if (IsAddr2Line) { - ClDemangle.setInitialValue(false); - ClPrintFunctions.setInitialValue(FunctionNameKind::None); - ClPrintInlining.setInitialValue(false); - ClUntagAddresses.setInitialValue(false); - ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU); + return Args; +} + +template +static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) { + if (const opt::Arg *A = Args.getLastArg(ID)) { + StringRef V(A->getValue()); + if (!llvm::to_integer(V, Value, 0)) { + errs() << A->getSpelling() + + ": expected a non-negative integer, but got '" + V + "'"; + exit(1); + } + } else { + Value = 0; } +} + +static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, + bool IsAddr2Line) { + if (Args.hasArg(OPT_functions)) + return FunctionNameKind::LinkageName; + if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ)) + return StringSwitch(A->getValue()) + .Case("none", FunctionNameKind::None) + .Case("short", FunctionNameKind::ShortName) + .Default(FunctionNameKind::LinkageName); + return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; +} - llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded); - cl::ParseCommandLineOptions( - argc, argv, IsAddr2Line ? "llvm-addr2line\n" : "llvm-symbolizer\n", - /*Errs=*/nullptr, - IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" : "LLVM_SYMBOLIZER_OPTS"); +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); - // If both --demangle and --no-demangle are specified then pick the last one. - if (ClNoDemangle.getPosition() > ClDemangle.getPosition()) - ClDemangle = !ClNoDemangle; + bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line"); + BumpPtrAllocator A; + StringSaver Saver(A); + SymbolizerOptTable Tbl; + opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl); LLVMSymbolizer::Options Opts; - Opts.PrintFunctions = ClPrintFunctions; - Opts.UseSymbolTable = ClUseSymbolTable; - Opts.Demangle = ClDemangle; - Opts.RelativeAddresses = ClUseRelativeAddress; - Opts.UntagAddresses = ClUntagAddresses; - Opts.DefaultArch = ClDefaultArch; - Opts.FallbackDebugPath = ClFallbackDebugPath; - Opts.DWPName = ClDwpName; - Opts.DebugFileDirectory = ClDebugFileDirectory; - Opts.UseNativePDBReader = ClUseNativePDBReader; - Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; - // If both --basenames and --relativenames are specified then pick the last - // one. - if (ClBasenames.getPosition() > ClRelativenames.getPosition()) - Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly; - else if (ClRelativenames) - Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath; - - for (const auto &hint : ClDsymHint) { - if (sys::path::extension(hint) == ".dSYM") { - Opts.DsymHints.push_back(hint); + uint64_t AdjustVMA; + unsigned SourceContextLines; + parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA); + if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) { + Opts.PathStyle = + A->getOption().matches(OPT_basenames) + ? 
DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly + : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath; + } else { + Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; + } + Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ); + Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str(); + Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line); + Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str(); + Opts.FallbackDebugPath = + Args.getLastArgValue(OPT_fallback_debug_path_EQ).str(); + Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line); + parseIntArg(Args, OPT_print_source_context_lines_EQ, SourceContextLines); + Opts.RelativeAddresses = Args.hasArg(OPT_relative_address); + Opts.UntagAddresses = + Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line); + Opts.UseNativePDBReader = Args.hasArg(OPT_use_native_pdb_reader); + Opts.UseSymbolTable = true; + + for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) { + StringRef Hint(A->getValue()); + if (sys::path::extension(Hint) == ".dSYM") { + Opts.DsymHints.emplace_back(Hint); } else { - errs() << "Warning: invalid dSYM hint: \"" << hint << - "\" (must have the '.dSYM' extension).\n"; + errs() << "Warning: invalid dSYM hint: \"" << Hint + << "\" (must have the '.dSYM' extension).\n"; } } - LLVMSymbolizer Symbolizer(Opts); - DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None, - ClPrettyPrint, ClPrintSourceContextLines, ClVerbose, - ClOutputStyle); + auto OutputStyle = + IsAddr2Line ? DIPrinter::OutputStyle::GNU : DIPrinter::OutputStyle::LLVM; + if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) { + OutputStyle = strcmp(A->getValue(), "GNU") == 0 + ? DIPrinter::OutputStyle::GNU + : DIPrinter::OutputStyle::LLVM; + } + + LLVMSymbolizer Symbolizer(Opts); + DIPrinter Printer(outs(), Opts.PrintFunctions != FunctionNameKind::None, + Args.hasArg(OPT_pretty_print), SourceContextLines, + Args.hasArg(OPT_verbose), OutputStyle); - if (ClInputAddresses.empty()) { + std::vector InputAddresses = Args.getAllArgValues(OPT_INPUT); + if (InputAddresses.empty()) { const int kMaxInputStringLength = 1024; char InputString[kMaxInputStringLength]; @@ -351,12 +321,14 @@ int main(int argc, char **argv) { std::remove_if(StrippedInputString.begin(), StrippedInputString.end(), [](char c) { return c == '\r' || c == '\n'; }), StrippedInputString.end()); - symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer); + symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle, + StrippedInputString, Symbolizer, Printer); outs().flush(); } } else { - for (StringRef Address : ClInputAddresses) - symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer); + for (StringRef Address : InputAddresses) + symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle, Address, + Symbolizer, Printer); } return 0; From 444401c31f9f1218aed46348eda23a75766002b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 4 Aug 2020 09:19:30 -0400 Subject: [PATCH 329/600] GlobalISel: Hack a test to avoid a bug introducing a verifier error There seems to be an unrelated CSEMIRBuilder bug that was causing expensive checks failures in this case. Hack the test to avoid this problem for now until that's fixed. 
--- .../AMDGPU/GlobalISel/legalize-phi.mir | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 0b1f7be76a0fa..10bd1799ccef9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -verify-machineinstrs -global-isel-abort=2 %s -o - | FileCheck %s --- name: test_phi_s32 @@ -174,11 +174,14 @@ body: | ; CHECK: G_BR %bb.2 ; CHECK: bb.2: ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[PHI]](<4 x s16>), %7(<4 x s16>), %7(<4 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[PHI]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) + ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF3]](<4 x s16>), [[DEF2]](<4 x s16>), [[DEF2]](<4 x s16>) + ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>), [[UV10:%[0-9]+]]:_(<3 x s16>), [[UV11:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<12 x s16>) + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV4]](<3 x s16>), [[UV8]](<3 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS3]](<6 x s16>) ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 @@ -200,9 +203,9 @@ body: | bb.2: %6:_(<3 x s16>) = G_PHI %4, %bb.0, %5, %bb.1 - %7:_(<4 x s16>) = G_IMPLICIT_DEF - %8:_(<4 x s16>) = G_INSERT %7, %6, 0 - $vgpr0_vgpr1 = COPY %8 + %7:_(<3 x s16>) = G_IMPLICIT_DEF + %8:_(<6 x s16>) = G_CONCAT_VECTORS %6, %7 + $vgpr0_vgpr1_vgpr2 = COPY %8 S_SETPC_B64 undef $sgpr30_sgpr31 ... From 05aa29efd7ecaea9631a3ac5c471a4a4749463a2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 5 Mar 2020 09:48:40 +0000 Subject: [PATCH 330/600] [docs] Mention LLVM_ENABLE_MODULES. --- llvm/docs/CMake.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index f96e34f21e803..da13726e8ca0c 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -306,6 +306,9 @@ LLVM-specific variables scenario where a manual override may be desirable is when using Visual Studio 2017's CMake integration, which would not be detected as an IDE otherwise. +**LLVM_ENABLE_MODULES** + Compile with C++ modules enabled. + **LLVM_ENABLE_PIC**:BOOL Add the ``-fPIC`` flag to the compiler command-line, if the compiler supports this flag. Some systems, like Windows, do not need this flag. Defaults to ON. 
From a16882047a3f7c37f2c7747a1b2ee0d7619d5645 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 4 Aug 2020 11:23:10 -0400 Subject: [PATCH 331/600] [InstSimplify] refactor min/max folds with shared operand; NFC --- llvm/lib/Analysis/InstructionSimplify.cpp | 63 +++++++++++++---------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6b8f8e3acc179..8dd047ecb8b2c 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5198,8 +5198,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, return nullptr; } -static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID ID) { - switch (ID) { +static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID IID) { + switch (IID) { case Intrinsic::smax: return Intrinsic::smin; case Intrinsic::smin: return Intrinsic::smax; case Intrinsic::umax: return Intrinsic::umin; @@ -5208,8 +5208,8 @@ static Intrinsic::ID getMaxMinOpposite(Intrinsic::ID ID) { } } -static APInt getMaxMinLimit(Intrinsic::ID ID, unsigned BitWidth) { - switch (ID) { +static APInt getMaxMinLimit(Intrinsic::ID IID, unsigned BitWidth) { + switch (IID) { case Intrinsic::smax: return APInt::getSignedMaxValue(BitWidth); case Intrinsic::smin: return APInt::getSignedMinValue(BitWidth); case Intrinsic::umax: return APInt::getMaxValue(BitWidth); @@ -5218,6 +5218,34 @@ static APInt getMaxMinLimit(Intrinsic::ID ID, unsigned BitWidth) { } } +static bool isMinMax(Intrinsic::ID IID) { + return IID == Intrinsic::smax || IID == Intrinsic::smin || + IID == Intrinsic::umax || IID == Intrinsic::umin; +} + +/// Given a min/max intrinsic, see if it can be removed based on having an +/// operand that is another min/max intrinsic with shared operand(s). The caller +/// is expected to swap the operand arguments to handle commutation. 
+static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) { + assert(isMinMax(IID) && "Expected min/max intrinsic"); + auto *InnerMM = dyn_cast<IntrinsicInst>(Op0); + if (!InnerMM) + return nullptr; + Intrinsic::ID InnerID = InnerMM->getIntrinsicID(); + if (!isMinMax(InnerID)) + return nullptr; + + if (Op1 == InnerMM->getOperand(0) || Op1 == InnerMM->getOperand(1)) { + // max (max X, Y), X --> max X, Y + if (InnerID == IID) + return InnerMM; + // max (min X, Y), X --> X + if (InnerID == getMaxMinOpposite(IID)) + return Op1; + } + return nullptr; +} + static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, const SimplifyQuery &Q) { Intrinsic::ID IID = F->getIntrinsicID(); @@ -5251,28 +5279,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, if (isa<UndefValue>(Op1)) return ConstantInt::get(ReturnType, getMaxMinLimit(IID, BitWidth)); - auto hasSpecificOperand = [](IntrinsicInst *II, Value *V) { - return II->getOperand(0) == V || II->getOperand(1) == V; - }; - - // For 4 commuted variants of each intrinsic: - // max (max X, Y), X --> max X, Y - // max (min X, Y), X --> X - if (auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0)) { - Intrinsic::ID InnerID = MinMax0->getIntrinsicID(); - if (InnerID == IID && hasSpecificOperand(MinMax0, Op1)) - return MinMax0; - if (InnerID == getMaxMinOpposite(IID) && hasSpecificOperand(MinMax0, Op1)) - return Op1; - } - if (auto *MinMax1 = dyn_cast<IntrinsicInst>(Op1)) { - Intrinsic::ID InnerID = MinMax1->getIntrinsicID(); - if (InnerID == IID && hasSpecificOperand(MinMax1, Op0)) - return MinMax1; - if (InnerID == getMaxMinOpposite(IID) && hasSpecificOperand(MinMax1, Op0)) - return Op0; - } - const APInt *C; if (match(Op1, m_APIntAllowUndef(C))) { // Clamp to limit value. For example: @@ -5302,6 +5308,11 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, } } + if (Value *V = foldMinMaxSharedOp(IID, Op0, Op1)) + return V; + if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0)) + return V; + break; } case Intrinsic::usub_with_overflow: From 041c7b84a4b925476d1e21ed302786033bb6035f Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 4 Aug 2020 09:17:56 -0700 Subject: [PATCH 332/600] [lldb/Host] Upstream macOS TCC code Upstream the code for dealing with TCC introduced in macOS Mojave. This will make the debuggee instead of the debugger responsible for the privileges it needs.
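Roughly, the launch side is expected to do the following (a simplified sketch; setup_posix_spawn_responsible_flag is the helper added below, while the wrapper function around it is assumed for illustration):

  #include <spawn.h>
  #include <cstdio>

  static void disclaim_to_debuggee(posix_spawnattr_t *attr) {
    posix_spawnattr_init(attr);
    // Mark the spawned (debugged) process, rather than lldb, as the one
    // responsible for TCC prompts. The helper returns 0 on success, and
    // also when the private API is unavailable at runtime.
    if (int err = setup_posix_spawn_responsible_flag(attr))
      fprintf(stderr, "failed to disclaim responsibility: %d\n", err);
  }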
Differential revision: https://reviews.llvm.org/D85217 --- lldb/source/Host/macosx/objcxx/Host.mm | 24 ++++++++++ .../macosx/objcxx/PosixSpawnResponsible.h | 46 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm index 1635132a154e5..398652ae30d83 100644 --- a/lldb/source/Host/macosx/objcxx/Host.mm +++ b/lldb/source/Host/macosx/objcxx/Host.mm @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Host/Host.h" +#include "PosixSpawnResponsible.h" #include #include @@ -1083,6 +1084,29 @@ static Status LaunchProcessPosixSpawn(const char *exe_path, return error; } + bool is_graphical = true; + +#if TARGET_OS_OSX + SecuritySessionId session_id; + SessionAttributeBits session_attributes; + OSStatus status = + SessionGetInfo(callerSecuritySession, &session_id, &session_attributes); + if (status == errSessionSuccess) + is_graphical = session_attributes & sessionHasGraphicAccess; +#endif + + // When lldb is run through a graphical session, this makes the debuggee + // process responsible for the TCC prompts. Otherwise, lldb will use the + // launching process privileges. + if (is_graphical && launch_info.GetFlags().Test(eLaunchFlagDebug)) { + error.SetError(setup_posix_spawn_responsible_flag(&attr), eErrorTypePOSIX); + if (error.Fail()) { + LLDB_LOG(log, "error: {0}, setup_posix_spawn_responsible_flag(&attr)", + error); + return error; + } + } + const char *tmp_argv[2]; char *const *argv = const_cast<char *const *>( launch_info.GetArguments().GetConstArgumentVector()); diff --git a/lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h b/lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h new file mode 100644 index 0000000000000..36fe09b5263a9 --- /dev/null +++ b/lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h @@ -0,0 +1,46 @@ +//===-- PosixSpawnResponsible.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_HOST_POSIXSPAWNRESPONSIBLE_H +#define LLDB_HOST_POSIXSPAWNRESPONSIBLE_H + +#include <spawn.h> + +#if __has_include(<responsibility.h>) +#include <responsibility.h> +#include <dispatch/dispatch.h> +#include <dlfcn.h> + +// Older SDKs have responsibility.h but not this particular function. Let's +// include the prototype here.
+errno_t responsibility_spawnattrs_setdisclaim(posix_spawnattr_t *attrs,
+                                              bool disclaim);
+
+#endif
+
+static inline int setup_posix_spawn_responsible_flag(posix_spawnattr_t *attr) {
+  if (@available(macOS 10.14, *)) {
+#if __has_include(<responsibility.h>)
+    static __typeof__(responsibility_spawnattrs_setdisclaim)
+        *responsibility_spawnattrs_setdisclaim_ptr;
+    static dispatch_once_t pred;
+    dispatch_once(&pred, ^{
+      responsibility_spawnattrs_setdisclaim_ptr =
+#ifdef __cplusplus
+          reinterpret_cast<__typeof__(&responsibility_spawnattrs_setdisclaim)>
+#endif
+          (dlsym(RTLD_DEFAULT, "responsibility_spawnattrs_setdisclaim"));
+    });
+    if (responsibility_spawnattrs_setdisclaim_ptr)
+      return responsibility_spawnattrs_setdisclaim_ptr(attr, true);
+#endif
+  }
+  return 0;
+}
+
+#endif // LLDB_HOST_POSIXSPAWNRESPONSIBLE_H

From 23adbac9ee23c10976e40c80999abf02ecb389b7 Mon Sep 17 00:00:00 2001
From: Cameron McInally
Date: Tue, 4 Aug 2020 11:26:04 -0500
Subject: [PATCH 333/600] [GlobalISel] Don't transform FSUB(-0, X) -> FNEG(X)
 in GlobalISel.

This patch stops unconditionally transforming FSUB(-0, X) into an FNEG(X)
while building the MIR. This corresponds with the SelectionDAGISel change
in D84056.

Differential Revision: https://reviews.llvm.org/D85139
---
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |  5 +++--
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 18 ------------------
 .../AArch64/GlobalISel/arm64-irtranslator.ll  |  8 ++++----
 3 files changed, 7 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 928743a6cbd7d..b4ad3c5a2d483 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -353,8 +353,6 @@ class IRTranslator : public MachineFunctionPass {
   /// \pre \p U is a return instruction.
   bool translateRet(const User &U, MachineIRBuilder &MIRBuilder);
 
-  bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder);
-
   bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder);
 
   bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -439,6 +437,9 @@ class IRTranslator : public MachineFunctionPass {
   bool translateFAdd(const User &U, MachineIRBuilder &MIRBuilder) {
     return translateBinaryOp(TargetOpcode::G_FADD, U, MIRBuilder);
   }
+  bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+    return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+  }
   bool translateFMul(const User &U, MachineIRBuilder &MIRBuilder) {
     return translateBinaryOp(TargetOpcode::G_FMUL, U, MIRBuilder);
   }
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b14492ce01236..be669eca0f6fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -294,24 +294,6 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
   return true;
 }
 
-bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
-  // -0.0 - X --> G_FNEG
-  if (isa<Constant>(U.getOperand(0)) &&
-      U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
-    Register Op1 = getOrCreateVReg(*U.getOperand(1));
-    Register Res = getOrCreateVReg(U);
-    uint16_t Flags = 0;
-    if (isa<Instruction>(U)) {
-      const Instruction &I = cast<Instruction>(U);
-      Flags = MachineInstr::copyFlagsFromInstruction(I);
-    }
-    // Negate the last operand of the FSUB
-    MIRBuilder.buildFNeg(Res, Op1, Flags);
-    return true;
-  }
-  return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
-}
-
 bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
   Register Op0 = getOrCreateVReg(*U.getOperand(0));
   Register Res = getOrCreateVReg(U);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 11ffacae7b866..0d75894a81dca 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1507,7 +1507,7 @@ define float @test_fneg_f32(float %x) {
 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $s0
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FNEG [[ARG]]
 ; CHECK: $s0 = COPY [[RES]](s32)
-  %neg = fsub float -0.000000e+00, %x
+  %neg = fneg float %x
   ret float %neg
 }
 
@@ -1516,7 +1516,7 @@ define float @test_fneg_f32_fmf(float %x) {
 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $s0
 ; CHECK: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[ARG]]
 ; CHECK: $s0 = COPY [[RES]](s32)
-  %neg = fsub fast float -0.000000e+00, %x
+  %neg = fneg fast float %x
   ret float %neg
 }
 
@@ -1525,7 +1525,7 @@ define double @test_fneg_f64(double %x) {
 ; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY $d0
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FNEG [[ARG]]
 ; CHECK: $d0 = COPY [[RES]](s64)
-  %neg = fsub double -0.000000e+00, %x
+  %neg = fneg double %x
   ret double %neg
 }
 
@@ -1534,7 +1534,7 @@ define double @test_fneg_f64_fmf(double %x) {
 ; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY $d0
 ; CHECK: [[RES:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[ARG]]
 ; CHECK: $d0 = COPY [[RES]](s64)
-  %neg = fsub fast double -0.000000e+00, %x
+  %neg = fneg fast double %x
   ret double %neg
 }

From 0de547ed4ada068ea618bdb8ce58ddc89de9a42f Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 4 Aug 2020 09:19:43 -0400
Subject: [PATCH 334/600] AMDGPU/GlobalISel: Ensure subreg is valid when selecting
G_UNMERGE_VALUES Fixes verifier error with SGPR unmerges with 96-bit result types. --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 5 ++ .../GlobalISel/inst-select-unmerge-values.mir | 71 +++++++++++++++++++ .../GlobalISel/legalize-unmerge-values.mir | 26 +++++++ .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 12 ++-- 4 files changed, 108 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 16fc759f0cbf6..43f5e534411c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -567,6 +567,11 @@ bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) .addReg(SrcReg, SrcFlags, SubRegs[I]); + // Make sure the subregister index is valid for the source register. + SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]); + if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) + return false; + const TargetRegisterClass *DstRC = TRI.getConstrainedRegClassForOperand(Dst, *MRI); if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir index 1c0644f7bf421..50226991b8c25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-unmerge-values.mir @@ -266,3 +266,74 @@ body: | $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1 $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2 ... 
+ +--- +name: test_unmerge_s_v3s32_s_v12s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11 + + ; GCN-LABEL: name: test_unmerge_s_v3s32_s_v12s32 + ; GCN: liveins: $sgpr0_sgpr1_sgpr2, $sgpr3_sgpr4_sgpr5, $sgpr6_sgpr7_sgpr8, $sgpr9_sgpr10_sgpr11 + ; GCN: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_96 = COPY $sgpr3_sgpr4_sgpr5 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_96 = COPY $sgpr6_sgpr7_sgpr8 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_96 = COPY $sgpr9_sgpr10_sgpr11 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2, [[COPY1]], %subreg.sub3_sub4_sub5, [[COPY2]], %subreg.sub6_sub7_sub8, [[COPY3]], %subreg.sub9_sub10_sub11 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN: $sgpr0_sgpr1_sgpr2 = COPY [[COPY4]] + ; GCN: $sgpr3_sgpr4_sgpr5 = COPY [[COPY5]] + ; GCN: $sgpr6_sgpr7_sgpr8 = COPY [[COPY6]] + ; GCN: $sgpr9_sgpr10_sgpr11 = COPY [[COPY7]] + %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(<3 x s32>) = COPY $sgpr3_sgpr4_sgpr5 + %2:sgpr(<3 x s32>) = COPY $sgpr6_sgpr7_sgpr8 + %3:sgpr(<3 x s32>) = COPY $sgpr9_sgpr10_sgpr11 + %4:sgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2, %3 + %5:sgpr(<3 x s32>), %6:sgpr(<3 x s32>), %7:sgpr(<3 x s32>), %8:sgpr(<3 x s32>) = G_UNMERGE_VALUES %4 + $sgpr0_sgpr1_sgpr2 = COPY %5 + $sgpr3_sgpr4_sgpr5 = COPY %6 + $sgpr6_sgpr7_sgpr8 = COPY %7 + $sgpr9_sgpr10_sgpr11 = COPY %8 + +... + +--- +name: test_unmerge_v_v3s32_v_v12s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + + ; GCN-LABEL: name: test_unmerge_v_v3s32_v_v12s32 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; GCN: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; GCN: [[COPY1:%[0-9]+]]:vreg_192 = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5, [[COPY1]], %subreg.sub6_sub7_sub8_sub9_sub10_sub11 + ; GCN: [[COPY2:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub0_sub1_sub2 + ; GCN: [[COPY3:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub3_sub4_sub5 + ; GCN: [[COPY4:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub6_sub7_sub8 + ; GCN: [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]].sub9_sub10_sub11 + ; GCN: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]] + ; GCN: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]] + ; GCN: $vgpr6_vgpr7_vgpr8 = COPY [[COPY4]] + ; GCN: $vgpr9_vgpr10_vgpr11 = COPY [[COPY5]] + %0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:vgpr(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + %2:vgpr(<12 x s32>) = G_CONCAT_VECTORS %0, %1 + %3:vgpr(<3 x s32>), %4:vgpr(<3 x s32>), %5:vgpr(<3 x s32>), %6:vgpr(<3 x s32>) = G_UNMERGE_VALUES %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 + $vgpr3_vgpr4_vgpr5 = COPY %4 + $vgpr6_vgpr7_vgpr8 = COPY %5 + $vgpr9_vgpr10_vgpr11 = COPY %6 + +... 
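Before the next test file, a brief sketch of the pattern the fix relies on.
TargetRegisterInfo::getSubClassWithSubReg returns the largest register class
contained in the given class on which the subregister index is defined, or
null if none exists. The helper below is illustrative only; its name and the
surrounding setup are assumed, not part of the patch.

    // Sketch: constrain SrcReg so that copying SubIdx out of it is valid,
    // e.g. narrowing sgpr_512 to sgpr_512_with_sub0_sub1_sub2 for a 96-bit
    // sub0_sub1_sub2 extract. All parameters are assumed to come from the
    // selector's environment.
    static bool constrainSourceForSubReg(Register SrcReg, unsigned SubIdx,
                                         const TargetRegisterClass *SrcRC,
                                         const TargetRegisterInfo &TRI,
                                         const RegisterBankInfo &RBI,
                                         MachineRegisterInfo &MRI) {
      SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
      if (!SrcRC) // No subclass supports this subregister index.
        return false;
      return RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) != nullptr;
    }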
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index 11b9da883008e..c57bb52f1825b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -1125,3 +1125,29 @@ body: | $vgpr1 = COPY %6 ... + +--- +name: test_unmerge_v3s32_v12s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_unmerge_v3s32_v12s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) + ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY1]](<6 x s32>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>) + ; CHECK: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>) + ; CHECK: $vgpr6_vgpr7_vgpr8 = COPY [[UV2]](<3 x s32>) + ; CHECK: $vgpr9_vgpr10_vgpr11 = COPY [[UV3]](<3 x s32>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s32>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + %2:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1 + %3:_(<3 x s32>), %4:_(<3 x s32>), %5:_(<3 x s32>), %6:_(<3 x s32>) = G_UNMERGE_VALUES %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 + $vgpr3_vgpr4_vgpr5 = COPY %4 + $vgpr6_vgpr7_vgpr8 = COPY %5 + $vgpr9_vgpr10_vgpr11 = COPY %6 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 805aa301f9383..7ff60e57d9646 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -simplify-mir -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s ; FIXME: Merge with regbankselect, which mostly overlaps when all types supported. 
@@ -174,7 +174,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
 ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
+; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
 ; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
 ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
 ; GFX6: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
@@ -203,7 +203,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
+; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
 ; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
 ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
 ; GFX7: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
@@ -232,7 +232,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
 ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
 ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
-; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
+; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
 ; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
 ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
 ; GFX8: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8

From 12605bfd1ff5c6316e74587be1b41d24abd893fc Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Wed, 5 Aug 2020 00:09:12 +0800
Subject: [PATCH 335/600] [DWARFYAML] Fix uninitialized value Is64BitAddrSize. NFC.
This patch fixes the undefined behavior that was reported by ubsan.

http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/44524/
---
 llvm/include/llvm/ObjectYAML/DWARFEmitter.h     | 3 ++-
 llvm/lib/ObjectYAML/DWARFEmitter.cpp            | 5 ++++-
 llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp | 3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
index c7c3070651504..eb56d1e293266 100644
--- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -48,7 +48,8 @@ std::function<Error(raw_ostream &, const Data &)> getDWARFEmitterByName(StringRef SecName);
 
 Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
 emitDebugSections(StringRef YAMLString,
-                  bool IsLittleEndian = sys::IsLittleEndianHost);
+                  bool IsLittleEndian = sys::IsLittleEndianHost,
+                  bool Is64BitAddrSize = true);
 
 } // end namespace DWARFYAML
 } // end namespace llvm
diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index deff6a68363b2..1f79e3379b07f 100644
--- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -945,7 +945,8 @@ emitDebugSectionImpl(const DWARFYAML::Data &DI, StringRef Sec,
 }
 
 Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
-DWARFYAML::emitDebugSections(StringRef YAMLString, bool IsLittleEndian) {
+DWARFYAML::emitDebugSections(StringRef YAMLString, bool IsLittleEndian,
+                             bool Is64BitAddrSize) {
   auto CollectDiagnostic = [](const SMDiagnostic &Diag, void *DiagContext) {
     *static_cast<SMDiagnostic *>(DiagContext) = Diag;
   };
@@ -956,6 +957,8 @@ DWARFYAML::emitDebugSections(StringRef YAMLString, bool IsLittleEndian) {
 
   DWARFYAML::Data DI;
   DI.IsLittleEndian = IsLittleEndian;
+  DI.Is64BitAddrSize = Is64BitAddrSize;
+
   YIn >> DI;
   if (YIn.error())
     return createStringError(YIn.error(), GeneratedDiag.getMessage());
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp
index 1d468a956e2b7..bdf3babe81fb5 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp
@@ -65,7 +65,8 @@ TEST(DWARFDie, getLocations) {
   )";
   Expected<StringMap<std::unique_ptr<MemoryBuffer>>> Sections =
       DWARFYAML::emitDebugSections(StringRef(yamldata),
-                                   /*IsLittleEndian=*/true);
+                                   /*IsLittleEndian=*/true,
+                                   /*Is64BitAddrSize=*/false);
   ASSERT_THAT_EXPECTED(Sections, Succeeded());
   std::unique_ptr<DWARFContext> Ctx =
       DWARFContext::create(*Sections, 4, /*isLittleEndian=*/true);

From 724b035fe4df89e807f85ee202da8b0bc227895b Mon Sep 17 00:00:00 2001
From: Cameron McInally
Date: Tue, 4 Aug 2020 11:32:15 -0500
Subject: [PATCH 336/600] [GlobalISel] Remove redundant FNEG tests.

These tests were made redundant by D85139.
---
 .../AArch64/GlobalISel/arm64-irtranslator.ll | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 0d75894a81dca..a896b05512dd7 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -704,26 +704,6 @@ define float @test_frem(float %arg1, float %arg2) {
   ret float %res
 }
 
-; CHECK-LABEL: name: test_fneg
-; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FNEG [[ARG1]]
-; CHECK-NEXT: $s0 = COPY [[RES]]
-; CHECK-NEXT: RET_ReallyLR implicit $s0
-define float @test_fneg(float %arg1) {
-  %res = fneg float %arg1
-  ret float %res
-}
-
-; CHECK-LABEL: name: test_fneg_fmf
-; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY $s0
-; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FNEG [[ARG1]]
-; CHECK-NEXT: $s0 = COPY [[RES]]
-; CHECK-NEXT: RET_ReallyLR implicit $s0
-define float @test_fneg_fmf(float %arg1) {
-  %res = fneg fast float %arg1
-  ret float %res
-}
-
 ; CHECK-LABEL: name: test_sadd_overflow
 ; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY $w0
 ; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY $w1

From 56593fa370124a4d77703e7ddfa4dfca81e0c8f2 Mon Sep 17 00:00:00 2001
From: Yash Jain
Date: Tue, 4 Aug 2020 20:21:13 +0530
Subject: [PATCH 337/600] [MLIR] Simplify semi-affine expressions

Simplify semi-affine expressions for operations like ceildiv, floordiv
and modulo by a given symbol by checking divisibility by that symbol.
Some properties used in the simplification are:

1) Commutative property of floordiv and ceildiv:
   ((expr1 floordiv expr2) floordiv expr3) = ((expr1 floordiv expr3) floordiv expr2)
   ((expr1 ceildiv expr2) ceildiv expr3) = ((expr1 ceildiv expr3) ceildiv expr2)

During simplification, if the operations differ, no simplification is
possible, as there is no property that simplifies expressions like
((expr1 ceildiv expr2) floordiv expr3) or ((expr1 floordiv expr2) ceildiv expr3).

2) If both expr1 and expr2 are divisible by expr3, then:
   (expr1 % expr2) / expr3 = ((expr1 / expr3) % (expr2 / expr3)),
   where / is the division symbol. For example, with expr1 = 6*s0,
   expr2 = 4*s0 and expr3 = s0: (6*s0 mod 4*s0) floordiv s0 =
   (2*s0) floordiv s0 = 2, which equals (6 mod 4) = 2.

3) If expr1 is divisible by expr2, then expr1 % expr2 = 0.

Signed-off-by: Yash Jain

Differential Revision: https://reviews.llvm.org/D84920
---
 mlir/lib/IR/AffineExpr.cpp                    | 169 +++++++++++++++++-
 .../Affine/simplify-affine-structures.mlir    |  46 +++++
 2 files changed, 213 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp
index 5ba9737a5245b..0d4d9d08c935e 100644
--- a/mlir/lib/IR/AffineExpr.cpp
+++ b/mlir/lib/IR/AffineExpr.cpp
@@ -245,6 +245,170 @@ unsigned AffineDimExpr::getPosition() const {
   return static_cast<ImplType *>(expr)->position;
 }
 
+/// Returns true if the expression is divisible by the given symbol with
+/// position `symbolPos`. The argument `opKind` specifies what kind of
+/// division or mod operation called this division. It helps in implementing
+/// the commutative property of the floordiv and ceildiv operations. If the
+/// argument `opKind` is floordiv and `expr` is also a binary expression of a
+/// floordiv operation, then the commutative property can be used; otherwise,
+/// the floordiv operation is not divisible. The same argument holds for the
+/// ceildiv operation.
+static bool isDivisibleBySymbol(AffineExpr expr, unsigned symbolPos,
+                                AffineExprKind opKind) {
+  // The argument `opKind` can either be Modulo, Floordiv or Ceildiv only.
+  assert((opKind == AffineExprKind::Mod || opKind == AffineExprKind::FloorDiv ||
+          opKind == AffineExprKind::CeilDiv) &&
+         "unexpected opKind");
+  switch (expr.getKind()) {
+  case AffineExprKind::Constant:
+    if (expr.cast<AffineConstantExpr>().getValue())
+      return false;
+    return true;
+  case AffineExprKind::DimId:
+    return false;
+  case AffineExprKind::SymbolId:
+    return (expr.cast<AffineSymbolExpr>().getPosition() == symbolPos);
+  // Checks divisibility by the given symbol for both operands.
+  case AffineExprKind::Add: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos, opKind) &&
+           isDivisibleBySymbol(binaryExpr.getRHS(), symbolPos, opKind);
+  }
+  // Checks divisibility by the given symbol for both operands. Consider the
+  // expression `(((s1*s0) floordiv w) mod ((s1 * s2) floordiv p)) floordiv s1`:
+  // this is a division by s1, and both operands of the modulo are divisible by
+  // s1, but the whole expression is not always divisible by s1. The third
+  // argument is `AffineExprKind::Mod` for this reason.
+  case AffineExprKind::Mod: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos,
+                               AffineExprKind::Mod) &&
+           isDivisibleBySymbol(binaryExpr.getRHS(), symbolPos,
+                               AffineExprKind::Mod);
+  }
+  // Checks if either operand is divisible by the given symbol.
+  case AffineExprKind::Mul: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos, opKind) ||
+           isDivisibleBySymbol(binaryExpr.getRHS(), symbolPos, opKind);
+  }
+  // Floordiv and ceildiv are divisible by the given symbol when the first
+  // operand is divisible and the affine expression kind of the argument expr
+  // is the same as the argument `opKind`. This can be inferred from the
+  // commutative property of the floordiv and ceildiv operations, as follows:
+  // (exp1 floordiv exp2) floordiv exp3 = (exp1 floordiv exp3) floordiv exp2
+  // (exp1 ceildiv exp2) ceildiv exp3 = (exp1 ceildiv exp3) ceildiv exp2
+  // It will fail if the operations are not the same. For example:
+  // (exp1 ceildiv exp2) floordiv exp3 cannot be simplified.
+  case AffineExprKind::FloorDiv:
+  case AffineExprKind::CeilDiv: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    if (opKind != expr.getKind())
+      return false;
+    return isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos, expr.getKind());
+  }
+  }
+  llvm_unreachable("Unknown AffineExpr");
+}
+
+/// Divides the given expression by the given symbol at position `symbolPos`.
+/// It assumes the divisibility condition has been checked before the call. A
+/// null expression is returned whenever the divisibility condition fails.
+static AffineExpr symbolicDivide(AffineExpr expr, unsigned symbolPos,
+                                 AffineExprKind opKind) {
+  // The argument `opKind` can either be Modulo, Floordiv or Ceildiv only.
+  assert((opKind == AffineExprKind::Mod || opKind == AffineExprKind::FloorDiv ||
+          opKind == AffineExprKind::CeilDiv) &&
+         "unexpected opKind");
+  switch (expr.getKind()) {
+  case AffineExprKind::Constant:
+    if (expr.cast<AffineConstantExpr>().getValue() != 0)
+      return nullptr;
+    return getAffineConstantExpr(0, expr.getContext());
+  case AffineExprKind::DimId:
+    return nullptr;
+  case AffineExprKind::SymbolId:
+    return getAffineConstantExpr(1, expr.getContext());
+  // Dividing both operands by the given symbol.
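+  // Illustration (not from the original patch): dividing the sum
+  // (d0 * s0 + 2 * s0) by s0 divides each term, yielding (d0 + 2).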
+  case AffineExprKind::Add: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return getAffineBinaryOpExpr(
+        expr.getKind(), symbolicDivide(binaryExpr.getLHS(), symbolPos, opKind),
+        symbolicDivide(binaryExpr.getRHS(), symbolPos, opKind));
+  }
+  // Dividing both operands by the given symbol.
+  case AffineExprKind::Mod: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return getAffineBinaryOpExpr(
+        expr.getKind(),
+        symbolicDivide(binaryExpr.getLHS(), symbolPos, expr.getKind()),
+        symbolicDivide(binaryExpr.getRHS(), symbolPos, expr.getKind()));
+  }
+  // Dividing either operand by the given symbol.
+  case AffineExprKind::Mul: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    if (!isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos, opKind))
+      return binaryExpr.getLHS() *
+             symbolicDivide(binaryExpr.getRHS(), symbolPos, opKind);
+    return symbolicDivide(binaryExpr.getLHS(), symbolPos, opKind) *
+           binaryExpr.getRHS();
+  }
+  // Dividing the first operand only by the given symbol.
+  case AffineExprKind::FloorDiv:
+  case AffineExprKind::CeilDiv: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return getAffineBinaryOpExpr(
+        expr.getKind(),
+        symbolicDivide(binaryExpr.getLHS(), symbolPos, expr.getKind()),
+        binaryExpr.getRHS());
+  }
+  }
+  llvm_unreachable("Unknown AffineExpr");
+}
+
+/// Simplify a semi-affine expression by handling modulo, floordiv, or ceildiv
+/// operations when the second operand simplifies to a symbol and the first
+/// operand is divisible by that symbol. It can be applied to any semi-affine
+/// expression. The returned expression can be either a semi-affine or a pure
+/// affine expression.
+static AffineExpr simplifySemiAffine(AffineExpr expr) {
+  switch (expr.getKind()) {
+  case AffineExprKind::Constant:
+  case AffineExprKind::DimId:
+  case AffineExprKind::SymbolId:
+    return expr;
+  case AffineExprKind::Add:
+  case AffineExprKind::Mul: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    return getAffineBinaryOpExpr(expr.getKind(),
+                                 simplifySemiAffine(binaryExpr.getLHS()),
+                                 simplifySemiAffine(binaryExpr.getRHS()));
+  }
+  // Check if the simplification of the second operand is a symbol and the
+  // first operand is divisible by it. If the operation is a modulo, a constant
+  // zero expression is returned. In the case of floordiv and ceildiv, the
+  // symbol from the simplification of the second operand divides the first
+  // operand. Otherwise, simplification is not possible.
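+  // Worked examples (illustration, not part of the original patch):
+  //   (s0 * 42 + s0 * s0) floordiv s0  simplifies to  42 + s0
+  //   (d0 + s0) mod s0  is returned unchanged, since the first operand is
+  //   not divisible by s0.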
+  case AffineExprKind::FloorDiv:
+  case AffineExprKind::CeilDiv:
+  case AffineExprKind::Mod: {
+    AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+    AffineExpr sLHS = simplifySemiAffine(binaryExpr.getLHS());
+    AffineExpr sRHS = simplifySemiAffine(binaryExpr.getRHS());
+    AffineSymbolExpr symbolExpr =
+        simplifySemiAffine(binaryExpr.getRHS()).dyn_cast<AffineSymbolExpr>();
+    if (!symbolExpr)
+      return getAffineBinaryOpExpr(expr.getKind(), sLHS, sRHS);
+    unsigned symbolPos = symbolExpr.getPosition();
+    if (!isDivisibleBySymbol(binaryExpr.getLHS(), symbolPos, expr.getKind()))
+      return getAffineBinaryOpExpr(expr.getKind(), sLHS, sRHS);
+    if (expr.getKind() == AffineExprKind::Mod)
+      return getAffineConstantExpr(0, expr.getContext());
+    return symbolicDivide(sLHS, symbolPos, expr.getKind());
+  }
+  }
+  llvm_unreachable("Unknown AffineExpr");
+}
+
 static AffineExpr getAffineDimOrSymbol(AffineExprKind kind, unsigned position,
                                        MLIRContext *context) {
   auto assignCtx = [context](AffineDimExprStorage *storage) {
@@ -878,8 +1042,9 @@ int SimpleAffineExprFlattener::findLocalId(AffineExpr localExpr) {
 
 /// Simplify the affine expression by flattening it and reconstructing it.
 AffineExpr mlir::simplifyAffineExpr(AffineExpr expr, unsigned numDims,
                                     unsigned numSymbols) {
-  // TODO: only pure affine for now. The simplification here can
-  // be extended to semi-affine maps in the future.
+  // Simplify semi-affine expressions separately.
+  if (!expr.isPureAffine())
+    expr = simplifySemiAffine(expr);
   if (!expr.isPureAffine())
     return expr;
 
diff --git a/mlir/test/Dialect/Affine/simplify-affine-structures.mlir b/mlir/test/Dialect/Affine/simplify-affine-structures.mlir
index 91f153f1fb214..11fb0b128d63c 100644
--- a/mlir/test/Dialect/Affine/simplify-affine-structures.mlir
+++ b/mlir/test/Dialect/Affine/simplify-affine-structures.mlir
@@ -281,3 +281,49 @@ func @simplify_zero_dim_map(%in : memref<f32>) -> f32 {
   %out = affine.load %in[] : memref<f32>
   return %out : f32
 }
+
+// -----
+
+// Tests the simplification of a semi-affine expression in various cases.
+// CHECK-DAG: #[[$map0:.*]] = affine_map<()[s0, s1] -> (-(s1 floordiv s0) + 2)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<()[s0, s1] -> (-(s1 floordiv s0) + 42)>
+
+// Tests the simplification of a semi-affine expression with a modulo operation on a floordiv and multiplication.
+// CHECK-LABEL: func @semiaffine_mod
+func @semiaffine_mod(%arg0: index, %arg1: index) -> index {
+  %a = affine.apply affine_map<(d0)[s0] ->((-((d0 floordiv s0) * s0) + s0 * s0) mod s0)> (%arg0)[%arg1]
+  // CHECK: %[[CST:.*]] = constant 0
+  return %a : index
+}
+
+// Tests the simplification of a semi-affine expression with a nested floordiv and a floordiv on a modulo operation.
+// CHECK-LABEL: func @semiaffine_floordiv
+func @semiaffine_floordiv(%arg0: index, %arg1: index) -> index {
+  %a = affine.apply affine_map<(d0)[s0] ->((-((d0 floordiv s0) * s0) + ((2 * s0) mod (3 * s0))) floordiv s0)> (%arg0)[%arg1]
+  // CHECK: affine.apply #[[$map0]]()[%arg1, %arg0]
+  return %a : index
+}
+
+// Tests the simplification of a semi-affine expression with a ceildiv operation and a division of constant 0 by a symbol.
+// CHECK-LABEL: func @semiaffine_ceildiv
+func @semiaffine_ceildiv(%arg0: index, %arg1: index) -> index {
+  %a = affine.apply affine_map<(d0)[s0] ->((-((d0 floordiv s0) * s0) + s0 * 42 + ((5-5) floordiv s0)) ceildiv s0)> (%arg0)[%arg1]
+  // CHECK: affine.apply #[[$map1]]()[%arg1, %arg0]
+  return %a : index
+}
+
+// Tests the simplification of a semi-affine expression with a nested ceildiv operation and further simplifications after performing ceildiv.
+// CHECK-LABEL: func @semiaffine_composite_floor
+func @semiaffine_composite_floor(%arg0: index, %arg1: index) -> index {
+  %a = affine.apply affine_map<(d0)[s0] ->(((((s0 * 2) ceildiv 4) * 5) + s0 * 42) ceildiv s0)> (%arg0)[%arg1]
+  // CHECK: %[[CST:.*]] = constant 47
+  return %a : index
+}
+
+// Tests the simplification of a semi-affine expression with a modulo operation with a second operand that simplifies to a symbol.
+// CHECK-LABEL: func @semiaffine_unsimplified_symbol
+func @semiaffine_unsimplified_symbol(%arg0: index, %arg1: index) -> index {
+  %a = affine.apply affine_map<(d0)[s0] ->(s0 mod (2 * s0 - s0))> (%arg0)[%arg1]
+  // CHECK: %[[CST:.*]] = constant 0
+  return %a : index
+}

From 83cb98f9e7a57360e137b32b26500fca630df617 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 4 Aug 2020 17:24:27 +0100
Subject: [PATCH 338/600] Fix sphinx indentation warnings by adding explicit
 line breaks to address space hierarchy

---
 clang/include/clang/Basic/AttrDocs.td | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 76a075a97ee16..83990721d7f74 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3134,11 +3134,12 @@ distinguish USM (Unified Shared Memory) pointers that access global device
 memory from those that access global host memory.
These new address spaces are a subset of the ``__global/opencl_global`` address space, the full address space set model for OpenCL 2.0 with the extension looks as follows: - generic->global->host - ->device - ->private - ->local - constant + + | generic->global->host + | ->device + | ->private + | ->local + | constant As ``global_device`` and ``global_host`` are a subset of ``__global/opencl_global`` address spaces it is allowed to convert From 6a4fd03698e1aab09c47215b7eace942a23de074 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 4 Aug 2020 13:04:53 -0400 Subject: [PATCH 339/600] [gn build] (manually) merge 593e1962 --- .../gn/secondary/llvm/tools/llvm-symbolizer/BUILD.gn | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-symbolizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-symbolizer/BUILD.gn index ad7280b9e1fdb..3516897ef4640 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-symbolizer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-symbolizer/BUILD.gn @@ -1,6 +1,12 @@ import("//llvm/tools/binutils_symlinks.gni") +import("//llvm/utils/TableGen/tablegen.gni") import("//llvm/utils/gn/build/symlink_or_copy.gni") +tablegen("Opts") { + visibility = [ ":llvm-symbolizer" ] + args = [ "-gen-opt-parser-defs" ] +} + symlinks = [ "llvm-addr2line" ] if (llvm_install_binutils_symlinks) { symlinks += [ "addr2line" ] @@ -23,11 +29,13 @@ group("symlinks") { executable("llvm-symbolizer") { deps = [ + ":Opts", "//llvm/lib/DebugInfo/DWARF", "//llvm/lib/DebugInfo/PDB", "//llvm/lib/DebugInfo/Symbolize", "//llvm/lib/Demangle", "//llvm/lib/Object", + "//llvm/lib/Option", "//llvm/lib/Support", ] sources = [ "llvm-symbolizer.cpp" ] From e4441fc653912ec4efc5611a8dd926f8bdd0306c Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 10:24:59 -0700 Subject: [PATCH 340/600] sanitizer_symbolizer_libcdep.cpp: Change --inlining=true to --inlines and --inlining=false to --no-inlines --- compiler-rt/lib/asan/scripts/asan_symbolize.py | 4 ++-- .../lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index d99e3441e9255..a2e38238a97c6 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -17,7 +17,7 @@ where it is necessary to handle site-specific quirks (e.g. binaries with debug symbols only accessible via a remote service) without having to modify the script itself. - + """ import argparse import bisect @@ -92,7 +92,7 @@ def open_llvm_symbolizer(self): '--use-symbol-table=true', '--demangle=%s' % demangle, '--functions=linkage', - '--inlining=true', + '--inlines', '--default-arch=%s' % self.default_arch] if self.system == 'Darwin': for hint in self.dsym_hints: diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 490c6fe89beb5..77522a20ae9b6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -275,8 +275,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess { #endif const char *const inline_flag = common_flags()->symbolize_inline_frames - ? "--inlining=true" - : "--inlining=false"; + ? 
"--inlines" + : "--no-inlines"; int i = 0; argv[i++] = path_to_binary; argv[i++] = inline_flag; From e31cfc4cd3e393300002e9c519787c96e3b67bab Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 4 Aug 2020 10:41:27 -0700 Subject: [PATCH 341/600] Fix -Wconstant-conversion warning with explicit cast Introduced by fd6584a22043b254a323635c142b28ce80ae5b5b Following similar use of casts in AsmParser.cpp, for instance - ideally this type would use unsigned chars as they're more representative of raw data and don't get confused around implementation defined choices of char's signedness, but this is what it is & the signed/unsigned conversions are (so far as I understand) safe/bit preserving in this usage and what's intended, given the API design here. --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 30666009801c5..83653dcbb8cf7 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -442,7 +442,7 @@ MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP( // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG) SmallString<64> Expr; - Expr.push_back(dwarf::DW_OP_breg0 + /*SP*/ 31); + Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31)); Expr.push_back(0); appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, TRI.getDwarfRegNum(AArch64::VG, true), Comment); From b9266f81bc0a2b53406a84e6ef0c4989d5f7e296 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Tue, 4 Aug 2020 20:30:40 +0300 Subject: [PATCH 342/600] [MLIR][SPIRVToLLVM] Indentation and style fix in tests Second patch with test fixes. Redundant `%{{.*}} = ` removed, label checks added, tabs converted to spaces and some namings are changed to match the convention. 
Fixed tests: - constant-op-to-llvm - func-ops-to-llvm (renamed) - memory-ops-to-llvm - misc-ops-to-llvm - module-ops-to-llvm - shift-ops-to-llvm (renamed) - spirv-types-to-llvm-invalid (renamed) Reviewed By: ftynse, rriddle Differential Revision: https://reviews.llvm.org/D85206 --- .../SPIRVToLLVM/constant-op-to-llvm.mlir | 32 +++-- ...unc-to-llvm.mlir => func-ops-to-llvm.mlir} | 44 ++++--- .../SPIRVToLLVM/memory-ops-to-llvm.mlir | 33 +++-- .../SPIRVToLLVM/misc-ops-to-llvm.mlir | 28 ++-- .../SPIRVToLLVM/shift-ops-to-llvm.mlir | 121 ++++++++++++++++++ .../SPIRVToLLVM/shifts-to-llvm.mlir | 115 ----------------- ....mlir => spirv-types-to-llvm-invalid.mlir} | 0 7 files changed, 203 insertions(+), 170 deletions(-) rename mlir/test/Conversion/SPIRVToLLVM/{func-to-llvm.mlir => func-ops-to-llvm.mlir} (71%) create mode 100644 mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir delete mode 100644 mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir rename mlir/test/Conversion/SPIRVToLLVM/{spirv-types-to-llvm.invalid.mlir => spirv-types-to-llvm-invalid.mlir} (100%) diff --git a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir index dc84f404906c7..90fb3afdde798 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/constant-op-to-llvm.mlir @@ -4,52 +4,58 @@ // spv.constant //===----------------------------------------------------------------------===// +// CHECK-LABEL: @bool_constant_scalar func @bool_constant_scalar() { - // CHECK: {{.*}} = llvm.mlir.constant(true) : !llvm.i1 + // CHECK: llvm.mlir.constant(true) : !llvm.i1 %0 = spv.constant true - // CHECK: {{.*}} = llvm.mlir.constant(false) : !llvm.i1 + // CHECK: llvm.mlir.constant(false) : !llvm.i1 %1 = spv.constant false return } +// CHECK-LABEL: @bool_constant_vector func @bool_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : !llvm.vec<2 x i1> + // CHECK: llvm.mlir.constant(dense<[true, false]> : vector<2xi1>) : !llvm.vec<2 x i1> %0 = constant dense<[true, false]> : vector<2xi1> - // CHECK: {{.*}} = llvm.mlir.constant(dense : vector<3xi1>) : !llvm.vec<3 x i1> + // CHECK: llvm.mlir.constant(dense : vector<3xi1>) : !llvm.vec<3 x i1> %1 = constant dense : vector<3xi1> return } +// CHECK-LABEL: @integer_constant_scalar func @integer_constant_scalar() { - // CHECK: {{.*}} = llvm.mlir.constant(0 : i8) : !llvm.i8 + // CHECK: llvm.mlir.constant(0 : i8) : !llvm.i8 %0 = spv.constant 0 : i8 - // CHECK: {{.*}} = llvm.mlir.constant(-5 : i64) : !llvm.i64 + // CHECK: llvm.mlir.constant(-5 : i64) : !llvm.i64 %1 = spv.constant -5 : si64 - // CHECK: {{.*}} = llvm.mlir.constant(10 : i16) : !llvm.i16 + // CHECK: llvm.mlir.constant(10 : i16) : !llvm.i16 %2 = spv.constant 10 : ui16 return } +// CHECK-LABEL: @integer_constant_vector func @integer_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: llvm.mlir.constant(dense<[2, 3]> : vector<2xi32>) : !llvm.vec<2 x i32> %0 = spv.constant dense<[2, 3]> : vector<2xi32> - // CHECK: {{.*}} = llvm.mlir.constant(dense<-4> : vector<2xi32>) : !llvm.vec<2 x i32> + // CHECK: llvm.mlir.constant(dense<-4> : vector<2xi32>) : !llvm.vec<2 x i32> %1 = spv.constant dense<-4> : vector<2xsi32> - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> + // CHECK: llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> %2 = spv.constant 
dense<[2, 3, 4]> : vector<3xui32> return } +// CHECK-LABEL: @float_constant_scalar func @float_constant_scalar() { - // CHECK: {{.*}} = llvm.mlir.constant(5.000000e+00 : f16) : !llvm.half + // CHECK: llvm.mlir.constant(5.000000e+00 : f16) : !llvm.half %0 = spv.constant 5.000000e+00 : f16 - // CHECK: {{.*}} = llvm.mlir.constant(5.000000e+00 : f64) : !llvm.double + // CHECK: llvm.mlir.constant(5.000000e+00 : f64) : !llvm.double %1 = spv.constant 5.000000e+00 : f64 return } +// CHECK-LABEL: @float_constant_vector func @float_constant_vector() { - // CHECK: {{.*}} = llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : !llvm.vec<2 x float> + // CHECK: llvm.mlir.constant(dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32>) : !llvm.vec<2 x float> %0 = spv.constant dense<[2.000000e+00, 3.000000e+00]> : vector<2xf32> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir similarity index 71% rename from mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir rename to mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir index 7e7820dee6bef..21d3ce43b556a 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/func-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/func-ops-to-llvm.mlir @@ -4,18 +4,20 @@ // spv.Return //===----------------------------------------------------------------------===// +// CHECK-LABEL: @return func @return() { - // CHECK: llvm.return - spv.Return + // CHECK: llvm.return + spv.Return } //===----------------------------------------------------------------------===// // spv.ReturnValue //===----------------------------------------------------------------------===// +// CHECK-LABEL: @return_value func @return_value(%arg: i32) { - // CHECK: llvm.return %{{.*}} : !llvm.i32 - spv.ReturnValue %arg : i32 + // CHECK: llvm.return %{{.*}} : !llvm.i32 + spv.ReturnValue %arg : i32 } //===----------------------------------------------------------------------===// @@ -24,38 +26,38 @@ func @return_value(%arg: i32) { // CHECK-LABEL: llvm.func @none() spv.func @none() -> () "None" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @inline() attributes {passthrough = ["alwaysinline"]} spv.func @inline() -> () "Inline" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @dont_inline() attributes {passthrough = ["noinline"]} spv.func @dont_inline() -> () "DontInline" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @pure() attributes {passthrough = ["readonly"]} spv.func @pure() -> () "Pure" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @const() attributes {passthrough = ["readnone"]} spv.func @const() -> () "Const" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @scalar_types(%arg0: !llvm.i32, %arg1: !llvm.i1, %arg2: !llvm.double, %arg3: !llvm.float) spv.func @scalar_types(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: f32) -> () "None" { - spv.Return + spv.Return } // CHECK-LABEL: llvm.func @vector_types(%arg0: !llvm.vec<2 x i64>, %arg1: !llvm.vec<2 x i64>) -> !llvm.vec<2 x i64> spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi64> "None" { - %0 = spv.IAdd %arg0, %arg1 : vector<2xi64> - spv.ReturnValue %0 : vector<2xi64> + %0 = spv.IAdd %arg0, %arg1 : vector<2xi64> + spv.ReturnValue %0 : vector<2xi64> } //===----------------------------------------------------------------------===// @@ -65,15 +67,15 @@ spv.func @vector_types(%arg0: vector<2xi64>, %arg1: vector<2xi64>) -> vector<2xi // CHECK-LABEL: llvm.func @function_calls // CHECK-SAME: 
%[[ARG0:.*]]: !llvm.i32, %[[ARG1:.*]]: !llvm.i1, %[[ARG2:.*]]: !llvm.double, %[[ARG3:.*]]: !llvm.vec<2 x i64>, %[[ARG4:.*]]: !llvm.vec<2 x float> spv.func @function_calls(%arg0: i32, %arg1: i1, %arg2: f64, %arg3: vector<2xi64>, %arg4: vector<2xf32>) -> () "None" { - // CHECK: llvm.call @void_1() : () -> () - spv.FunctionCall @void_1() : () -> () - // CHECK: llvm.call @void_2(%[[ARG3]]) : (!llvm.vec<2 x i64>) -> () - spv.FunctionCall @void_2(%arg3) : (vector<2xi64>) -> () - // CHECK: %{{.*}} = llvm.call @value_scalar(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!llvm.i32, !llvm.i1, !llvm.double) -> !llvm.i32 - %0 = spv.FunctionCall @value_scalar(%arg0, %arg1, %arg2) : (i32, i1, f64) -> i32 - // CHECK: %{{.*}} = llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (!llvm.vec<2 x i64>, !llvm.vec<2 x float>) -> !llvm.vec<2 x float> - %1 = spv.FunctionCall @value_vector(%arg3, %arg4) : (vector<2xi64>, vector<2xf32>) -> vector<2xf32> - spv.Return + // CHECK: llvm.call @void_1() : () -> () + // CHECK: llvm.call @void_2(%[[ARG3]]) : (!llvm.vec<2 x i64>) -> () + // CHECK: llvm.call @value_scalar(%[[ARG0]], %[[ARG1]], %[[ARG2]]) : (!llvm.i32, !llvm.i1, !llvm.double) -> !llvm.i32 + // CHECK: llvm.call @value_vector(%[[ARG3]], %[[ARG4]]) : (!llvm.vec<2 x i64>, !llvm.vec<2 x float>) -> !llvm.vec<2 x float> + spv.FunctionCall @void_1() : () -> () + spv.FunctionCall @void_2(%arg3) : (vector<2xi64>) -> () + %0 = spv.FunctionCall @value_scalar(%arg0, %arg1, %arg2) : (i32, i1, f64) -> i32 + %1 = spv.FunctionCall @value_vector(%arg3, %arg4) : (vector<2xi64>, vector<2xf32>) -> vector<2xf32> + spv.Return } spv.func @void_1() -> () "None" { diff --git a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir index 6dafab982e35b..a565d396e6702 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir @@ -4,37 +4,42 @@ // spv.Load //===----------------------------------------------------------------------===// +// CHECK-LABEL: @load func @load() { %0 = spv.Variable : !spv.ptr - // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr + // CHECK: llvm.load %{{.*}} : !llvm.ptr %1 = spv.Load "Function" %0 : f32 return } +// CHECK-LABEL: @load_none func @load_none() { %0 = spv.Variable : !spv.ptr - // CHECK: %{{.*}} = llvm.load %{{.*}} : !llvm.ptr + // CHECK: llvm.load %{{.*}} : !llvm.ptr %1 = spv.Load "Function" %0 ["None"] : f32 return } +// CHECK-LABEL: @load_with_alignment func @load_with_alignment() { %0 = spv.Variable : !spv.ptr - // CHECK: %{{.*}} = llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr + // CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr %1 = spv.Load "Function" %0 ["Aligned", 4] : f32 return } +// CHECK-LABEL: @load_volatile func @load_volatile() { %0 = spv.Variable : !spv.ptr - // CHECK: %{{.*}} = llvm.load volatile %{{.*}} : !llvm.ptr + // CHECK: llvm.load volatile %{{.*}} : !llvm.ptr %1 = spv.Load "Function" %0 ["Volatile"] : f32 return } +// CHECK-LABEL: @load_nontemporal func @load_nontemporal() { %0 = spv.Variable : !spv.ptr - // CHECK: %{{.*}} = llvm.load %{{.*}} {nontemporal} : !llvm.ptr + // CHECK: llvm.load %{{.*}} {nontemporal} : !llvm.ptr %1 = spv.Load "Function" %0 ["Nontemporal"] : f32 return } @@ -43,6 +48,7 @@ func @load_nontemporal() { // spv.Store //===----------------------------------------------------------------------===// +// CHECK-LABEL: @store func @store(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: llvm.store %{{.*}}, %{{.*}} : 
!llvm.ptr @@ -50,6 +56,7 @@ func @store(%arg0 : f32) -> () { return } +// CHECK-LABEL: @store_composite func @store_composite(%arg0 : !spv.struct) -> () { %0 = spv.Variable : !spv.ptr, Function> // CHECK: llvm.store %{{.*}}, %{{.*}} : !llvm.ptr> @@ -57,6 +64,7 @@ func @store_composite(%arg0 : !spv.struct) -> () { return } +// CHECK-LABEL: @store_with_alignment func @store_with_alignment(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 4 : i64} : !llvm.ptr @@ -64,6 +72,7 @@ func @store_with_alignment(%arg0 : f32) -> () { return } +// CHECK-LABEL: @store_volatile func @store_volatile(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: llvm.store volatile %{{.*}}, %{{.*}} : !llvm.ptr @@ -71,6 +80,7 @@ func @store_volatile(%arg0 : f32) -> () { return } +// CHECK-LABEL: @store_nontemporal func @store_nontemporal(%arg0 : f32) -> () { %0 = spv.Variable : !spv.ptr // CHECK: llvm.store %{{.*}}, %{{.*}} {nontemporal} : !llvm.ptr @@ -82,16 +92,18 @@ func @store_nontemporal(%arg0 : f32) -> () { // spv.Variable //===----------------------------------------------------------------------===// +// CHECK-LABEL: @variable_scalar func @variable_scalar() { // CHECK: %[[SIZE1:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE1]] x !llvm.float : (!llvm.i32) -> !llvm.ptr + // CHECK: llvm.alloca %[[SIZE1]] x !llvm.float : (!llvm.i32) -> !llvm.ptr %0 = spv.Variable : !spv.ptr // CHECK: %[[SIZE2:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE2]] x !llvm.i8 : (!llvm.i32) -> !llvm.ptr + // CHECK: llvm.alloca %[[SIZE2]] x !llvm.i8 : (!llvm.i32) -> !llvm.ptr %1 = spv.Variable : !spv.ptr return } +// CHECK-LABEL: @variable_scalar_with_initialization func @variable_scalar_with_initialization() { // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 @@ -102,13 +114,15 @@ func @variable_scalar_with_initialization() { return } +// CHECK-LABEL: @variable_vector func @variable_vector() { // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm.vec<3 x float> : (!llvm.i32) -> !llvm.ptr> + // CHECK: llvm.alloca %[[SIZE]] x !llvm.vec<3 x float> : (!llvm.i32) -> !llvm.ptr> %0 = spv.Variable : !spv.ptr, Function> return } +// CHECK-LABEL: @variable_vector_with_initialization func @variable_vector_with_initialization() { // CHECK: %[[VALUE:.*]] = llvm.mlir.constant(dense : vector<3xi1>) : !llvm.vec<3 x i1> // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 @@ -119,9 +133,10 @@ func @variable_vector_with_initialization() { return } +// CHECK-LABEL: @variable_array func @variable_array() { // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32 - // CHECK: %{{.*}} = llvm.alloca %[[SIZE]] x !llvm.array<10 x i32> : (!llvm.i32) -> !llvm.ptr> + // CHECK: llvm.alloca %[[SIZE]] x !llvm.array<10 x i32> : (!llvm.i32) -> !llvm.ptr> %0 = spv.Variable : !spv.ptr, Function> return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index 60fe79c0007e8..2e74485323ede 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -4,17 +4,19 @@ // spv.Select //===----------------------------------------------------------------------===// +// CHECK-LABEL: @select_scalar func @select_scalar(%arg0: i1, %arg1: 
vector<3xi32>, %arg2: f32) { - // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.vec<3 x i32> - %0 = spv.Select %arg0, %arg1, %arg1 : i1, vector<3xi32> - // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.float - %1 = spv.Select %arg0, %arg2, %arg2 : i1, f32 + // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.vec<3 x i32> + %0 = spv.Select %arg0, %arg1, %arg1 : i1, vector<3xi32> + // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i1, !llvm.float + %1 = spv.Select %arg0, %arg2, %arg2 : i1, f32 return } +// CHECK-LABEL: @select_vector func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) { - // CHECK: %{{.*}} = llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.vec<2 x i1>, !llvm.vec<2 x i32> - %0 = spv.Select %arg0, %arg1, %arg1 : vector<2xi1>, vector<2xi32> + // CHECK: llvm.select %{{.*}}, %{{.*}}, %{{.*}} : !llvm.vec<2 x i1>, !llvm.vec<2 x i32> + %0 = spv.Select %arg0, %arg1, %arg1 : vector<2xi1>, vector<2xi32> return } @@ -22,14 +24,16 @@ func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) { // spv.Undef //===----------------------------------------------------------------------===// +// CHECK-LABEL: @undef_scalar func @undef_scalar() { - // CHECK: %{{.*}} = llvm.mlir.undef : !llvm.float - %0 = spv.undef : f32 - return + // CHECK: llvm.mlir.undef : !llvm.float + %0 = spv.undef : f32 + return } +// CHECK-LABEL: @undef_vector func @undef_vector() { - // CHECK: %{{.*}} = llvm.mlir.undef : !llvm.vec<2 x i32> - %0 = spv.undef : vector<2xi32> - return + // CHECK: llvm.mlir.undef : !llvm.vec<2 x i32> + %0 = spv.undef : vector<2xi32> + return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir new file mode 100644 index 0000000000000..52df231e718e4 --- /dev/null +++ b/mlir/test/Conversion/SPIRVToLLVM/shift-ops-to-llvm.mlir @@ -0,0 +1,121 @@ +// RUN: mlir-opt -convert-spirv-to-llvm %s | FileCheck %s + +//===----------------------------------------------------------------------===// +// spv.ShiftRightArithmetic +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @shift_right_arithmetic_scalar +func @shift_right_arithmetic_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 : ui16) { + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.ShiftRightArithmetic %arg0, %arg0 : i32, i32 + + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.i32 + %1 = spv.ShiftRightArithmetic %arg0, %arg1 : i32, si32 + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.ashr %{{.*}}, %[[SEXT]] : !llvm.i32 + %2 = spv.ShiftRightArithmetic %arg0, %arg2 : i32, i16 + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.ashr %{{.*}}, %[[ZEXT]] : !llvm.i32 + %3 = spv.ShiftRightArithmetic %arg0, %arg3 : i32, ui16 + return +} + +// CHECK-LABEL: @shift_right_arithmetic_vector +func @shift_right_arithmetic_vector(%arg0: vector<4xi64>, %arg1: vector<4xui64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %0 = spv.ShiftRightArithmetic %arg0, %arg0 : vector<4xi64>, vector<4xi64> + + // CHECK: llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %1 = spv.ShiftRightArithmetic %arg0, %arg1 : vector<4xi64>, vector<4xui64> + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.ashr %{{.*}}, %[[SEXT]] : !llvm.vec<4 x i64> + %2 = spv.ShiftRightArithmetic 
%arg0, %arg2 : vector<4xi64>, vector<4xi32> + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.ashr %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + %3 = spv.ShiftRightArithmetic %arg0, %arg3 : vector<4xi64>, vector<4xui32> + return +} + +//===----------------------------------------------------------------------===// +// spv.ShiftRightLogical +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @shift_right_logical_scalar +func @shift_right_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : si16, %arg3 : ui16) { + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.ShiftRightLogical %arg0, %arg0 : i32, i32 + + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.i32 + %1 = spv.ShiftRightLogical %arg0, %arg1 : i32, si32 + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.lshr %{{.*}}, %[[SEXT]] : !llvm.i32 + %2 = spv.ShiftRightLogical %arg0, %arg2 : i32, si16 + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.lshr %{{.*}}, %[[ZEXT]] : !llvm.i32 + %3 = spv.ShiftRightLogical %arg0, %arg3 : i32, ui16 + return +} + +// CHECK-LABEL: @shift_right_logical_vector +func @shift_right_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %0 = spv.ShiftRightLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> + + // CHECK: llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %1 = spv.ShiftRightLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.lshr %{{.*}}, %[[SEXT]] : !llvm.vec<4 x i64> + %2 = spv.ShiftRightLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.lshr %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + %3 = spv.ShiftRightLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> + return +} + +//===----------------------------------------------------------------------===// +// spv.ShiftLeftLogical +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @shift_left_logical_scalar +func @shift_left_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 : ui16) { + // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.i32 + %0 = spv.ShiftLeftLogical %arg0, %arg0 : i32, i32 + + // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.i32 + %1 = spv.ShiftLeftLogical %arg0, %arg1 : i32, si32 + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.shl %{{.*}}, %[[SEXT]] : !llvm.i32 + %2 = spv.ShiftLeftLogical %arg0, %arg2 : i32, i16 + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 + // CHECK: llvm.shl %{{.*}}, %[[ZEXT]] : !llvm.i32 + %3 = spv.ShiftLeftLogical %arg0, %arg3 : i32, ui16 + return +} + +// CHECK-LABEL: @shift_left_logical_vector +func @shift_left_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { + // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %0 = spv.ShiftLeftLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> + + // CHECK: llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> + %1 = spv.ShiftLeftLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> + + // CHECK: %[[SEXT:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.shl %{{.*}}, 
%[[SEXT]] : !llvm.vec<4 x i64> + %2 = spv.ShiftLeftLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> + + // CHECK: %[[ZEXT:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> + // CHECK: llvm.shl %{{.*}}, %[[ZEXT]] : !llvm.vec<4 x i64> + %3 = spv.ShiftLeftLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> + return +} diff --git a/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir deleted file mode 100644 index 41431ea3b3b20..0000000000000 --- a/mlir/test/Conversion/SPIRVToLLVM/shifts-to-llvm.mlir +++ /dev/null @@ -1,115 +0,0 @@ -// RUN: mlir-opt -convert-spirv-to-llvm %s | FileCheck %s - -//===----------------------------------------------------------------------===// -// spv.ShiftRightArithmetic -//===----------------------------------------------------------------------===// - -func @shift_right_arithmetic_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 : ui16) { - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.ShiftRightArithmetic %arg0, %arg0 : i32, i32 - - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.i32 - %1 = spv.ShiftRightArithmetic %arg0, %arg1 : i32, si32 - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT1:.*]]: !llvm.i32 - %2 = spv.ShiftRightArithmetic %arg0, %arg2 : i32, i16 - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT2:.*]]: !llvm.i32 - %3 = spv.ShiftRightArithmetic %arg0, %arg3 : i32, ui16 - return -} - -func @shift_right_arithmetic_vector(%arg0: vector<4xi64>, %arg1: vector<4xui64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %0 = spv.ShiftRightArithmetic %arg0, %arg0 : vector<4xi64>, vector<4xi64> - - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %1 = spv.ShiftRightArithmetic %arg0, %arg1 : vector<4xi64>, vector<4xui64> - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> - %2 = spv.ShiftRightArithmetic %arg0, %arg2 : vector<4xi64>, vector<4xi32> - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.ashr %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> - %3 = spv.ShiftRightArithmetic %arg0, %arg3 : vector<4xi64>, vector<4xui32> - return -} - -//===----------------------------------------------------------------------===// -// spv.ShiftRightLogical -//===----------------------------------------------------------------------===// - -func @shift_right_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : si16, %arg3 : ui16) { - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.ShiftRightLogical %arg0, %arg0 : i32, i32 - - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.i32 - %1 = spv.ShiftRightLogical %arg0, %arg1 : i32, si32 - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT1:.*]]: !llvm.i32 - %2 = spv.ShiftRightLogical %arg0, %arg2 : i32, si16 - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT2:.*]]: !llvm.i32 - %3 = spv.ShiftRightLogical %arg0, %arg3 : i32, ui16 - return -} - -func @shift_right_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // 
CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %0 = spv.ShiftRightLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> - - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %1 = spv.ShiftRightLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> - %2 = spv.ShiftRightLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.lshr %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> - %3 = spv.ShiftRightLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> - return -} - -//===----------------------------------------------------------------------===// -// spv.ShiftLeftLogical -//===----------------------------------------------------------------------===// - -func @shift_left_logical_scalar(%arg0: i32, %arg1: si32, %arg2 : i16, %arg3 : ui16) { - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.i32 - %0 = spv.ShiftLeftLogical %arg0, %arg0 : i32, i32 - - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.i32 - %1 = spv.ShiftLeftLogical %arg0, %arg1 : i32, si32 - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT1:.*]]: !llvm.i32 - %2 = spv.ShiftLeftLogical %arg0, %arg2 : i32, i16 - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.i16 to !llvm.i32 - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT2:.*]]: !llvm.i32 - %3 = spv.ShiftLeftLogical %arg0, %arg3 : i32, ui16 - return -} - -func @shift_left_logical_vector(%arg0: vector<4xi64>, %arg1: vector<4xsi64>, %arg2: vector<4xi32>, %arg3: vector<4xui32>) { - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %0 = spv.ShiftLeftLogical %arg0, %arg0 : vector<4xi64>, vector<4xi64> - - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %{{.*}} : !llvm.vec<4 x i64> - %1 = spv.ShiftLeftLogical %arg0, %arg1 : vector<4xi64>, vector<4xsi64> - - // CHECK: %[[EXT1:.*]] = llvm.sext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT1:.*]]: !llvm.vec<4 x i64> - %2 = spv.ShiftLeftLogical %arg0, %arg2 : vector<4xi64>, vector<4xi32> - - // CHECK: %[[EXT2:.*]] = llvm.zext %{{.*}} : !llvm.vec<4 x i32> to !llvm.vec<4 x i64> - // CHECK: %{{.*}} = llvm.shl %{{.*}}, %[[EXT2:.*]]: !llvm.vec<4 x i64> - %3 = spv.ShiftLeftLogical %arg0, %arg3 : vector<4xi64>, vector<4xui32> - return -} diff --git a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.invalid.mlir b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir similarity index 100% rename from mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.invalid.mlir rename to mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir From 960cef75f4d289e01b338c2f98c5dca2520ee8ff Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 4 Aug 2020 13:54:15 -0400 Subject: [PATCH 343/600] [InstSimplify] add tests for compare of min/max; NFC The tests are adapted from the existing tests for cmp/select idioms.
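To make the expected folds concrete, here is a hand-written C++ analogue of the relations these tests pin down (an illustration only, not part of the patch; the helper names are invented): the max of two values can never compare strictly less than either operand, and the min can never compare strictly greater.

#include <algorithm>
#include <cstdint>

// Mirrors %m = call i8 @llvm.smax.i8(i8 %x, i8 %y); %r = icmp slt i8 %m, %x.
bool smax_slt_demo(int8_t x, int8_t y) {
  int8_t m = std::max(x, y); // m >= x and m >= y by definition
  return m < x;              // always false, so a follow-up fold can return false
}

// Mirrors %m = call i8 @llvm.umin.i8(i8 %x, i8 %y); %r = icmp ule i8 %m, %x.
bool umin_ule_demo(uint8_t x, uint8_t y) {
  uint8_t m = std::min(x, y); // m <= x and m <= y by definition
  return m <= x;              // always true, so a follow-up fold can return true
}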
--- .../InstSimplify/maxmin_intrinsics.ll | 324 ++++++++++++++++++ 1 file changed, 324 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll index 6b10853dd78f2..1053bfa431ed7 100644 --- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll +++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll @@ -809,3 +809,327 @@ define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) { %m2 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> , <2 x i8> %m) ret <2 x i8> %m2 } + +define i1 @smax_slt(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_slt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp slt i8 %m, %x + ret i1 %r +} + +define i1 @smax_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_sge( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sge i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp sge i8 %m, %x + ret i1 %r +} + +define i1 @umax_ult(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_ult( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp ult i8 %m, %x + ret i1 %r +} + +define i1 @umax_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_uge( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp uge i8 %m, %x + ret i1 %r +} + +define i1 @smax_sgt(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_sgt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp sgt i8 %x, %m + ret i1 %r +} + +define i1 @smax_sle(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_sle( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp sle i8 %x, %m + ret i1 %r +} + +define i1 @umax_ugt(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_ugt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp ugt i8 %x, %m + ret i1 %r +} + +define i1 @umax_ule(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_ule( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp ule i8 %x, %m + ret i1 %r +} + +define i1 @smin_sgt(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_sgt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp sgt i8 %m, %x + ret i1 %r +} + +define i1 @smin_sle(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_sle( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[M]], [[X]] +; 
CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp sle i8 %m, %x + ret i1 %r +} + +define i1 @umin_ugt(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_ugt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp ugt i8 %m, %x + ret i1 %r +} + +define i1 @umin_ule(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_ule( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[M]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp ule i8 %m, %x + ret i1 %r +} + +define i1 @smin_slt(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_slt( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp slt i8 %x, %m + ret i1 %r +} + +define i1 @smin_sge(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_sge( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp sge i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp sge i8 %x, %m + ret i1 %r +} + +define i1 @umin_ult(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_ult( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp ult i8 %x, %m + ret i1 %r +} + +define i1 @umin_uge(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_uge( +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[X]], [[M]] +; CHECK-NEXT: ret i1 [[R]] +; + %m = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp uge i8 %x, %m + ret i1 %r +} + +define i1 @smaxmin_sge(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @smaxmin_sge( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp sge i8 [[MAX]], [[MIN]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.smin.i8(i8 %z, i8 %x) + %c = icmp sge i8 %max, %min + ret i1 %c +} + +define i1 @smaxmin_sgt(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @smaxmin_sgt( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[MIN]], [[MAX]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.smin.i8(i8 %z, i8 %x) + %c = icmp sgt i8 %min, %max + ret i1 %c +} + +define i1 @smaxmin_sle(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @smaxmin_sle( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp sle i8 [[MIN]], [[MAX]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.smin.i8(i8 %z, i8 %x) + %c = icmp sle i8 %min, %max + ret i1 %c +} + +define i1 @smaxmin_slt(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @smaxmin_slt( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 
[[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[MAX]], [[MIN]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.smin.i8(i8 %z, i8 %x) + %c = icmp slt i8 %max, %min + ret i1 %c +} + +define i1 @umaxmin_uge(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @umaxmin_uge( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp uge i8 [[MAX]], [[MIN]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.umin.i8(i8 %z, i8 %x) + %c = icmp uge i8 %max, %min + ret i1 %c +} + +define i1 @umaxmin_ugt(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @umaxmin_ugt( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[MIN]], [[MAX]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.umin.i8(i8 %z, i8 %x) + %c = icmp ugt i8 %min, %max + ret i1 %c +} + +define i1 @umaxmin_ule(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @umaxmin_ule( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp ule i8 [[MIN]], [[MAX]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.umin.i8(i8 %z, i8 %x) + %c = icmp ule i8 %min, %max + ret i1 %c +} + +define i1 @umaxmin_ult(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @umaxmin_ult( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[Z:%.*]], i8 [[X]]) +; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[MAX]], [[MIN]] +; CHECK-NEXT: ret i1 [[C]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %min = call i8 @llvm.umin.i8(i8 %z, i8 %x) + %c = icmp ult i8 %max, %min + ret i1 %c +} + +define i1 @smax_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_eq( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MAX]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp eq i8 %max, %x + ret i1 %r +} + +define i1 @smax_eq_commute(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_eq_commute( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[MAX]] +; CHECK-NEXT: ret i1 [[R]] +; + %max = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %r = icmp eq i8 %x, %max + ret i1 %r +} + +define i1 @umax_eq(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_eq( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[MAX]], [[X]] +; CHECK-NEXT: ret i1 [[R]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp eq i8 %max, %x + ret i1 %r +} + +define i1 @umax_eq_commute(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_eq_commute( +; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X]], [[MAX]] +; CHECK-NEXT: ret i1 [[R]] +; + %max = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %r = icmp eq i8 %x, %max + ret i1 %r +} From 31ec6e969d601eb4fbee7bd4be3d03d23b5b3c11 Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Tue, 4 Aug 2020 11:01:43 -0700 Subject: [PATCH 344/600] [test] Fix another realpath->abspath. 
This is a followup to 817b3a6fe3a4452eb61a2503c8beaa7267ca0351: in `builder_base` we should use abspath, not realpath, because the name is significant. This is used by test cases that use `@skipIf(compiler="clang", compiler_version=['<', ])` --- lldb/packages/Python/lldbsuite/test/plugins/builder_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py index a8114b3e05924..0cff6655ed770 100644 --- a/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py +++ b/lldb/packages/Python/lldbsuite/test/plugins/builder_base.py @@ -34,7 +34,7 @@ def getCompiler(): """Returns the compiler in effect the test suite is running with.""" compiler = configuration.compiler if configuration.compiler else "clang" compiler = lldbutil.which(compiler) - return os.path.realpath(compiler) + return os.path.abspath(compiler) def getArchFlag(): From e18c6ef6b41a59af73bf5c3d7d52a8c53a471e5d Mon Sep 17 00:00:00 2001 From: Thorsten Schuett Date: Tue, 4 Aug 2020 11:10:01 -0700 Subject: [PATCH 345/600] [clang] improve diagnostics for misaligned and large atomics "Listing the alignment and access size (== expected alignment) in the warning seems like a good idea." This solves PR 46947. struct Foo { struct Bar { void * a; void * b; }; Bar bar; }; struct ThirtyTwo { struct Large { void * a; void * b; void * c; void * d; }; Large bar; }; void braz(Foo *foo, ThirtyTwo *braz) { Foo::Bar bar; __atomic_load(&foo->bar, &bar, __ATOMIC_RELAXED); ThirtyTwo::Large foobar; __atomic_load(&braz->bar, &foobar, __ATOMIC_RELAXED); } repro.cpp:21:3: warning: misaligned atomic operation may incur significant performance penalty; the expected alignment (16 bytes) exceeds the actual alignment (8 bytes) [-Watomic-alignment] __atomic_load(&foo->bar, &bar, __ATOMIC_RELAXED); ^ repro.cpp:24:3: warning: misaligned atomic operation may incur significant performance penalty; the expected alignment (32 bytes) exceeds the actual alignment (8 bytes) [-Watomic-alignment] __atomic_load(&braz->bar, &foobar, __ATOMIC_RELAXED); ^ repro.cpp:24:3: warning: large atomic operation may incur significant performance penalty; the access size (32 bytes) exceeds the max lock-free size (16 bytes) [-Watomic-alignment] 3 warnings generated.
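As a usage note (a hand-written sketch, not part of this patch): on a typical 64-bit target, raising the object's alignment to the access size is one way to address the first warning, since the atomic access is then no longer misaligned.

struct Foo {
  struct Bar {
    void *a;
    void *b;
  };
  // 16-byte alignment matches the 16-byte atomic access size, so the
  // misaligned-atomic warning should no longer fire for loads of 'bar'.
  alignas(16) Bar bar;
};

void load(Foo *foo) {
  Foo::Bar bar;
  __atomic_load(&foo->bar, &bar, __ATOMIC_RELAXED);
}

The oversized case is different: an access larger than the maximum lock-free size still takes the libcall path regardless of alignment, which is exactly what the second, separate diagnostic now reports.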
Differential Revision: https://reviews.llvm.org/D85102 --- .../clang/Basic/DiagnosticFrontendKinds.td | 12 ++++++-- clang/include/clang/Basic/DiagnosticGroups.td | 1 + clang/lib/CodeGen/CGAtomic.cpp | 16 ++++++++-- clang/test/CodeGen/atomics-sema-alignment.c | 29 ++++++++++++++++--- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index b202d2abffa00..6434d92fd8fcf 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -270,8 +270,16 @@ def err_ifunc_resolver_return : Error< "ifunc resolver function must return a pointer">; def warn_atomic_op_misaligned : Warning< - "%select{large|misaligned}0 atomic operation may incur " - "significant performance penalty">, InGroup>; + "misaligned atomic operation may incur " + "significant performance penalty" + "; the expected alignment (%0 bytes) exceeds the actual alignment (%1 bytes)">, + InGroup; + +def warn_atomic_op_oversized : Warning< + "large atomic operation may incur " + "significant performance penalty" + "; the access size (%0 bytes) exceeds the max lock-free size (%1 bytes)">, +InGroup; def warn_alias_with_section : Warning< "%select{alias|ifunc}1 will not be in section '%0' but in the same section " diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 1e829be4028e4..be62461faef48 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -699,6 +699,7 @@ def ReorderInitList : DiagGroup<"reorder-init-list">; def Reorder : DiagGroup<"reorder", [ReorderCtor, ReorderInitList]>; def UndeclaredSelector : DiagGroup<"undeclared-selector">; def ImplicitAtomic : DiagGroup<"implicit-atomic-properties">; +def AtomicAlignment : DiagGroup<"atomic-alignment">; def CustomAtomic : DiagGroup<"custom-atomic-properties">; def AtomicProperties : DiagGroup<"atomic-properties", [ImplicitAtomic, CustomAtomic]>; diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index a58450ddd4c5f..b7ada4ac7e3bc 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -807,10 +807,20 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits; bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0; bool UseLibcall = Misaligned | Oversized; + CharUnits MaxInlineWidth = + getContext().toCharUnitsFromBits(MaxInlineWidthInBits); - if (UseLibcall) { - CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned) - << !Oversized; + DiagnosticsEngine &Diags = CGM.getDiags(); + + if (Misaligned) { + Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned) + << (int)sizeChars.getQuantity() + << (int)Ptr.getAlignment().getQuantity(); + } + + if (Oversized) { + Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized) + << (int)sizeChars.getQuantity() << (int)MaxInlineWidth.getQuantity(); } llvm::Value *Order = EmitScalarExpr(E->getOrder()); diff --git a/clang/test/CodeGen/atomics-sema-alignment.c b/clang/test/CodeGen/atomics-sema-alignment.c index 9443af354ec5d..d0058f1da8b01 100644 --- a/clang/test/CodeGen/atomics-sema-alignment.c +++ b/clang/test/CodeGen/atomics-sema-alignment.c @@ -12,10 +12,10 @@ typedef int __attribute__((aligned(1))) unaligned_int; void func(IntPair *p) { IntPair res; - __atomic_load(p, &res, 0); // expected-warning 
{{misaligned atomic operation may incur significant performance penalty}} - __atomic_store(p, &res, 0); // expected-warning {{misaligned atomic operation may incur significant performance penalty}} - __atomic_fetch_add((unaligned_int *)p, 1, 2); // expected-warning {{misaligned atomic operation may incur significant performance penalty}} - __atomic_fetch_sub((unaligned_int *)p, 1, 3); // expected-warning {{misaligned atomic operation may incur significant performance penalty}} + __atomic_load(p, &res, 0); // expected-warning {{misaligned atomic operation may incur significant performance penalty; the expected alignment (8 bytes) exceeds the actual alignment (4 bytes)}} + __atomic_store(p, &res, 0); // expected-warning {{misaligned atomic operation may incur significant performance penalty; the expected alignment (8 bytes) exceeds the actual alignment (4 bytes)}} + __atomic_fetch_add((unaligned_int *)p, 1, 2); // expected-warning {{misaligned atomic operation may incur significant performance penalty; the expected alignment (4 bytes) exceeds the actual alignment (1 bytes)}} + __atomic_fetch_sub((unaligned_int *)p, 1, 3); // expected-warning {{misaligned atomic operation may incur significant performance penalty; the expected alignment (4 bytes) exceeds the actual alignment (1 bytes)}} } void func1(LongStruct *p) { @@ -25,3 +25,24 @@ void func1(LongStruct *p) { __atomic_fetch_add((int *)p, 1, 2); __atomic_fetch_sub((int *)p, 1, 3); } + +typedef struct { + void *a; + void *b; +} Foo; + +typedef struct { + void *a; + void *b; + void *c; + void *d; +} __attribute__((aligned(32))) ThirtyTwo; + +void braz(Foo *foo, ThirtyTwo *braz) { + Foo bar; + __atomic_load(foo, &bar, __ATOMIC_RELAXED); // expected-warning {{misaligned atomic operation may incur significant performance penalty; the expected alignment (16 bytes) exceeds the actual alignment (8 bytes)}} + + ThirtyTwo thirtyTwo1; + ThirtyTwo thirtyTwo2; + __atomic_load(&thirtyTwo1, &thirtyTwo2, __ATOMIC_RELAXED); // expected-warning {{large atomic operation may incur significant performance penalty; the access size (32 bytes) exceeds the max lock-free size (16 bytes)}} +} From f8cc94a61afe48a4b77f111a8ad313fc3b9417de Mon Sep 17 00:00:00 2001 From: AK <1894981+hiraditya@users.noreply.github.com> Date: Tue, 4 Aug 2020 10:57:52 -0700 Subject: [PATCH 346/600] Revert "[HotColdSplit] Add test case for unlikely attribute in outlined function" This reverts commit aa1f905890fbbfedf396530f1e14409875ece13c. The flag -codegenprepare may be causing failures. Reverting this to investigate the root cause. --- .../test/Transforms/HotColdSplit/coldentrycount.ll | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll index 7b196bf4c1048..d63acc188f544 100644 --- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll +++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll @@ -1,14 +1,13 @@ ; Test to ensure that split cold function gets 0 entry count profile ; metadata when compiling with pgo.
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -codegenprepare -S < %s | FileCheck %s +; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" -; CHECK: define {{.*}} @fun{{.*}} ![[HOTPROF:[0-9]+]] {{.*}}section_prefix ![[LIKELY:[0-9]+]] +; CHECK-LABEL: @fun ; CHECK: call void @fun.cold.1 - define void @fun() !prof !14 { entry: br i1 undef, label %if.then, label %if.else @@ -23,12 +22,8 @@ if.else: declare void @sink() cold -; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]] - -; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100} -; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !".hot"} +; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0} -; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !".unlikely"} !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} @@ -46,6 +41,3 @@ declare void @sink() cold !12 = !{i32 999000, i64 100, i32 1} !13 = !{i32 999999, i64 1, i32 2} !14 = !{!"function_entry_count", i64 100} -!15 = !{!"function_section_prefix", !".hot"} -!16 = !{!"function_entry_count", i64 0} -!17 = !{!"function_section_prefix", !".unlikely"} From 456497450482153afe86838ac2e2be395206d377 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 7 Jul 2020 22:50:12 +0200 Subject: [PATCH 347/600] [SCCP] Propagate inequalities Teach SCCP to create notconstant lattice values from inequality assumes and nonnull metadata, and update getConstant() to make use of them. Additionally isOverdefined() needs to be changed to consider notconstant an overdefined value. Handling inequality branches is delayed until our branch on undef story in other passes has been improved. Differential Revision: https://reviews.llvm.org/D83643 --- llvm/include/llvm/Analysis/ValueLattice.h | 11 +++++++ llvm/lib/Transforms/Scalar/SCCP.cpp | 36 +++++++++++++++-------- llvm/test/Transforms/SCCP/assume.ll | 12 +++----- llvm/test/Transforms/SCCP/metadata.ll | 12 +++----- 4 files changed, 42 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index bf5bab9ced228..108d08033ac34 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -11,6 +11,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" // //===----------------------------------------------------------------------===// // ValueLatticeElement @@ -456,6 +457,16 @@ class ValueLatticeElement { if (isConstant() && Other.isConstant()) return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); + if (ICmpInst::isEquality(Pred)) { + // not(C) != C => true, not(C) == C => false. + if ((isNotConstant() && Other.isConstant() && + getNotConstant() == Other.getConstant()) || + (isConstant() && Other.isNotConstant() && + getConstant() == Other.getNotConstant())) + return Pred == ICmpInst::ICMP_NE + ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty); + } + // Integer constants are represented as ConstantRanges with single // elements. 
if (!isConstantRange() || !Other.isConstantRange()) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 9c9f483ab103d..bd0968b67ab12 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -104,8 +104,7 @@ bool isConstant(const ValueLatticeElement &LV) { // ValueLatticeElement::isOverdefined() and is intended to be used in the // transition to ValueLatticeElement. bool isOverdefined(const ValueLatticeElement &LV) { - return LV.isOverdefined() || - (LV.isConstantRange() && !LV.getConstantRange().isSingleElement()); + return !LV.isUnknownOrUndef() && !isConstant(LV); } //===----------------------------------------------------------------------===// @@ -1123,7 +1122,9 @@ static ValueLatticeElement getValueFromMetadata(const Instruction *I) { if (I->getType()->isIntegerTy()) return ValueLatticeElement::getRange( getConstantRangeFromMetadata(*Ranges)); - // TODO: Also handle MD_nonnull. + if (I->hasMetadata(LLVMContext::MD_nonnull)) + return ValueLatticeElement::getNot( + ConstantPointerNull::get(cast(I->getType()))); return ValueLatticeElement::getOverdefined(); } @@ -1291,6 +1292,17 @@ void SCCPSolver::handleCallResult(CallBase &CB) { return; } + // TODO: Actually filp MayIncludeUndef for the created range to false, + // once most places in the optimizer respect the branches on + // undef/poison are UB rule. The reason why the new range cannot be + // undef is as follows below: + // The new range is based on a branch condition. That guarantees that + // neither of the compare operands can be undef in the branch targets, + // unless we have conditions that are always true/false (e.g. icmp ule + // i32, %a, i32_max). For the latter overdefined/empty range will be + // inferred, but the branch will get folded accordingly anyways. + bool MayIncludeUndef = !isa(PI); + ValueLatticeElement CondVal = getValueState(OtherOp); ValueLatticeElement &IV = ValueState[&CB]; if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { @@ -1316,18 +1328,9 @@ void SCCPSolver::handleCallResult(CallBase &CB) { NewCR = CopyOfCR; addAdditionalUser(OtherOp, &CB); - // TODO: Actually filp MayIncludeUndef for the created range to false, - // once most places in the optimizer respect the branches on - // undef/poison are UB rule. The reason why the new range cannot be - // undef is as follows below: - // The new range is based on a branch condition. That guarantees that - // neither of the compare operands can be undef in the branch targets, - // unless we have conditions that are always true/false (e.g. icmp ule - // i32, %a, i32_max). For the latter overdefined/empty range will be - // inferred, but the branch will get folded accordingly anyways. mergeInValue( IV, &CB, - ValueLatticeElement::getRange(NewCR, /*MayIncludeUndef=*/true)); + ValueLatticeElement::getRange(NewCR, MayIncludeUndef)); return; } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only @@ -1335,6 +1338,13 @@ void SCCPSolver::handleCallResult(CallBase &CB) { addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, CondVal); return; + } else if (Pred == CmpInst::ICMP_NE && CondVal.isConstant() && + !MayIncludeUndef) { + // Propagate inequalities. 
+ addAdditionalUser(OtherOp, &CB); + mergeInValue(IV, &CB, + ValueLatticeElement::getNot(CondVal.getConstant())); + return; } return (void)mergeInValue(IV, &CB, CopyOfVal); diff --git a/llvm/test/Transforms/SCCP/assume.ll b/llvm/test/Transforms/SCCP/assume.ll index dc827f03c0abe..ce47877e9e3c5 100644 --- a/llvm/test/Transforms/SCCP/assume.ll +++ b/llvm/test/Transforms/SCCP/assume.ll @@ -51,14 +51,10 @@ define void @nonnull(i32* %v) { ; CHECK-LABEL: @nonnull( ; CHECK-NEXT: [[A:%.*]] = icmp ne i32* [[V:%.*]], null ; CHECK-NEXT: call void @llvm.assume(i1 [[A]]) -; CHECK-NEXT: [[C1:%.*]] = icmp eq i32* [[V]], null -; CHECK-NEXT: call void @use(i1 [[C1]]) -; CHECK-NEXT: [[C2:%.*]] = icmp ne i32* [[V]], null -; CHECK-NEXT: call void @use(i1 [[C2]]) -; CHECK-NEXT: [[C3:%.*]] = icmp eq i32* null, [[V]] -; CHECK-NEXT: call void @use(i1 [[C3]]) -; CHECK-NEXT: [[C4:%.*]] = icmp ne i32* null, [[V]] -; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %a = icmp ne i32* %v, null diff --git a/llvm/test/Transforms/SCCP/metadata.ll b/llvm/test/Transforms/SCCP/metadata.ll index 844e2103ae318..f32dca231196b 100644 --- a/llvm/test/Transforms/SCCP/metadata.ll +++ b/llvm/test/Transforms/SCCP/metadata.ll @@ -48,14 +48,10 @@ define void @load_nonnull(i32** %p, i32** %p2) { ; CHECK-LABEL: @load_nonnull( ; CHECK-NEXT: [[V:%.*]] = load i32*, i32** [[P:%.*]], align 8, !nonnull !2 ; CHECK-NEXT: [[V2:%.*]] = load i32*, i32** [[P2:%.*]], align 8, !nonnull !2 -; CHECK-NEXT: [[C1:%.*]] = icmp ne i32* [[V]], null -; CHECK-NEXT: call void @use(i1 [[C1]]) -; CHECK-NEXT: [[C2:%.*]] = icmp eq i32* [[V]], null -; CHECK-NEXT: call void @use(i1 [[C2]]) -; CHECK-NEXT: [[C3:%.*]] = icmp ne i32* null, [[V]] -; CHECK-NEXT: call void @use(i1 [[C3]]) -; CHECK-NEXT: [[C4:%.*]] = icmp eq i32* null, [[V]] -; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C5:%.*]] = icmp eq i32* [[V]], [[V2]] ; CHECK-NEXT: call void @use(i1 [[C5]]) ; CHECK-NEXT: [[C6:%.*]] = icmp ne i32* [[V]], [[V2]] From 12cb400fd25863154c6d4d26427642cac0e52da1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 11:01:02 -0700 Subject: [PATCH 348/600] [llvm-symbolizer] Add compatibility aliases for --inlining={true,false} D83530 removed --inlining={true,false} which were used by old asan_symbolize.py script. Add compatibility aliases so that old asan_symbolize.py and sanitizer binaries can work with new llvm-symbolizer. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D85228 --- llvm/test/tools/llvm-symbolizer/output-style-inlined.test | 4 ++++ llvm/test/tools/llvm-symbolizer/sym.test | 4 ++++ llvm/tools/llvm-symbolizer/Opts.td | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test index daa9584a3f48e..7e9f7e7ce1806 100644 --- a/llvm/test/tools/llvm-symbolizer/output-style-inlined.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-inlined.test @@ -6,6 +6,10 @@ the option is not specified. 
RUN: llvm-symbolizer --no-inlines -e %p/Inputs/addr.exe 0x40054d \ RUN: | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo +## Before 2020-08-04, asan_symbolize.py passed --inlining=false if 'symbolize_inline_frames' +## is set to false. Support this compatibility alias for a while. +RUN: llvm-symbolizer --inlining=false -e %p/Inputs/addr.exe 0x40054d \ +RUN: | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo RUN: llvm-symbolizer --output-style=LLVM --no-inlines -e %p/Inputs/addr.exe 0x40054d \ RUN: | FileCheck %s --check-prefix=LLVM --implicit-check-not=inctwo diff --git a/llvm/test/tools/llvm-symbolizer/sym.test b/llvm/test/tools/llvm-symbolizer/sym.test index 20377ebf86c40..73097b2c4b9b6 100644 --- a/llvm/test/tools/llvm-symbolizer/sym.test +++ b/llvm/test/tools/llvm-symbolizer/sym.test @@ -26,6 +26,10 @@ RUN: llvm-symbolizer -inlines -print-address -pretty-print -obj=%p/Inputs/addr.e RUN: llvm-symbolizer -inlines -print-address -p -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s RUN: llvm-symbolizer -i -print-address -pretty-print -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s RUN: llvm-symbolizer -i -print-address -p -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +## Before 2020-08-04, asan_symbolize.py passed --inlining=true. +## Support this compatibility alias for a while. +RUN: llvm-symbolizer --inlining=true --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s + RUN: echo "0x1" > %t.input RUN: llvm-symbolizer -obj=%p/Inputs/zero < %t.input | FileCheck -check-prefix="ZERO" %s diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index d83b796635b8e..66b38924023f2 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -58,3 +58,7 @@ def : Flag<["-"], "i">, Alias, HelpText<"Alias for --inlines">; def : F<"inlining", "Alias for --inlines">, Alias; def : Flag<["-"], "p">, Alias, HelpText<"Alias for --pretty-print">; def : Flag<["-"], "s">, Alias, HelpText<"Alias for --basenames">; + +// Compatibility aliases for old asan_symbolize.py and sanitizer binaries (before 2020-08). 
+def : Flag<["--"], "inlining=true">, Alias, HelpText<"Alias for --inlines">; +def : Flag<["--"], "inlining=false">, Alias, HelpText<"Alias for --no-inlines">; From b778b04b69d02a2fa18b22a1858f3eb26c2f7f24 Mon Sep 17 00:00:00 2001 From: Xavier Denis Date: Tue, 4 Aug 2020 20:44:47 +0200 Subject: [PATCH 349/600] [InstSimplify] Add tests for icmp with urem divisor (NFC) --- llvm/test/Transforms/InstSimplify/compare.ll | 50 ++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 86d26a82fa1e9..7f665018a6fdb 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -723,6 +723,50 @@ define i1 @urem7(i32 %X) { ret i1 %B } +define i1 @urem8(i8 %X, i8 %Y) { +; CHECK-LABEL: @urem8( +; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp ule i8 [[A]], [[X]] +; CHECK-NEXT: ret i1 [[B]] +; + %A = urem i8 %X, %Y + %B = icmp ule i8 %A, %X + ret i1 %B +} + +define i1 @urem9(i8 %X, i8 %Y) { +; CHECK-LABEL: @urem9( +; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[A]], [[X]] +; CHECK-NEXT: ret i1 [[B]] +; + %A = urem i8 %X, %Y + %B = icmp ugt i8 %A, %X + ret i1 %B +} + +define i1 @urem10(i8 %X, i8 %Y) { +; CHECK-LABEL: @urem10( +; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp uge i8 [[X]], [[A]] +; CHECK-NEXT: ret i1 [[B]] +; + %A = urem i8 %X, %Y + %B = icmp uge i8 %X, %A + ret i1 %B +} + +define i1 @urem11(i8 %X, i8 %Y) { +; CHECK-LABEL: @urem11( +; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[X]], [[A]] +; CHECK-NEXT: ret i1 [[B]] +; + %A = urem i8 %X, %Y + %B = icmp ult i8 %X, %A + ret i1 %B +} + ; PR9343 #15 define i1 @srem2(i16 %X, i32 %Y) { ; CHECK-LABEL: @srem2( @@ -961,7 +1005,7 @@ define i1 @alloca_compare(i64 %idx) { define i1 @alloca_compare_no_null_opt(i64 %idx) #0 { ; CHECK-LABEL: @alloca_compare_no_null_opt( -; CHECK-NEXT: [[SV:%.*]] = alloca { i32, i32, [124 x i32] } +; CHECK-NEXT: [[SV:%.*]] = alloca { i32, i32, [124 x i32] }, align 8 ; CHECK-NEXT: [[CMP:%.*]] = getelementptr inbounds { i32, i32, [124 x i32] }, { i32, i32, [124 x i32] }* [[SV]], i32 0, i32 2, i64 [[IDX:%.*]] ; CHECK-NEXT: [[X:%.*]] = icmp eq i32* [[CMP]], null ; CHECK-NEXT: ret i1 [[X]] @@ -995,7 +1039,7 @@ unreachableblock: define i1 @alloca_argument_compare(i64* %arg) { ; CHECK-LABEL: @alloca_argument_compare( -; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64 +; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64* [[ARG:%.*]], [[ALLOC]] ; CHECK-NEXT: ret i1 [[CMP]] ; @@ -1008,7 +1052,7 @@ define i1 @alloca_argument_compare(i64* %arg) { define i1 @alloca_argument_compare_swapped(i64* %arg) { ; CHECK-LABEL: @alloca_argument_compare_swapped( -; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64 +; CHECK-NEXT: [[ALLOC:%.*]] = alloca i64, align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64* [[ALLOC]], [[ARG:%.*]] ; CHECK-NEXT: ret i1 [[CMP]] ; From 29fe3fe6155fd79ce731a119ce8065a8a0d26b56 Mon Sep 17 00:00:00 2001 From: Xavier Denis Date: Tue, 4 Aug 2020 20:44:47 +0200 Subject: [PATCH 350/600] [InstSimplify] Peephole optimization for icmp (urem X, Y), X This revision adds the following peephole optimization and its negation: %a = urem i64 %x, %y %b = icmp ule i64 %a, %x ====> %b = true With John Regehr's help this optimization was checked with Alive2, which suggests it should
be valid. This pattern occurs in the bounds checks of Rust code; for example, in the program const N: usize = 3; type T = u8; pub fn split_multiple(slice: &[T]) -> (&[T], &[T]) { let len = slice.len() / N; slice.split_at(len * N) } The method call slice.split_at will check that len * N is within the bounds of slice; after some transformations, this bounds check is turned into the urem seen above, and LLVM then fails to optimize it any further. Adding this optimization would cause this bounds check to be fully optimized away. ref: https://github.com/rust-lang/rust/issues/74938 Differential Revision: https://reviews.llvm.org/D85092 --- llvm/lib/Analysis/InstructionSimplify.cpp | 8 ++++++++ llvm/test/Transforms/InstSimplify/compare.ll | 16 ++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 8dd047ecb8b2c..97ac9ab458cc4 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2814,6 +2814,14 @@ static Value *simplifyICmpWithBinOpOnLHS( } } + // icmp pred (urem X, Y), X + if (match(LBO, m_URem(m_Specific(RHS), m_Value()))) { + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + } + // x >> y <=u x // x udiv y <=u x. if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) || diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 7f665018a6fdb..f9bef673f34e6 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -725,9 +725,7 @@ define i1 @urem7(i32 %X) { define i1 @urem8(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem8( -; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp ule i8 [[A]], [[X]] -; CHECK-NEXT: ret i1 [[B]] +; CHECK-NEXT: ret i1 true ; %A = urem i8 %X, %Y %B = icmp ule i8 %A, %X @@ -736,9 +734,7 @@ define i1 @urem8(i8 %X, i8 %Y) { define i1 @urem9(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem9( -; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 [[A]], [[X]] -; CHECK-NEXT: ret i1 [[B]] +; CHECK-NEXT: ret i1 false ; %A = urem i8 %X, %Y %B = icmp ugt i8 %A, %X @@ -747,9 +743,7 @@ define i1 @urem9(i8 %X, i8 %Y) { define i1 @urem10(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem10( -; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp uge i8 [[X]], [[A]] -; CHECK-NEXT: ret i1 [[B]] +; CHECK-NEXT: ret i1 true ; %A = urem i8 %X, %Y %B = icmp uge i8 %X, %A @@ -758,9 +752,7 @@ define i1 @urem10(i8 %X, i8 %Y) { define i1 @urem11(i8 %X, i8 %Y) { ; CHECK-LABEL: @urem11( -; CHECK-NEXT: [[A:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp ult i8 [[X]], [[A]] -; CHECK-NEXT: ret i1 [[B]] +; CHECK-NEXT: ret i1 false ; %A = urem i8 %X, %Y %B = icmp ult i8 %X, %A From 3c0f34700230fc4fd23ef408adb75387dcfeff41 Mon Sep 17 00:00:00 2001 From: Bardia Mahjour Date: Tue, 4 Aug 2020 14:47:24 -0400 Subject: [PATCH 351/600] [NFC][LV] Vectorized Loop Skeleton Refactoring This patch tries to improve readability and maintenance of createVectorizedLoopSkeleton by reorganizing some lines, updating some of the comments and breaking it up into smaller logical units.
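After the change, the entry point reads roughly as follows (an abridged outline of the code in this patch, not a verbatim copy; the induction-variable setup is elided):

BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
  MDNode *OrigLoopID = OrigLoop->getLoopID();
  // Create the empty vector loop and the surrounding blocks.
  Loop *Lp = createVectorLoopSkeleton("");
  // Emit the minimum-iteration-count, SCEV, and memory runtime checks.
  emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader);
  emitSCEVChecks(Lp, LoopScalarPreHeader);
  emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
  // ... create the primary induction variable ...
  // Emit phis for the new starting index of the scalar loop.
  createInductionResumeValues(Lp, getOrCreateVectorTripCount(Lp));
  // Add metadata, the middle-block branch, and run the verifier.
  return completeLoopSkeleton(Lp, OrigLoopID);
}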
Reviewed By: pjeeva01 Differential Revision: https://reviews.llvm.org/D83824 --- .../Transforms/Vectorize/LoopVectorize.cpp | 230 ++++++++++-------- 1 file changed, 132 insertions(+), 98 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 33bd31f6b9833..7bf846d2a617c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -410,8 +410,11 @@ class InnerLoopVectorizer { virtual ~InnerLoopVectorizer() = default; - /// Create a new empty loop. Unlink the old loop and connect the new one. - /// Return the pre-header block of the new loop. + /// Create a new empty loop that will contain vectorized instructions later + /// on, while the old loop will be used as the scalar remainder. Control flow + /// is generated around the vectorized (and scalar epilogue) loops consisting + /// of various checks and bypasses. Return the pre-header block of the new + /// loop. BasicBlock *createVectorizedLoopSkeleton(); /// Widen a single instruction within the innermost loop. @@ -662,6 +665,22 @@ class InnerLoopVectorizer { const DataLayout &DL, const InductionDescriptor &ID) const; + /// Emit basic blocks (prefixed with \p Prefix) for the iteration check, + /// vector loop preheader, middle block and scalar preheader. Also + /// allocate a loop object for the new vector loop and return it. + Loop *createVectorLoopSkeleton(StringRef Prefix); + + /// Create new phi nodes for the induction variables to resume iteration count + /// in the scalar epilogue, from where the vectorized loop left off (given by + /// \p VectorTripCount). + void createInductionResumeValues(Loop *L, Value *VectorTripCount); + + /// Complete the loop skeleton by adding debug MDs, creating appropriate + /// conditional branches in the middle block, preparing the builder and + /// running the verifier. Take in the vector loop \p L as argument, and return + /// the preheader of the completed vector loop. + BasicBlock *completeLoopSkeleton(Loop *L, MDNode *OrigLoopID); + /// Add additional metadata to \p To that was not present on \p Orig. /// /// Currently this is used to add the noalias annotations based on the @@ -2957,56 +2976,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex( llvm_unreachable("invalid enum"); } -BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { - /* - In this function we generate a new loop. The new loop will contain - the vectorized instructions while the old loop will continue to run the - scalar remainder. - - [ ] <-- loop iteration number check. - / | - / v - | [ ] <-- vector loop bypass (may consist of multiple blocks). - | / | - | / v - || [ ] <-- vector pre header. - |/ | - | v - | [ ] \ - | [ ]_| <-- vector loop. - | | - | v - | -[ ] <--- middle-block. - | / | - | / v - -|- >[ ] <--- new preheader. - | | - | v - | [ ] \ - | [ ]_| <-- old scalar loop to handle remainder. - \ | - \ v - >[ ] <-- exit block. - ... - */ - - MDNode *OrigLoopID = OrigLoop->getLoopID(); - - // Some loops have a single integer induction variable, while other loops - // don't. One example is c++ iterators that often have multiple pointer - // induction variables. In the code below we also support a case where we - // don't have a single induction variable. - // - // We try to obtain an induction variable from the original loop as hard - // as possible. 
However if we don't find one that: - // - is an integer - // - counts from zero, stepping by one - // - is the size of the widest induction variable type - // then we create a new one. - OldInduction = Legal->getPrimaryInduction(); - Type *IdxTy = Legal->getWidestInductionType(); - - // Split the single block loop into the two loop structure described above. +Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); LoopExitBlock = OrigLoop->getExitBlock(); @@ -3015,16 +2985,16 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LoopMiddleBlock = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, - LI, nullptr, "middle.block"); + LI, nullptr, Twine(Prefix) + "middle.block"); LoopScalarPreHeader = SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI, - nullptr, "scalar.ph"); + nullptr, Twine(Prefix) + "scalar.ph"); // We intentionally don't let SplitBlock to update LoopInfo since // LoopVectorBody should belong to another loop than LoopVectorPreHeader. // LoopVectorBody is explicitly added to the correct place few lines later. LoopVectorBody = SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT, - nullptr, nullptr, "vector.body"); + nullptr, nullptr, Twine(Prefix) + "vector.body"); // Update dominator for loop exit. DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); @@ -3041,37 +3011,12 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { LI->addTopLevelLoop(Lp); } Lp->addBasicBlockToLoop(LoopVectorBody, *LI); + return Lp; +} - // Find the loop boundaries. - Value *Count = getOrCreateTripCount(Lp); - - Value *StartIdx = ConstantInt::get(IdxTy, 0); - - // Now, compare the new count to zero. If it is zero skip the vector loop and - // jump to the scalar loop. This check also covers the case where the - // backedge-taken count is uint##_max: adding one to it will overflow leading - // to an incorrect trip count of zero. In this (rare) case we will also jump - // to the scalar loop. - emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader); - - // Generate the code to check any assumptions that we've made for SCEV - // expressions. - emitSCEVChecks(Lp, LoopScalarPreHeader); - - // Generate the code that checks in runtime if arrays overlap. We put the - // checks into a separate block to make the more common case of few elements - // faster. - emitMemRuntimeChecks(Lp, LoopScalarPreHeader); - - // Generate the induction variable. - // The loop step is equal to the vectorization factor (num of SIMD elements) - // times the unroll factor (num of SIMD instructions). - Value *CountRoundDown = getOrCreateVectorTripCount(Lp); - Constant *Step = ConstantInt::get(IdxTy, VF * UF); - Induction = - createInductionVariable(Lp, StartIdx, CountRoundDown, Step, - getDebugLocFromInstOrOperands(OldInduction)); - +void InnerLoopVectorizer::createInductionResumeValues(Loop *L, + Value *VectorTripCount) { + assert(VectorTripCount && L && "Expected valid arguments"); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. @@ -3079,10 +3024,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { // iteration in the vectorized loop. // If we come from a bypass edge then we need to start from the original // start value. 
- - // This variable saves the new starting index for the scalar loop. It is used - // to test if there are any tail iterations left once the vector loop has - // completed. for (auto &InductionEntry : Legal->getInductionVars()) { PHINode *OrigPhi = InductionEntry.first; InductionDescriptor II = InductionEntry.second; @@ -3096,13 +3037,13 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *&EndValue = IVEndValues[OrigPhi]; if (OrigPhi == OldInduction) { // We know what the end value is. - EndValue = CountRoundDown; + EndValue = VectorTripCount; } else { - IRBuilder<> B(Lp->getLoopPreheader()->getTerminator()); + IRBuilder<> B(L->getLoopPreheader()->getTerminator()); Type *StepType = II.getStep()->getType(); Instruction::CastOps CastOp = - CastInst::getCastOpcode(CountRoundDown, true, StepType, true); - Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd"); + CastInst::getCastOpcode(VectorTripCount, true, StepType, true); + Value *CRD = B.CreateCast(CastOp, VectorTripCount, StepType, "cast.crd"); const DataLayout &DL = LoopScalarBody->getModule()->getDataLayout(); EndValue = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II); EndValue->setName("ind.end"); @@ -3119,6 +3060,15 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { BCResumeVal->addIncoming(II.getStartValue(), BB); OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal); } +} + +BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(Loop *L, + MDNode *OrigLoopID) { + assert(L && "Expected valid loop."); + + // The trip counts should be cached by now. + Value *Count = getOrCreateTripCount(L); + Value *VectorTripCount = getOrCreateVectorTripCount(L); // We need the OrigLoop (scalar loop part) latch terminator to help // produce correct debug info for the middle block BB instructions. @@ -3136,7 +3086,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { Value *CmpN = Builder.getTrue(); if (!Cost->foldTailByMasking()) { CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, - CountRoundDown, "cmp.n", + VectorTripCount, "cmp.n", LoopMiddleBlock->getTerminator()); // Here we use the same DebugLoc as the scalar loop latch branch instead @@ -3152,7 +3102,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst); // Get ready to start creating new instructions into the vectorized body. - assert(LoopVectorPreHeader == Lp->getLoopPreheader() && + assert(LoopVectorPreHeader == L->getLoopPreheader() && "Inconsistent vector loop preheader"); Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); @@ -3160,7 +3110,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized}); if (VectorizedLoopID.hasValue()) { - Lp->setLoopID(VectorizedLoopID.getValue()); + L->setLoopID(VectorizedLoopID.getValue()); // Do not setAlreadyVectorized if loop attributes have been defined // explicitly. @@ -3170,9 +3120,9 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { // Keep all loop hints from the original loop on the vector loop (we'll // replace the vectorizer-specific hints below). 
if (MDNode *LID = OrigLoop->getLoopID()) - Lp->setLoopID(LID); + L->setLoopID(LID); - LoopVectorizeHints Hints(Lp, true, *ORE); + LoopVectorizeHints Hints(L, true, *ORE); Hints.setAlreadyVectorized(); #ifdef EXPENSIVE_CHECKS @@ -3183,6 +3133,90 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { return LoopVectorPreHeader; } +BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { + /* + In this function we generate a new loop. The new loop will contain + the vectorized instructions while the old loop will continue to run the + scalar remainder. + + [ ] <-- loop iteration number check. + / | + / v + | [ ] <-- vector loop bypass (may consist of multiple blocks). + | / | + | / v + || [ ] <-- vector pre header. + |/ | + | v + | [ ] \ + | [ ]_| <-- vector loop. + | | + | v + | -[ ] <--- middle-block. + | / | + | / v + -|- >[ ] <--- new preheader. + | | + | v + | [ ] \ + | [ ]_| <-- old scalar loop to handle remainder. + \ | + \ v + >[ ] <-- exit block. + ... + */ + + // Get the metadata of the original loop before it gets modified. + MDNode *OrigLoopID = OrigLoop->getLoopID(); + + // Create an empty vector loop, and prepare basic blocks for the runtime + // checks. + Loop *Lp = createVectorLoopSkeleton(""); + + // Now, compare the new count to zero. If it is zero skip the vector loop and + // jump to the scalar loop. This check also covers the case where the + // backedge-taken count is uint##_max: adding one to it will overflow leading + // to an incorrect trip count of zero. In this (rare) case we will also jump + // to the scalar loop. + emitMinimumIterationCountCheck(Lp, LoopScalarPreHeader); + + // Generate the code to check any assumptions that we've made for SCEV + // expressions. + emitSCEVChecks(Lp, LoopScalarPreHeader); + + // Generate the code that checks in runtime if arrays overlap. We put the + // checks into a separate block to make the more common case of few elements + // faster. + emitMemRuntimeChecks(Lp, LoopScalarPreHeader); + + // Some loops have a single integer induction variable, while other loops + // don't. One example is c++ iterators that often have multiple pointer + // induction variables. In the code below we also support a case where we + // don't have a single induction variable. + // + // We try to obtain an induction variable from the original loop as hard + // as possible. However if we don't find one that: + // - is an integer + // - counts from zero, stepping by one + // - is the size of the widest induction variable type + // then we create a new one. + OldInduction = Legal->getPrimaryInduction(); + Type *IdxTy = Legal->getWidestInductionType(); + Value *StartIdx = ConstantInt::get(IdxTy, 0); + // The loop step is equal to the vectorization factor (num of SIMD elements) + // times the unroll factor (num of SIMD instructions). + Constant *Step = ConstantInt::get(IdxTy, VF * UF); + Value *CountRoundDown = getOrCreateVectorTripCount(Lp); + Induction = + createInductionVariable(Lp, StartIdx, CountRoundDown, Step, + getDebugLocFromInstOrOperands(OldInduction)); + + // Emit phis for the new starting index of the scalar loop. + createInductionResumeValues(Lp, CountRoundDown); + + return completeLoopSkeleton(Lp, OrigLoopID); +} + // Fix up external users of the induction variable. At this point, we are // in LCSSA form, with all external PHIs that use the IV having one input value, // coming from the remainder loop. 
We need those PHIs to also have a correct From e8dcf5f87dc20b3f08005ac767ff934e36bf2a5b Mon Sep 17 00:00:00 2001 From: aartbik Date: Fri, 31 Jul 2020 12:47:25 -0700 Subject: [PATCH 352/600] [mlir] [VectorOps] Add expand/compress operations to Vector dialect Introduces the expand and compress operations to the Vector dialect (important memory operations for sparse computations), together with a first reference implementation that lowers to the LLVM IR dialect to enable running on CPU (and other targets that support the corresponding LLVM IR intrinsics). Reviewed By: reidtatge Differential Revision: https://reviews.llvm.org/D84888 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 10 ++ mlir/include/mlir/Dialect/Vector/VectorOps.td | 107 +++++++++++++++++- .../Dialect/Vector/CPU/test-compress.mlir | 90 +++++++++++++++ .../Dialect/Vector/CPU/test-expand.mlir | 82 ++++++++++++++ .../Dialect/Vector/CPU/test-scatter.mlir | 56 ++++----- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 106 ++++++++++++++--- mlir/lib/Dialect/Vector/VectorOps.cpp | 35 ++++++ .../VectorToLLVM/vector-to-llvm.mlir | 20 ++++ mlir/test/Dialect/Vector/invalid.mlir | 35 ++++++ mlir/test/Dialect/Vector/ops.mlir | 9 ++ mlir/test/Target/llvmir-intrinsics.mlir | 15 ++- 11 files changed, 505 insertions(+), 60 deletions(-) create mode 100644 mlir/integration_test/Dialect/Vector/CPU/test-compress.mlir create mode 100644 mlir/integration_test/Dialect/Vector/CPU/test-expand.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 4b1a6efe002f2..768d8db121df4 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1042,6 +1042,16 @@ def LLVM_masked_scatter "type($value) `,` type($mask) `into` type($ptrs)"; } +/// Create a call to Masked Expand Load intrinsic. +def LLVM_masked_expandload + : LLVM_IntrOp<"masked.expandload", [0], [], [], 1>, + Arguments<(ins LLVM_Type, LLVM_Type, LLVM_Type)>; + +/// Create a call to Masked Compress Store intrinsic. +def LLVM_masked_compressstore + : LLVM_IntrOp<"masked.compressstore", [], [0], [], 0>, + Arguments<(ins LLVM_Type, LLVM_Type, LLVM_Type)>; + // // Atomic operations. 
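Before the dialect-level changes below, the per-lane behavior of the two intrinsics declared above is easiest to see in scalar form. The following C++ reference model mirrors the informal semantics quoted in the op descriptions that follow; it is an illustration only, not generated code:

```c++
#include <cstddef>

// Scalar reference semantics of llvm.masked.expandload: enabled lanes read
// consecutive elements from `base`; disabled lanes take the pass-through.
void expandLoad(const float *base, const bool *mask, const float *passThru,
                float *result, size_t n) {
  size_t idx = 0;
  for (size_t i = 0; i < n; ++i)
    result[i] = mask[i] ? base[idx++] : passThru[i]; // idx advances only on set bits
}

// Scalar reference semantics of llvm.masked.compressstore: enabled lanes are
// written to consecutive memory locations; disabled lanes are skipped.
void compressStore(float *base, const bool *mask, const float *value,
                   size_t n) {
  size_t idx = 0;
  for (size_t i = 0; i < n; ++i)
    if (mask[i])
      base[idx++] = value[i];
}
```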
 //
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
index b49cc4a62a502..89a2b1226e1e2 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -1158,7 +1158,7 @@ def Vector_GatherOp :
                Variadic<VectorOfRank<[1]>>:$pass_thru)>,
     Results<(outs VectorOfRank<[1]>:$result)> {
 
-  let summary = "gathers elements from memory into a vector as defined by an index vector";
+  let summary = "gathers elements from memory into a vector as defined by an index vector and mask";
 
   let description = [{
     The gather operation gathers elements from memory into a 1-D vector as
@@ -1186,7 +1186,6 @@ def Vector_GatherOp :
     %g = vector.gather %base, %indices, %mask, %pass_thru
         : (memref<?xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>) -> vector<16xf32>
     ```
-
   }];
 
   let extraClassDeclaration = [{
     MemRefType getMemRefType() {
@@ -1217,7 +1216,7 @@ def Vector_ScatterOp :
                VectorOfRankAndType<[1], [I1]>:$mask,
                VectorOfRank<[1]>:$value)> {
 
-  let summary = "scatters elements from a vector into memory as defined by an index vector";
+  let summary = "scatters elements from a vector into memory as defined by an index vector and mask";
 
   let description = [{
     The scatter operation scatters elements from a 1-D vector into memory as
@@ -1265,6 +1264,108 @@ def Vector_ScatterOp :
       "type($indices) `,` type($mask) `,` type($value) `into` type($base)";
 }
 
+def Vector_ExpandLoadOp :
+  Vector_Op<"expandload">,
+    Arguments<(ins AnyMemRef:$base,
+               VectorOfRankAndType<[1], [I1]>:$mask,
+               VectorOfRank<[1]>:$pass_thru)>,
+    Results<(outs VectorOfRank<[1]>:$result)> {
+
+  let summary = "reads elements from memory and spreads them into a vector as defined by a mask";
+
+  let description = [{
+    The expand load reads elements from memory into a 1-D vector as defined
+    by a base and a 1-D mask vector. When the mask is set, the next element
+    is read from memory. Otherwise, the corresponding element is taken from
+    a 1-D pass-through vector. Informally the semantics are:
+    ```
+    index = base
+    result[0] := mask[0] ? MEM[index++] : pass_thru[0]
+    result[1] := mask[1] ? MEM[index++] : pass_thru[1]
+    etc.
+    ```
+    Note that the index increment is done conditionally.
+
+    The expand load can be used directly where applicable, or can be used
+    during progressive lowering to bring other memory operations closer to
+    hardware ISA support for an expand. The semantics of the operation closely
+    correspond to those of the `llvm.masked.expandload`
+    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-expandload-intrinsics).
+
+    Example:
+
+    ```mlir
+    %0 = vector.expandload %base, %mask, %pass_thru
+       : memref<?xf32>, vector<8xi1>, vector<8xf32> into vector<8xf32>
+    ```
+  }];
+  let extraClassDeclaration = [{
+    MemRefType getMemRefType() {
+      return base().getType().cast<MemRefType>();
+    }
+    VectorType getMaskVectorType() {
+      return mask().getType().cast<VectorType>();
+    }
+    VectorType getPassThruVectorType() {
+      return pass_thru().getType().cast<VectorType>();
+    }
+    VectorType getResultVectorType() {
+      return result().getType().cast<VectorType>();
+    }
+  }];
+  let assemblyFormat = "$base `,` $mask `,` $pass_thru attr-dict `:` "
+    "type($base) `,` type($mask) `,` type($pass_thru) `into` type($result)";
+}
+
+def Vector_CompressStoreOp :
+  Vector_Op<"compressstore">,
+    Arguments<(ins AnyMemRef:$base,
+               VectorOfRankAndType<[1], [I1]>:$mask,
+               VectorOfRank<[1]>:$value)> {
+
+  let summary = "writes elements selectively from a vector as defined by a mask";
+
+  let description = [{
+    The compress store operation writes elements from a 1-D vector into memory
+    as defined by a base and a 1-D mask vector. When the mask is set, the
+    corresponding element from the vector is written to the next consecutive
+    memory location. Otherwise, no action is taken for the element. Informally
+    the semantics are:
+    ```
+    index = base
+    if (mask[0]) MEM[index++] = value[0]
+    if (mask[1]) MEM[index++] = value[1]
+    etc.
+    ```
+    Note that the index increment is done conditionally.
+
+    The compress store can be used directly where applicable, or can be used
+    during progressive lowering to bring other memory operations closer to
+    hardware ISA support for a compress. The semantics of the operation closely
+    correspond to those of the `llvm.masked.compressstore`
+    [intrinsic](https://llvm.org/docs/LangRef.html#llvm-masked-compressstore-intrinsics).
+
+    Example:
+
+    ```mlir
+    vector.compressstore %base, %mask, %value
+      : memref<?xf32>, vector<8xi1>, vector<8xf32>
+    ```
+  }];
+  let extraClassDeclaration = [{
+    MemRefType getMemRefType() {
+      return base().getType().cast<MemRefType>();
+    }
+    VectorType getMaskVectorType() {
+      return mask().getType().cast<VectorType>();
+    }
+    VectorType getValueVectorType() {
+      return value().getType().cast<VectorType>();
+    }
+  }];
+  let assemblyFormat = "$base `,` $mask `,` $value attr-dict `:` "
+    "type($base) `,` type($mask) `,` type($value)";
+}
+
 def Vector_ShapeCastOp :
   Vector_Op<"shape_cast", [NoSideEffect]>,
     Arguments<(ins AnyTypeOf<[AnyVector, TupleOf<[AnyVector]>]>:$source)>,
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-compress.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-compress.mlir
new file mode 100644
index 0000000000000..6310d6ee87900
--- /dev/null
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-compress.mlir
@@ -0,0 +1,90 @@
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+func @compress16(%base: memref<?xf32>,
+                 %mask: vector<16xi1>, %value: vector<16xf32>) {
+  vector.compressstore %base, %mask, %value
+    : memref<?xf32>, vector<16xi1>, vector<16xf32>
+  return
+}
+
+func @printmem16(%A: memref<?xf32>) {
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c16 = constant 16: index
+  %z = constant 0.0: f32
+  %m = vector.broadcast %z : f32 to vector<16xf32>
+  %mem = scf.for %i = %c0 to %c16 step %c1
+    iter_args(%m_iter = %m) -> (vector<16xf32>) {
+    %c = load %A[%i] : memref<?xf32>
+    %i32 = index_cast %i : index to i32
+    %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<16xf32>
+    scf.yield %m_new : vector<16xf32>
+  }
+  vector.print %mem : vector<16xf32>
+  return
+}
+
+func @entry() {
+  // Set up memory.
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c16 = constant 16: index
+  %A = alloc(%c16) : memref<?xf32>
+  %z = constant 0.0: f32
+  %v = vector.broadcast %z : f32 to vector<16xf32>
+  %value = scf.for %i = %c0 to %c16 step %c1
+    iter_args(%v_iter = %v) -> (vector<16xf32>) {
+    store %z, %A[%i] : memref<?xf32>
+    %i32 = index_cast %i : index to i32
+    %fi = sitofp %i32 : i32 to f32
+    %v_new = vector.insertelement %fi, %v_iter[%i32 : i32] : vector<16xf32>
+    scf.yield %v_new : vector<16xf32>
+  }
+
+  // Set up masks.
+  %f = constant 0: i1
+  %t = constant 1: i1
+  %none = vector.constant_mask [0] : vector<16xi1>
+  %all = vector.constant_mask [16] : vector<16xi1>
+  %some1 = vector.constant_mask [4] : vector<16xi1>
+  %0 = vector.insert %f, %some1[0] : i1 into vector<16xi1>
+  %1 = vector.insert %t, %0[7] : i1 into vector<16xi1>
+  %2 = vector.insert %t, %1[11] : i1 into vector<16xi1>
+  %3 = vector.insert %t, %2[13] : i1 into vector<16xi1>
+  %some2 = vector.insert %t, %3[15] : i1 into vector<16xi1>
+  %some3 = vector.insert %f, %some2[2] : i1 into vector<16xi1>
+
+  //
+  // Compress store tests.
+  //
+
+  call @compress16(%A, %none, %value)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
+  call @printmem16(%A) : (memref<?xf32>) -> ()
+  // CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
+
+  call @compress16(%A, %all, %value)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
+  call @printmem16(%A) : (memref<?xf32>) -> ()
+  // CHECK-NEXT: ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 )
+
+  call @compress16(%A, %some3, %value)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
+  call @printmem16(%A) : (memref<?xf32>) -> ()
+  // CHECK-NEXT: ( 1, 3, 7, 11, 13, 15, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 )
+
+  call @compress16(%A, %some2, %value)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
+  call @printmem16(%A) : (memref<?xf32>) -> ()
+  // CHECK-NEXT: ( 1, 2, 3, 7, 11, 13, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15 )
+
+  call @compress16(%A, %some1, %value)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> ()
+  call @printmem16(%A) : (memref<?xf32>) -> ()
+  // CHECK-NEXT: ( 0, 1, 2, 3, 11, 13, 15, 7, 8, 9, 10, 11, 12, 13, 14, 15 )
+
+  return
+}
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-expand.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-expand.mlir
new file mode 100644
index 0000000000000..74118fc1125b4
--- /dev/null
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-expand.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+func @expand16(%base: memref<?xf32>,
+               %mask: vector<16xi1>,
+               %pass_thru: vector<16xf32>) -> vector<16xf32> {
+  %e = vector.expandload %base, %mask, %pass_thru
+    : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+  return %e : vector<16xf32>
+}
+
+func @entry() {
+  // Set up memory.
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c16 = constant 16: index
+  %A = alloc(%c16) : memref<?xf32>
+  scf.for %i = %c0 to %c16 step %c1 {
+    %i32 = index_cast %i : index to i32
+    %fi = sitofp %i32 : i32 to f32
+    store %fi, %A[%i] : memref<?xf32>
+  }
+
+  // Set up pass thru vector.
+  %u = constant -7.0: f32
+  %v = constant 7.7: f32
+  %pass = vector.broadcast %u : f32 to vector<16xf32>
+
+  // Set up masks.
+  %f = constant 0: i1
+  %t = constant 1: i1
+  %none = vector.constant_mask [0] : vector<16xi1>
+  %all = vector.constant_mask [16] : vector<16xi1>
+  %some1 = vector.constant_mask [4] : vector<16xi1>
+  %0 = vector.insert %f, %some1[0] : i1 into vector<16xi1>
+  %1 = vector.insert %t, %0[7] : i1 into vector<16xi1>
+  %2 = vector.insert %t, %1[11] : i1 into vector<16xi1>
+  %3 = vector.insert %t, %2[13] : i1 into vector<16xi1>
+  %some2 = vector.insert %t, %3[15] : i1 into vector<16xi1>
+  %some3 = vector.insert %f, %some2[2] : i1 into vector<16xi1>
+
+  //
+  // Expanding load tests.
+  //
+
+  %e1 = call @expand16(%A, %none, %pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e1 : vector<16xf32>
+  // CHECK: ( -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7 )
+
+  %e2 = call @expand16(%A, %all, %pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e2 : vector<16xf32>
+  // CHECK-NEXT: ( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 )
+
+  %e3 = call @expand16(%A, %some1, %pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e3 : vector<16xf32>
+  // CHECK-NEXT: ( 0, 1, 2, 3, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7, -7 )
+
+  %e4 = call @expand16(%A, %some2, %pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e4 : vector<16xf32>
+  // CHECK-NEXT: ( -7, 0, 1, 2, -7, -7, -7, 3, -7, -7, -7, 4, -7, 5, -7, 6 )
+
+  %e5 = call @expand16(%A, %some3, %pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e5 : vector<16xf32>
+  // CHECK-NEXT: ( -7, 0, -7, 1, -7, -7, -7, 2, -7, -7, -7, 3, -7, 4, -7, 5 )
+
+  %4 = vector.insert %v, %pass[1] : f32 into vector<16xf32>
+  %5 = vector.insert %v, %4[2] : f32 into vector<16xf32>
+  %alt_pass = vector.insert %v, %5[14] : f32 into vector<16xf32>
+  %e6 = call @expand16(%A, %some3, %alt_pass)
+    : (memref<?xf32>, vector<16xi1>, vector<16xf32>) -> (vector<16xf32>)
+  vector.print %e6 : vector<16xf32>
+  // CHECK-NEXT: ( -7, 0, 7.7, 1, -7, -7, -7, 2, -7, -7, -7, 3, -7, 4, 7.7, 5 )
+
+  return
+}
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-scatter.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-scatter.mlir
index 6dd0cf1695525..54171e7446055 100644
--- a/mlir/integration_test/Dialect/Vector/CPU/test-scatter.mlir
+++ b/mlir/integration_test/Dialect/Vector/CPU/test-scatter.mlir
@@ -11,34 +11,20 @@ func @scatter8(%base: memref<?xf32>,
   return
 }
 
-func @printmem(%A: memref<?xf32>) {
-  %f = constant 0.0: f32
-  %0 = vector.broadcast %f : f32 to vector<8xf32>
-  %1 = constant 0: index
-  %2 = load %A[%1] : memref<?xf32>
-  %3 = vector.insert %2, %0[0] : f32 into vector<8xf32>
-  %4 = constant 1: index
-  %5 = load %A[%4] : memref<?xf32>
-  %6 = vector.insert %5, %3[1] : f32 into vector<8xf32>
-  %7 = constant 2: index
-  %8 = load %A[%7] : memref<?xf32>
-  %9 = vector.insert %8, %6[2] : f32 into vector<8xf32>
-  %10 = constant 3: index
-  %11 = load %A[%10] : memref<?xf32>
-  %12 = vector.insert %11, %9[3] : f32 into vector<8xf32>
-  %13 = constant 4: index
-  %14 = load %A[%13] : memref<?xf32>
-  %15 = vector.insert %14, %12[4] : f32 into vector<8xf32>
-  %16 = constant 5: index
-  %17 = load %A[%16] : memref<?xf32>
-  %18 = vector.insert %17, %15[5] : f32 into vector<8xf32>
-  %19 = constant 6: index
-  %20 = load %A[%19] : memref<?xf32>
-  %21 = vector.insert %20, %18[6] : f32 into vector<8xf32>
-  %22 = constant 7: index
-  %23 = load %A[%22] : memref<?xf32>
-  %24 = vector.insert %23, %21[7] : f32 into vector<8xf32>
-  vector.print %24 : vector<8xf32>
+func @printmem8(%A: memref<?xf32>) {
+  %c0 = constant 0: index
+  %c1 = constant 1: index
+  %c8 = constant 8: index
+  %z = constant 0.0: f32
+  %m = vector.broadcast %z : f32 to vector<8xf32>
+  %mem = scf.for %i = %c0 to %c8 step %c1
+    iter_args(%m_iter = %m) -> (vector<8xf32>) {
+    %c = load %A[%i] : memref<?xf32>
+    %i32 = index_cast %i : index to i32
+    %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<8xf32>
+    scf.yield %m_new : vector<8xf32>
+  }
+  vector.print %mem : vector<8xf32>
   return
 }
 
@@ -104,31 +90,27 @@ func @entry() {
   vector.print %idx : vector<8xi32>
   // CHECK: ( 7, 0, 1, 6, 2, 4, 5, 3 )
 
-  call @printmem(%A) : (memref<?xf32>) -> ()
+  call @printmem8(%A) : (memref<?xf32>) -> ()
   // CHECK: ( 0, 1, 2, 3, 4, 5, 6, 7 )
 
   call @scatter8(%A, %idx, %none, %val)
     : (memref<?xf32>, vector<8xi32>, vector<8xi1>, vector<8xf32>) -> ()
-
-  call @printmem(%A) : (memref<?xf32>) -> ()
+  call @printmem8(%A) : (memref<?xf32>) -> ()
   // CHECK: ( 0, 1, 2, 3, 4, 5, 6, 7 )
 
   call @scatter8(%A, %idx, %some, %val)
     : (memref<?xf32>, vector<8xi32>, vector<8xi1>, vector<8xf32>) -> ()
-
-  call @printmem(%A) : (memref<?xf32>) -> ()
+  call @printmem8(%A) : (memref<?xf32>) -> ()
   // CHECK: ( 1, 2, 2, 3, 4, 5, 3, 0 )
 
   call @scatter8(%A, %idx, %more, %val)
     : (memref<?xf32>, vector<8xi32>, vector<8xi1>, vector<8xf32>) -> ()
-
-  call @printmem(%A) : (memref<?xf32>) -> ()
+  call @printmem8(%A) : (memref<?xf32>) -> ()
   // CHECK: ( 1, 2, 2, 7, 4, 5, 3, 0 )
 
   call @scatter8(%A, %idx, %all, %val)
     : (memref<?xf32>, vector<8xi32>, vector<8xi1>, vector<8xf32>) -> ()
-
-  call @printmem(%A) : (memref<?xf32>) -> ()
+  call @printmem8(%A) : (memref<?xf32>) -> ()
   // CHECK: ( 1, 2, 4, 7, 5, 6, 3, 0 )
 
   return
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 3dbfaf88a443b..23373f5c7edff 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -134,11 +134,9 @@ LogicalResult getMemRefAlignment(LLVMTypeConverter &typeConverter, T op,
   return success();
 }
 
-// Helper that returns vector of pointers given a base and an index vector.
-LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter,
-                             LLVMTypeConverter &typeConverter, Location loc,
-                             Value memref, Value indices, MemRefType memRefType,
-                             VectorType vType, Type iType, Value &ptrs) {
+// Helper that returns the base address of a memref.
+LogicalResult getBase(ConversionPatternRewriter &rewriter, Location loc,
+                      Value memref, MemRefType memRefType, Value &base) {
   // Inspect stride and offset structure.
   //
   // TODO: flat memory only for now, generalize
@@ -149,13 +147,31 @@ LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter,
   if (failed(successStrides) || strides.size() != 1 || strides[0] != 1 ||
       offset != 0 || memRefType.getMemorySpace() != 0)
     return failure();
+  base = MemRefDescriptor(memref).alignedPtr(rewriter, loc);
+  return success();
+}
+
+// Helper that returns a pointer given a memref base.
+LogicalResult getBasePtr(ConversionPatternRewriter &rewriter, Location loc,
+                         Value memref, MemRefType memRefType, Value &ptr) {
+  Value base;
+  if (failed(getBase(rewriter, loc, memref, memRefType, base)))
+    return failure();
+  auto pType = MemRefDescriptor(memref).getElementType();
+  ptr = rewriter.create<LLVM::GEPOp>(loc, pType, base);
+  return success();
+}
-
-  // Create a vector of pointers from base and indices.
-  MemRefDescriptor memRefDescriptor(memref);
-  Value base = memRefDescriptor.alignedPtr(rewriter, loc);
-  int64_t size = vType.getDimSize(0);
-  auto pType = memRefDescriptor.getElementType();
-  auto ptrsType = LLVM::LLVMType::getVectorTy(pType, size);
+// Helper that returns vector of pointers given a memref base and an index
+// vector.
+LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, Location loc,
+                             Value memref, Value indices, MemRefType memRefType,
+                             VectorType vType, Type iType, Value &ptrs) {
+  Value base;
+  if (failed(getBase(rewriter, loc, memref, memRefType, base)))
+    return failure();
+  auto pType = MemRefDescriptor(memref).getElementType();
+  auto ptrsType = LLVM::LLVMType::getVectorTy(pType, vType.getDimSize(0));
   ptrs = rewriter.create<LLVM::GEPOp>(loc, ptrsType, base, indices);
   return success();
 }
@@ -305,9 +321,8 @@ class VectorGatherOpConversion : public ConvertToLLVMPattern {
     VectorType vType = gather.getResultVectorType();
     Type iType = gather.getIndicesVectorType().getElementType();
     Value ptrs;
-    if (failed(getIndexedPtrs(rewriter, typeConverter, loc, adaptor.base(),
-                              adaptor.indices(), gather.getMemRefType(), vType,
-                              iType, ptrs)))
+    if (failed(getIndexedPtrs(rewriter, loc, adaptor.base(), adaptor.indices(),
+                              gather.getMemRefType(), vType, iType, ptrs)))
       return failure();
 
     // Replace with the gather intrinsic.
@@ -344,9 +359,8 @@ class VectorScatterOpConversion : public ConvertToLLVMPattern {
     VectorType vType = scatter.getValueVectorType();
     Type iType = scatter.getIndicesVectorType().getElementType();
     Value ptrs;
-    if (failed(getIndexedPtrs(rewriter, typeConverter, loc, adaptor.base(),
-                              adaptor.indices(), scatter.getMemRefType(), vType,
-                              iType, ptrs)))
+    if (failed(getIndexedPtrs(rewriter, loc, adaptor.base(), adaptor.indices(),
+                              scatter.getMemRefType(), vType, iType, ptrs)))
       return failure();
 
     // Replace with the scatter intrinsic.
@@ -357,6 +371,60 @@ class VectorScatterOpConversion : public ConvertToLLVMPattern {
   }
 };
 
+/// Conversion pattern for a vector.expandload.
+class VectorExpandLoadOpConversion : public ConvertToLLVMPattern {
+public:
+  explicit VectorExpandLoadOpConversion(MLIRContext *context,
+                                        LLVMTypeConverter &typeConverter)
+      : ConvertToLLVMPattern(vector::ExpandLoadOp::getOperationName(), context,
+                             typeConverter) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op->getLoc();
+    auto expand = cast<vector::ExpandLoadOp>(op);
+    auto adaptor = vector::ExpandLoadOpAdaptor(operands);
+
+    Value ptr;
+    if (failed(getBasePtr(rewriter, loc, adaptor.base(),
+                          expand.getMemRefType(), ptr)))
+      return failure();
+
+    auto vType = expand.getResultVectorType();
+    rewriter.replaceOpWithNewOp<LLVM::masked_expandload>(
+        op, typeConverter.convertType(vType), ptr, adaptor.mask(),
+        adaptor.pass_thru());
+    return success();
+  }
+};
+
+/// Conversion pattern for a vector.compressstore.
+class VectorCompressStoreOpConversion : public ConvertToLLVMPattern {
+public:
+  explicit VectorCompressStoreOpConversion(MLIRContext *context,
+                                           LLVMTypeConverter &typeConverter)
+      : ConvertToLLVMPattern(vector::CompressStoreOp::getOperationName(),
+                             context, typeConverter) {}
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = op->getLoc();
+    auto compress = cast<vector::CompressStoreOp>(op);
+    auto adaptor = vector::CompressStoreOpAdaptor(operands);
+
+    Value ptr;
+    if (failed(getBasePtr(rewriter, loc, adaptor.base(),
+                          compress.getMemRefType(), ptr)))
+      return failure();
+
+    rewriter.replaceOpWithNewOp<LLVM::masked_compressstore>(
+        op, adaptor.value(), ptr, adaptor.mask());
+    return success();
+  }
+};
+
 /// Conversion pattern for all vector reductions.
 class VectorReductionOpConversion : public ConvertToLLVMPattern {
 public:
@@ -1274,7 +1342,9 @@ void mlir::populateVectorToLLVMConversionPatterns(
                   VectorTransferConversion<TransferWriteOp>,
                   VectorTypeCastOpConversion,
                   VectorGatherOpConversion,
-                  VectorScatterOpConversion>(ctx, converter);
+                  VectorScatterOpConversion,
+                  VectorExpandLoadOpConversion,
+                  VectorCompressStoreOpConversion>(ctx, converter);
   // clang-format on
 }
diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp
index c788d4ccb4a08..9e64ff9af80a3 100644
--- a/mlir/lib/Dialect/Vector/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/VectorOps.cpp
@@ -1898,6 +1898,41 @@ static LogicalResult verify(ScatterOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// ExpandLoadOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(ExpandLoadOp op) {
+  VectorType maskVType = op.getMaskVectorType();
+  VectorType passVType = op.getPassThruVectorType();
+  VectorType resVType = op.getResultVectorType();
+
+  if (resVType.getElementType() != op.getMemRefType().getElementType())
+    return op.emitOpError("base and result element type should match");
+
+  if (resVType.getDimSize(0) != maskVType.getDimSize(0))
+    return op.emitOpError("expected result dim to match mask dim");
+  if (resVType != passVType)
+    return op.emitOpError("expected pass_thru of same type as result type");
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// CompressStoreOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(CompressStoreOp op) {
+  VectorType maskVType = op.getMaskVectorType();
+  VectorType valueVType = op.getValueVectorType();
+
+  if (valueVType.getElementType() != op.getMemRefType().getElementType())
+    return op.emitOpError("base and value element type should match");
+
+  if (valueVType.getDimSize(0) != maskVType.getDimSize(0))
+    return op.emitOpError("expected value dim to match mask dim");
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // ShapeCastOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2e5aae886c380..be70c08bc9486 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -989,3 +989,23 @@ func @scatter_op(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>
 // CHECK: %[[P:.*]] = llvm.getelementptr {{.*}}[%{{.*}}] : (!llvm.ptr<float>, !llvm.vec<3 x i32>) -> !llvm.vec<3 x ptr<float>>
 // CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : !llvm.vec<3 x float>, !llvm.vec<3 x i1> into !llvm.vec<3 x ptr<float>>
 // CHECK: llvm.return
+
+func @expand_load_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) -> vector<11xf32> {
+  %0 = vector.expandload %arg0, %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32> into vector<11xf32>
+  return %0 : vector<11xf32>
+}
+
+// CHECK-LABEL: func @expand_load_op
+// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[] : (!llvm.ptr<float>) -> !llvm.ptr<float>
+// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr<float>, !llvm.vec<11 x i1>, !llvm.vec<11 x float>) -> !llvm.vec<11 x float>
+// CHECK: llvm.return %[[E]] : !llvm.vec<11 x float>
+
+func @compress_store_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) {
+  vector.compressstore %arg0, %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32>
+  return
+}
+
+// CHECK-LABEL: func @compress_store_op
+// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[] : (!llvm.ptr<float>) -> !llvm.ptr<float>
+// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (!llvm.vec<11 x float>, !llvm.ptr<float>, !llvm.vec<11 x i1>) -> ()
+// CHECK: llvm.return
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index ea354f51645af..651fe27cd36c7 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1240,3 +1240,38 @@ func @scatter_dim_mask_mismatch(%base: memref<?xf32>, %indices: vector<16xi32>,
   // expected-error@+1 {{'vector.scatter' op expected value dim to match mask dim}}
   vector.scatter %base, %indices, %mask, %value : vector<16xi32>, vector<17xi1>, vector<16xf32> into memref<?xf32>
 }
+
+// -----
+
+func @expand_base_type_mismatch(%base: memref<?xf64>, %mask: vector<16xi1>, %pass_thru: vector<16xf32>) {
+  // expected-error@+1 {{'vector.expandload' op base and result element type should match}}
+  %0 = vector.expandload %base, %mask, %pass_thru : memref<?xf64>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+}
+
+// -----
+
+func @expand_dim_mask_mismatch(%base: memref<?xf32>, %mask: vector<17xi1>, %pass_thru: vector<16xf32>) {
+  // expected-error@+1 {{'vector.expandload' op expected result dim to match mask dim}}
+  %0 = vector.expandload %base, %mask, %pass_thru : memref<?xf32>, vector<17xi1>, vector<16xf32> into vector<16xf32>
+}
+
+// -----
+
+func @expand_pass_thru_mismatch(%base: memref<?xf32>, %mask: vector<16xi1>, %pass_thru: vector<17xf32>) {
+  // expected-error@+1 {{'vector.expandload' op expected pass_thru of same type as result type}}
+  %0 = vector.expandload %base, %mask, %pass_thru : memref<?xf32>, vector<16xi1>, vector<17xf32> into vector<16xf32>
+}
+
+// -----
+
+func @compress_base_type_mismatch(%base: memref<?xf64>, %mask: vector<16xi1>, %value: vector<16xf32>) {
+  // expected-error@+1 {{'vector.compressstore' op base and value element type should match}}
+  vector.compressstore %base, %mask, %value : memref<?xf64>, vector<16xi1>, vector<16xf32>
+}
+
+// -----
+
+func @compress_dim_mask_mismatch(%base: memref<?xf32>, %mask: vector<17xi1>, %value: vector<16xf32>) {
+  // expected-error@+1 {{'vector.compressstore' op expected value dim to match mask dim}}
+  vector.compressstore %base, %mask, %value : memref<?xf32>, vector<17xi1>, vector<16xf32>
+}
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index 0bf4ed8f84c7c..d4d1abe8e646c 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -379,3 +379,12 @@ func @gather_and_scatter(%base: memref<?xf32>, %indices: vector<16xi32>, %mask:
   vector.scatter %base, %indices, %mask, %1 : vector<16xi32>, vector<16xi1>, vector<16xf32> into memref<?xf32>
   return
 }
+
+// CHECK-LABEL: @expand_and_compress
+func @expand_and_compress(%base: memref<?xf32>, %mask: vector<16xi1>, %passthru: vector<16xf32>) {
+  // CHECK: %[[X:.*]] = vector.expandload %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+  %0 = vector.expandload %base, %mask, %passthru : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
+  // CHECK: vector.compressstore %{{.*}}, %{{.*}}, %[[X]] : memref<?xf32>, vector<16xi1>, vector<16xf32>
+  vector.compressstore %base, %mask, %0 : memref<?xf32>, vector<16xi1>, vector<16xf32>
+  return
+}
diff --git a/mlir/test/Target/llvmir-intrinsics.mlir b/mlir/test/Target/llvmir-intrinsics.mlir
index fc286599ee955..6bf9b9768dd3a 100644
--- a/mlir/test/Target/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/llvmir-intrinsics.mlir
@@ -237,8 +237,8 @@ llvm.func @matrix_intrinsics(%A: !llvm.vec<64 x float>, %B: !llvm.vec<48 x float
   llvm.return
 }
 
-// CHECK-LABEL: @masked_intrinsics
-llvm.func @masked_intrinsics(%A: !llvm.ptr<vec<7 x float>>, %mask: !llvm.vec<7 x i1>) {
+// CHECK-LABEL: @masked_load_store_intrinsics
+llvm.func @masked_load_store_intrinsics(%A: !llvm.ptr<vec<7 x float>>, %mask: !llvm.vec<7 x i1>) {
   // CHECK: call <7 x float> @llvm.masked.load.v7f32.p0v7f32(<7 x float>* %{{.*}}, i32 1, <7 x i1> %{{.*}}, <7 x float> undef)
   %a = llvm.intr.masked.load %A, %mask { alignment = 1: i32} :
     (!llvm.ptr<vec<7 x float>>, !llvm.vec<7 x i1>) -> !llvm.vec<7 x float>
@@ -265,6 +265,17 @@ llvm.func @masked_gather_scatter_intrinsics(%M: !llvm.vec<7 x ptr<float>>, %mask
   llvm.return
 }
 
+// CHECK-LABEL: @masked_expand_compress_intrinsics
+llvm.func @masked_expand_compress_intrinsics(%ptr: !llvm.ptr<float>, %mask: !llvm.vec<7 x i1>, %passthru: !llvm.vec<7 x float>) {
+  // CHECK: call <7 x float> @llvm.masked.expandload.v7f32(float* %{{.*}}, <7 x i1> %{{.*}}, <7 x float> %{{.*}})
+  %0 = "llvm.intr.masked.expandload"(%ptr, %mask, %passthru)
+    : (!llvm.ptr<float>, !llvm.vec<7 x i1>, !llvm.vec<7 x float>) -> (!llvm.vec<7 x float>)
+  // CHECK: call void @llvm.masked.compressstore.v7f32(<7 x float> %{{.*}}, float* %{{.*}}, <7 x i1> %{{.*}})
+  "llvm.intr.masked.compressstore"(%0, %ptr, %mask)
+    : (!llvm.vec<7 x float>, !llvm.ptr<float>, !llvm.vec<7 x i1>) -> ()
+  llvm.return
+}
+
 // CHECK-LABEL: @memcpy_test
 llvm.func @memcpy_test(%arg0: !llvm.i32, %arg1: !llvm.i1, %arg2: !llvm.ptr<i8>, %arg3: !llvm.ptr<i8>) {
   // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 %{{.*}})

From d0ad9e93ce546eb3d372f1b815b164ba137fd701 Mon Sep 17 00:00:00 2001
From: Louis Dionne
Date: Tue, 4 Aug 2020 15:09:05 -0400
Subject: [PATCH 353/600] [libc++abi] Make sure we use a 32 bit guard on 32 bit Aarch64

---
 libcxxabi/include/__cxxabi_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxxabi/include/__cxxabi_config.h b/libcxxabi/include/__cxxabi_config.h
index b5444d67ee44b..98f55b2a256d9 100644
--- a/libcxxabi/include/__cxxabi_config.h
+++ b/libcxxabi/include/__cxxabi_config.h
@@ -72,7 +72,7 @@
 #endif
 
 // wasm32 follows the arm32 ABI convention of using 32-bit guard.
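For context on the one-line change below: the guard object the compiler emits around each function-local static has an ABI-fixed width, and libc++abi must agree with the compiler on it. A sketch of the selection, mirroring the #if being changed (illustrative only, not the ABI specification):

```c++
#include <cstdint>
#include <cstdio>

// Illustrative only: arm32 and wasm32 use a 32-bit guard, and this patch
// makes 32-bit AArch64 (arm64_32) follow suit; the generic Itanium ABI uses
// a 64-bit guard whose first byte is tested. A size mismatch between
// compiler-emitted code and libc++abi's __cxa_guard_* functions would
// corrupt data adjacent to the guard.
#if defined(__arm__) || defined(__wasm32__) || defined(__ARM64_ARCH_8_32__)
typedef uint32_t guard_t; // _LIBCXXABI_GUARD_ABI_ARM layout
#else
typedef uint64_t guard_t; // generic Itanium layout
#endif

int main() {
  std::printf("guard size on this target: %zu bytes\n", sizeof(guard_t));
  return 0;
}
```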
-#if defined(__arm__) || defined(__wasm32__) +#if defined(__arm__) || defined(__wasm32__) || defined(__ARM64_ARCH_8_32__) # define _LIBCXXABI_GUARD_ABI_ARM #endif From 14ed5cf5c461cf34faf5729424b4221d30e24dcc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 26 Jul 2020 17:44:28 -0400 Subject: [PATCH 354/600] AMDGPU/GlobalISel: Add baseline tests for andn2/orn2 matching --- llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll | 719 +++++++++++++++++++ llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll | 719 +++++++++++++++++++ 2 files changed, 1438 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll new file mode 100644 index 0000000000000..29bf1b0280762 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll @@ -0,0 +1,719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_andn2_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s3 +; GCN-NEXT: s_and_b32 s0, s2, s0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %and = and i32 %src0, %not.src1 + ret i32 %and +} + +define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_andn2_i32_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s3 +; GCN-NEXT: s_and_b32 s0, s0, s2 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %and = and i32 %not.src1, %src0 + ret i32 %and +} + +define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_andn2_i32_multi_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s1, s3 +; GCN-NEXT: s_and_b32 s0, s2, s1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %and = and i32 %src0, %not.src1 + %insert.0 = insertvalue { i32, i32 } undef, i32 %and, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %not.src1, 1 + ret { i32, i32 } %insert.1 +} + +define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) { +; GCN-LABEL: s_andn2_i32_multi_foldable_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s1, s4 +; GCN-NEXT: s_and_b32 s0, s2, s1 +; GCN-NEXT: s_and_b32 s1, s3, s1 +; GCN-NEXT: ; return to shader part epilog + %not.src2 = xor i32 %src2, -1 + %and0 = and i32 %src0, %not.src2 + %and1 = and i32 %src1, %not.src2 + %insert.0 = insertvalue { i32, i32 } undef, i32 %and0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %and1, 1 + ret { i32, i32 } %insert.1 +} + +define i32 @v_andn2_i32(i32 %src0, i32 %src1) { +; GCN-LABEL: v_andn2_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_and_b32_e32 v0, v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i32 %src1, -1 + %and = and i32 %src0, %not.src1 + ret i32 %and +} + +define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) { +; GCN-LABEL: v_andn2_i32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_and_b32_e32 v0, s2, v0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %and = and i32 
%src0, %not.src1 + %cast = bitcast i32 %and to float + ret float %cast +} + +define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) { +; GCN-LABEL: v_andn2_i32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s2 +; GCN-NEXT: v_and_b32_e32 v0, s0, v0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %and = and i32 %src0, %not.src1 + %cast = bitcast i32 %and to float + ret float %cast +} + +define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_andn2_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[4:5] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %and = and i64 %src0, %not.src1 + ret i64 %and +} + +define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_andn2_i64_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[4:5] +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %and = and i64 %not.src1, %src0 + ret i64 %and +} + +define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) { +; GCN-LABEL: s_andn2_i64_multi_foldable_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[6:7], s[6:7] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] +; GCN-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] +; GCN-NEXT: ; return to shader part epilog + %not.src2 = xor i64 %src2, -1 + %and0 = and i64 %src0, %not.src2 + %and1 = and i64 %src1, %not.src2 + %insert.0 = insertvalue { i64, i64 } undef, i64 %and0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %and1, 1 + ret { i64, i64 } %insert.1 +} + +define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_andn2_i64_multi_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[4:5], s[4:5] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5] +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %and = and i64 %src0, %not.src1 + %insert.0 = insertvalue { i64, i64 } undef, i64 %and, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %not.src1, 1 + ret { i64, i64 } %insert.1 +} + +define i64 @v_andn2_i64(i64 %src0, i64 %src1) { +; GCN-LABEL: v_andn2_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v2, -1, v2 +; GCN-NEXT: v_xor_b32_e32 v3, -1, v3 +; GCN-NEXT: v_and_b32_e32 v0, v0, v2 +; GCN-NEXT: v_and_b32_e32 v1, v1, v3 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i64 %src1, -1 + %and = and i64 %src0, %not.src1 + ret i64 %and +} + +define amdgpu_ps <2 x float> @v_andn2_i64_sv(i64 inreg %src0, i64 %src1) { +; GCN-LABEL: v_andn2_i64_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_and_b32_e32 v0, s2, v0 +; GCN-NEXT: v_and_b32_e32 v1, s3, v1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %and = and i64 %src0, %not.src1 + %cast = bitcast i64 %and to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x float> @v_andn2_i64_vs(i64 %src0, i64 inreg %src1) { +; GCN-LABEL: v_andn2_i64_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[2:3] +; GCN-NEXT: v_and_b32_e32 v0, s0, v0 +; GCN-NEXT: v_and_b32_e32 v1, s1, v1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %and = and i64 %src0, %not.src1 + %cast = bitcast i64 %and to <2 x float> + ret <2 x float> 
%cast +} + +define amdgpu_ps <2 x i32> @s_andn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GCN-LABEL: s_andn2_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s0, -1 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i32> %src1, + %and = and <2 x i32> %src0, %not.src1 + ret <2 x i32> %and +} + +define amdgpu_ps <2 x i32> @s_andn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GCN-LABEL: s_andn2_v2i32_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s0, -1 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i32> %src1, + %and = and <2 x i32> %not.src1, %src0 + ret <2 x i32> %and +} + +define amdgpu_ps i16 @s_andn2_i16(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_andn2_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s3, -1 +; GFX6-NEXT: s_and_b32 s0, s2, s0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %and = and i16 %src0, %not.src1 + ret i16 %and +} + +define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_andn2_i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s3, -1 +; GFX6-NEXT: s_and_b32 s0, s0, s2 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %and = and i16 %not.src1, %src0 + ret i16 %and +} + +define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_andn2_i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s1, s3, -1 +; GFX6-NEXT: s_and_b32 s0, s2, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %and = and i16 %src0, %not.src1 + %insert.0 = insertvalue { i16, i16 } undef, i16 %and, 0 + %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %not.src1, 1 + ret { i16, i16 } %insert.1 +} + +define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) { +; GFX6-LABEL: s_andn2_i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s1, s4, -1 +; GFX6-NEXT: s_and_b32 s0, s2, s1 +; GFX6-NEXT: s_and_b32 s1, s3, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s1, 0xffff +; GFX9-NEXT: s_and_b32 s0, s4, s1 +; GFX9-NEXT: s_xor_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s2, s2, s1 +; GFX9-NEXT: s_and_b32 s4, s0, s1 +; GFX9-NEXT: 
s_and_b32 s1, s3, s1 +; GFX9-NEXT: s_and_b32 s0, s2, s4 +; GFX9-NEXT: s_and_b32 s1, s1, s4 +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor i16 %src2, -1 + %and0 = and i16 %src0, %not.src2 + %and1 = and i16 %src1, %not.src2 + %insert.0 = insertvalue { i16, i16 } undef, i16 %and0, 0 + %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %and1, 1 + ret { i16, i16 } %insert.1 +} + +define i16 @v_andn2_i16(i16 %src0, i16 %src1) { +; GCN-LABEL: v_andn2_i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_and_b32_e32 v0, v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i16 %src1, -1 + %and = and i16 %src0, %not.src1 + ret i16 %and +} + +define amdgpu_ps float @v_andn2_i16_sv(i16 inreg %src0, i16 %src1) { +; GCN-LABEL: v_andn2_i16_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_and_b32_e32 v0, s2, v0 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %and = and i16 %src0, %not.src1 + %zext = zext i16 %and to i32 + %cast.zext = bitcast i32 %zext to float + ret float %cast.zext +} + +define amdgpu_ps float @v_andn2_i16_vs(i16 %src0, i16 inreg %src1) { +; GFX6-LABEL: v_andn2_i16_vs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s2, -1 +; GFX6-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_andn2_i16_vs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NEXT: s_xor_b32 s0, s1, s0 +; GFX9-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %and = and i16 %src0, %not.src1 + %zext = zext i16 %and to i32 + %cast.zext = bitcast i32 %zext to float + ret float %cast.zext +} + +define amdgpu_ps i32 @s_andn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_and_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s0, s3, -1 +; GFX9-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, + %and = and <2 x i16> %src0, %not.src1 + %cast = bitcast <2 x i16> %and to i32 + ret i32 %cast +} + +define amdgpu_ps i32 @s_andn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v2i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_and_b32 s0, s1, s0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v2i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s0, s3, -1 +; GFX9-NEXT: s_and_b32 s0, s0, s2 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, + %and = and <2 x i16> %not.src1, %src0 + %cast = bitcast <2 x i16> %and to i32 + ret i32 %cast +} + +define amdgpu_ps { i32, i32 } 
@s_andn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v2i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_and_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v2i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s1, s3, -1 +; GFX9-NEXT: s_and_b32 s0, s2, s1 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, + %and = and <2 x i16> %src0, %not.src1 + + %cast.0 = bitcast <2 x i16> %and to i32 + %cast.1 = bitcast <2 x i16> %not.src1 to i32 + %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1 + ret { i32, i32 } %insert.1 +} + +define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) { +; GFX6-LABEL: s_andn2_v2i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s3, s4, s1 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_lshl_b32 s3, s7, 16 +; GFX6-NEXT: s_and_b32 s1, s6, s1 +; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_and_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s1, s2, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v2i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s1, s4, -1 +; GFX9-NEXT: s_and_b32 s0, s2, s1 +; GFX9-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor <2 x i16> %src2, + %and0 = and <2 x i16> %src0, %not.src2 + %and1 = and <2 x i16> %src1, %not.src2 + + %cast.0 = bitcast <2 x i16> %and0 to i32 + %cast.1 = bitcast <2 x i16> %and1 to i32 + %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1 + ret { i32, i32 } %insert.1 +} + +define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) { +; GFX6-LABEL: v_andn2_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_andn2_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor <2 x i16> %src1, + %and = and <2 x i16> %src0, %not.src1 + ret <2 x i16> %and +} + +; FIXME: +; define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = xor <3 x i16> %src1, +; %and = and <3 x i16> %src0, %not.src1 +; %cast = bitcast <3 x i16> %and to i48 +; ret i48 %cast +; } + +; define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = 
xor <3 x i16> %src1, +; %and = and <3 x i16> %not.src1, %src0 +; %cast = bitcast <3 x i16> %and to i48 +; ret i48 %cast +; } + +; define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = xor <3 x i16> %src1, +; %and = and <3 x i16> %src0, %not.src1 + +; %cast.0 = bitcast <3 x i16> %and to i48 +; %cast.1 = bitcast <3 x i16> %not.src1 to i48 +; %insert.0 = insertvalue { i48, i48 } undef, i48 %cast.0, 0 +; %insert.1 = insertvalue { i48, i48 } %insert.0, i48 %cast.1, 1 +; ret { i48, i48 } %insert.1 +; } + +; define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) { +; %not.src1 = xor <3 x i16> %src1, +; %and = and <3 x i16> %src0, %not.src1 +; ret <3 x i16> %and +; } + +define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, + %and = and <4 x i16> %src0, %not.src1 + %cast = bitcast <4 x i16> %and to i64 + ret i64 %cast +} + +define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v4i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v4i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, + %and = and <4 x i16> %not.src1, %src0 + %cast = bitcast <4 x i16> %and to i64 + ret i64 %cast +} + +define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_andn2_v4i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 
s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v4i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1] +; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5] +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, + %and = and <4 x i16> %src0, %not.src1 + + %cast.0 = bitcast <4 x i16> %and to i64 + %cast.1 = bitcast <4 x i16> %not.src1 to i64 + %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1 + ret { i64, i64 } %insert.1 +} + +define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1, <4 x i16> inreg %src2) { +; GFX6-LABEL: s_andn2_v4i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s14, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_and_b32 s1, s2, s14 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_and_b32 s2, s4, s14 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s3, s6, s14 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_lshl_b32 s3, s9, 16 +; GFX6-NEXT: s_and_b32 s4, s8, s14 +; GFX6-NEXT: s_or_b32 s3, s3, s4 +; GFX6-NEXT: s_lshl_b32 s4, s11, 16 +; GFX6-NEXT: s_and_b32 s5, s10, s14 +; GFX6-NEXT: s_or_b32 s4, s4, s5 +; GFX6-NEXT: s_lshl_b32 s5, s13, 16 +; GFX6-NEXT: s_and_b32 s6, s12, s14 +; GFX6-NEXT: s_or_b32 s5, s5, s6 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s7, s6 +; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] +; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_andn2_v4i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1] +; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7] +; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7] +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor <4 x i16> %src2, + %and0 = and <4 x i16> %src0, %not.src2 + %and1 = and <4 x i16> %src1, %not.src2 + + %cast.0 = bitcast <4 x i16> %and0 to i64 + %cast.1 = bitcast <4 x i16> %and1 to i64 + %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1 + ret { i64, i64 } %insert.1 +} + +define <4 x i16> @v_andn2_v4i16(<4 x i16> %src0, <4 x i16> %src1) { +; GFX6-LABEL: v_andn2_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_and_b32_e32 v3, v4, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX6-NEXT: v_and_b32_e32 v4, v6, v8 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX6-NEXT: 
v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_and_b32_e32 v2, v1, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_andn2_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX9-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_and_b32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1> + %and = and <4 x i16> %src0, %not.src1 + ret <4 x i16> %and +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll new file mode 100644 index 0000000000000..dd9758d621e85 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/orn2.ll @@ -0,0 +1,719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +define amdgpu_ps i32 @s_orn2_i32(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_orn2_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s3 +; GCN-NEXT: s_or_b32 s0, s2, s0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %or = or i32 %src0, %not.src1 + ret i32 %or +} + +define amdgpu_ps i32 @s_orn2_i32_commute(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_orn2_i32_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s3 +; GCN-NEXT: s_or_b32 s0, s0, s2 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %or = or i32 %not.src1, %src0 + ret i32 %or +} + +define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_use(i32 inreg %src0, i32 inreg %src1) { +; GCN-LABEL: s_orn2_i32_multi_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s1, s3 +; GCN-NEXT: s_or_b32 s0, s2, s1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %or = or i32 %src0, %not.src1 + %insert.0 = insertvalue { i32, i32 } undef, i32 %or, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %not.src1, 1 + ret { i32, i32 } %insert.1 +} + +define amdgpu_ps { i32, i32 } @s_orn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) { +; GCN-LABEL: s_orn2_i32_multi_foldable_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s1, s4 +; GCN-NEXT: s_or_b32 s0, s2, s1 +; GCN-NEXT: s_or_b32 s1, s3, s1 +; GCN-NEXT: ; return to shader part epilog + %not.src2 = xor i32 %src2, -1 + %or0 = or i32 %src0, %not.src2 + %or1 = or i32 %src1, %not.src2 + %insert.0 = insertvalue { i32, i32 } undef, i32 %or0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %or1, 1 + ret { i32, i32 } %insert.1 +} + +define i32 @v_orn2_i32(i32 %src0, i32 %src1) { +; GCN-LABEL: v_orn2_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i32 %src1, -1 + %or = or i32 %src0, %not.src1 + ret i32 %or +} + +define amdgpu_ps float @v_orn2_i32_sv(i32 inreg %src0, i32 %src1) { +; GCN-LABEL: v_orn2_i32_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_or_b32_e32 v0, s2, v0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %or = or i32 %src0, %not.src1 + %cast = bitcast i32 %or to float + ret float %cast +} 
+ +define amdgpu_ps float @v_orn2_i32_vs(i32 %src0, i32 inreg %src1) { +; GCN-LABEL: v_orn2_i32_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b32 s0, s2 +; GCN-NEXT: v_or_b32_e32 v0, s0, v0 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i32 %src1, -1 + %or = or i32 %src0, %not.src1 + %cast = bitcast i32 %or to float + ret float %cast +} + +define amdgpu_ps i64 @s_orn2_i64(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_orn2_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %or = or i64 %src0, %not.src1 + ret i64 %or +} + +define amdgpu_ps i64 @s_orn2_i64_commute(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_orn2_i64_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %or = or i64 %not.src1, %src0 + ret i64 %or +} + +define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) { +; GCN-LABEL: s_orn2_i64_multi_foldable_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[6:7], s[6:7] +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] +; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[6:7] +; GCN-NEXT: ; return to shader part epilog + %not.src2 = xor i64 %src2, -1 + %or0 = or i64 %src0, %not.src2 + %or1 = or i64 %src1, %not.src2 + %insert.0 = insertvalue { i64, i64 } undef, i64 %or0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %or1, 1 + ret { i64, i64 } %insert.1 +} + +define amdgpu_ps { i64, i64 } @s_orn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) { +; GCN-LABEL: s_orn2_i64_multi_use: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[4:5], s[4:5] +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GCN-NEXT: s_mov_b32 s2, s4 +; GCN-NEXT: s_mov_b32 s3, s5 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %or = or i64 %src0, %not.src1 + %insert.0 = insertvalue { i64, i64 } undef, i64 %or, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %not.src1, 1 + ret { i64, i64 } %insert.1 +} + +define i64 @v_orn2_i64(i64 %src0, i64 %src1) { +; GCN-LABEL: v_orn2_i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v2, -1, v2 +; GCN-NEXT: v_xor_b32_e32 v3, -1, v3 +; GCN-NEXT: v_or_b32_e32 v0, v0, v2 +; GCN-NEXT: v_or_b32_e32 v1, v1, v3 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i64 %src1, -1 + %or = or i64 %src0, %not.src1 + ret i64 %or +} + +define amdgpu_ps <2 x float> @v_orn2_i64_sv(i64 inreg %src0, i64 %src1) { +; GCN-LABEL: v_orn2_i64_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_or_b32_e32 v0, s2, v0 +; GCN-NEXT: v_or_b32_e32 v1, s3, v1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %or = or i64 %src0, %not.src1 + %cast = bitcast i64 %or to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x float> @v_orn2_i64_vs(i64 %src0, i64 inreg %src1) { +; GCN-LABEL: v_orn2_i64_vs: +; GCN: ; %bb.0: +; GCN-NEXT: s_not_b64 s[0:1], s[2:3] +; GCN-NEXT: v_or_b32_e32 v0, s0, v0 +; GCN-NEXT: v_or_b32_e32 v1, s1, v1 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i64 %src1, -1 + %or = or i64 %src0, %not.src1 + %cast = bitcast i64 %or to <2 x float> + ret <2 x float> %cast +} + +define amdgpu_ps <2 x i32> @s_orn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GCN-LABEL: s_orn2_v2i32: 
+; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s0, -1 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1> + %or = or <2 x i32> %src0, %not.src1 + ret <2 x i32> %or +} + +define amdgpu_ps <2 x i32> @s_orn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) { +; GCN-LABEL: s_orn2_v2i32_commute: +; GCN: ; %bb.0: +; GCN-NEXT: s_mov_b32 s0, -1 +; GCN-NEXT: s_mov_b32 s1, s0 +; GCN-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1> + %or = or <2 x i32> %not.src1, %src0 + ret <2 x i32> %or +} + +define amdgpu_ps i16 @s_orn2_i16(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_orn2_i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s3, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NEXT: s_or_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %or = or i16 %src0, %not.src1 + ret i16 %or +} + +define amdgpu_ps i16 @s_orn2_i16_commute(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_orn2_i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s3, -1 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s0, s2, s0 +; GFX9-NEXT: s_or_b32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %or = or i16 %not.src1, %src0 + ret i16 %or +} + +define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) { +; GFX6-LABEL: s_orn2_i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s1, s3, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s3, s0 +; GFX9-NEXT: s_xor_b32 s1, s1, s0 +; GFX9-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NEXT: s_and_b32 s0, s1, s0 +; GFX9-NEXT: s_or_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %or = or i16 %src0, %not.src1 + %insert.0 = insertvalue { i16, i16 } undef, i16 %or, 0 + %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %not.src1, 1 + ret { i16, i16 } %insert.1 +} + +define amdgpu_ps { i16, i16 } @s_orn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) { +; GFX6-LABEL: s_orn2_i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s1, s4, -1 +; GFX6-NEXT: s_or_b32 s0, s2, s1 +; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s1, 0xffff +; GFX9-NEXT: s_and_b32 s0, s4, s1 +; GFX9-NEXT: s_xor_b32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s2, s2, s1 +; GFX9-NEXT: s_and_b32 s4, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NEXT: s_or_b32 s0, s2, s4 +; GFX9-NEXT: s_or_b32 s1, s1, s4 +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor i16 %src2, -1 + %or0 
= or i16 %src0, %not.src2 + %or1 = or i16 %src1, %not.src2 + %insert.0 = insertvalue { i16, i16 } undef, i16 %or0, 0 + %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %or1, 1 + ret { i16, i16 } %insert.1 +} + +define i16 @v_orn2_i16(i16 %src0, i16 %src1) { +; GCN-LABEL: v_orn2_i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_xor_b32_e32 v1, -1, v1 +; GCN-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor i16 %src1, -1 + %or = or i16 %src0, %not.src1 + ret i16 %or +} + +define amdgpu_ps float @v_orn2_i16_sv(i16 inreg %src0, i16 %src1) { +; GCN-LABEL: v_orn2_i16_sv: +; GCN: ; %bb.0: +; GCN-NEXT: v_xor_b32_e32 v0, -1, v0 +; GCN-NEXT: v_or_b32_e32 v0, s2, v0 +; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GCN-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %or = or i16 %src0, %not.src1 + %zext = zext i16 %or to i32 + %cast.zext = bitcast i32 %zext to float + ret float %cast.zext +} + +define amdgpu_ps float @v_orn2_i16_vs(i16 %src0, i16 inreg %src1) { +; GFX6-LABEL: v_orn2_i16_vs: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_xor_b32 s0, s2, -1 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: v_orn2_i16_vs: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NEXT: s_xor_b32 s0, s1, s0 +; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX9-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor i16 %src1, -1 + %or = or i16 %src0, %not.src1 + %zext = zext i16 %or to i32 + %cast.zext = bitcast i32 %zext to float + ret float %cast.zext +} + +define amdgpu_ps i32 @s_orn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s0, s3, -1 +; GFX9-NEXT: s_or_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1> + %or = or <2 x i16> %src0, %not.src1 + %cast = bitcast <2 x i16> %or to i32 + ret i32 %cast +} + +define amdgpu_ps i32 @s_orn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v2i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_or_b32 s0, s1, s0 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v2i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s0, s3, -1 +; GFX9-NEXT: s_or_b32 s0, s0, s2 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1> + %or = or <2 x i16> %not.src1, %src0 + %cast = bitcast <2 x i16> %or to i32 + ret i32 %cast +} + +define amdgpu_ps { i32, i32 } @s_orn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v2i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_lshl_b32 s0, 
s3, 16 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_and_b32 s1, s4, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v2i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s1, s3, -1 +; GFX9-NEXT: s_or_b32 s0, s2, s1 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1> + %or = or <2 x i16> %src0, %not.src1 + + %cast.0 = bitcast <2 x i16> %or to i32 + %cast.1 = bitcast <2 x i16> %not.src1 to i32 + %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1 + ret { i32, i32 } %insert.1 +} + +define amdgpu_ps { i32, i32 } @s_orn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) { +; GFX6-LABEL: s_orn2_v2i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s1, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_and_b32 s2, s2, s1 +; GFX6-NEXT: s_or_b32 s0, s0, s2 +; GFX6-NEXT: s_and_b32 s3, s4, s1 +; GFX6-NEXT: s_lshl_b32 s2, s5, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_lshl_b32 s3, s7, 16 +; GFX6-NEXT: s_and_b32 s1, s6, s1 +; GFX6-NEXT: s_or_b32 s1, s3, s1 +; GFX6-NEXT: s_xor_b32 s1, s1, -1 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v2i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_xor_b32 s1, s4, -1 +; GFX9-NEXT: s_or_b32 s0, s2, s1 +; GFX9-NEXT: s_or_b32 s1, s3, s1 +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor <2 x i16> %src2, <i16 -1, i16 -1> + %or0 = or <2 x i16> %src0, %not.src2 + %or1 = or <2 x i16> %src1, %not.src2 + + %cast.0 = bitcast <2 x i16> %or0 to i32 + %cast.1 = bitcast <2 x i16> %or1 to i32 + %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0 + %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1 + ret { i32, i32 } %insert.1 +} + +define <2 x i16> @v_orn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) { +; GFX6-LABEL: v_orn2_v2i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v4 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_orn2_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1> + %or = or <2 x i16> %src0, %not.src1 + ret <2 x i16> %or +} + +; FIXME: +; define amdgpu_ps i48 @s_orn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1> +; %or = or <3 x i16> %src0, %not.src1 +; %cast = bitcast <3 x i16> %or to i48 +; ret i48 %cast +; } + +; define amdgpu_ps i48 @s_orn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1> +; %or = or <3 x i16> %not.src1, %src0 +; %cast = bitcast <3 x i16> %or to i48 +; ret i48 %cast +; } + +; define amdgpu_ps { i48, i48 } @s_orn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) { +; %not.src1 = xor <3 x i16> 
%src1, <i16 -1, i16 -1, i16 -1> +; %or = or <3 x i16> %src0, %not.src1 + +; %cast.0 = bitcast <3 x i16> %or to i48 +; %cast.1 = bitcast <3 x i16> %not.src1 to i48 +; %insert.0 = insertvalue { i48, i48 } undef, i48 %cast.0, 0 +; %insert.1 = insertvalue { i48, i48 } %insert.0, i48 %cast.1, 1 +; ret { i48, i48 } %insert.1 +; } + +; define <3 x i16> @v_orn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) { +; %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1> +; %or = or <3 x i16> %src0, %not.src1 +; ret <3 x i16> %or +; } + +define amdgpu_ps i64 @s_orn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1> + %or = or <4 x i16> %src0, %not.src1 + %cast = bitcast <4 x i16> %or to i64 + ret i64 %cast +} + +define amdgpu_ps i64 @s_orn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v4i16_commute: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v4i16_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1> + %or = or <4 x i16> %not.src1, %src0 + %cast = bitcast <4 x i16> %or to i64 + ret i64 %cast +} + +define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1) { +; GFX6-LABEL: s_orn2_v4i16_multi_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_mov_b32 s3, 0xffff +; GFX6-NEXT: s_and_b32 s1, s2, s3 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_and_b32 s2, s4, s3 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s4, s6, s3 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s4, s9, 16 +; GFX6-NEXT: s_and_b32 s3, s8, s3 +; GFX6-NEXT: s_or_b32 s3, s4, s3 +; GFX6-NEXT: s_mov_b32 s4, -1 +; GFX6-NEXT: s_mov_b32 s5, s4 +; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], 
s[4:5] +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v4i16_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1] +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] +; GFX9-NEXT: s_mov_b32 s2, s4 +; GFX9-NEXT: s_mov_b32 s3, s5 +; GFX9-NEXT: ; return to shader part epilog + %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1> + %or = or <4 x i16> %src0, %not.src1 + + %cast.0 = bitcast <4 x i16> %or to i64 + %cast.1 = bitcast <4 x i16> %not.src1 to i64 + %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1 + ret { i64, i64 } %insert.1 +} + +define amdgpu_ps { i64, i64 } @s_orn2_v4i16_multi_foldable_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1, <4 x i16> inreg %src2) { +; GFX6-LABEL: s_orn2_v4i16_multi_foldable_use: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_mov_b32 s14, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s3, 16 +; GFX6-NEXT: s_and_b32 s1, s2, s14 +; GFX6-NEXT: s_or_b32 s0, s0, s1 +; GFX6-NEXT: s_lshl_b32 s1, s5, 16 +; GFX6-NEXT: s_and_b32 s2, s4, s14 +; GFX6-NEXT: s_or_b32 s1, s1, s2 +; GFX6-NEXT: s_and_b32 s3, s6, s14 +; GFX6-NEXT: s_lshl_b32 s2, s7, 16 +; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_lshl_b32 s3, s9, 16 +; GFX6-NEXT: s_and_b32 s4, s8, s14 +; GFX6-NEXT: s_or_b32 s3, s3, s4 +; GFX6-NEXT: s_lshl_b32 s4, s11, 16 +; GFX6-NEXT: s_and_b32 s5, s10, s14 +; GFX6-NEXT: s_or_b32 s4, s4, s5 +; GFX6-NEXT: s_lshl_b32 s5, s13, 16 +; GFX6-NEXT: s_and_b32 s6, s12, s14 +; GFX6-NEXT: s_or_b32 s5, s5, s6 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s7, s6 +; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7] +; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] +; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GFX6-NEXT: ; return to shader part epilog +; +; GFX9-LABEL: s_orn2_v4i16_multi_foldable_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_mov_b32 s0, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 +; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1] +; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] +; GFX9-NEXT: s_or_b64 s[2:3], s[4:5], s[6:7] +; GFX9-NEXT: ; return to shader part epilog + %not.src2 = xor <4 x i16> %src2, <i16 -1, i16 -1, i16 -1, i16 -1> + %or0 = or <4 x i16> %src0, %not.src2 + %or1 = or <4 x i16> %src1, %not.src2 + + %cast.0 = bitcast <4 x i16> %or0 to i64 + %cast.1 = bitcast <4 x i16> %or1 to i64 + %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0 + %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1 + ret { i64, i64 } %insert.1 +} + +define <4 x i16> @v_orn2_v4i16(<4 x i16> %src0, <4 x i16> %src1) { +; GFX6-LABEL: v_orn2_v4i16: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: v_mov_b32_e32 v8, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-NEXT: v_and_b32_e32 v0, v0, v8 +; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 +; GFX6-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX6-NEXT: v_and_b32_e32 v3, v4, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5 +; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7 +; GFX6-NEXT: v_and_b32_e32 v4, v6, v8 +; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_or_b32_e32 v2, v1, v3 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_orn2_v4i16: +; GFX9: ; %bb.0: 
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1> + %or = or <4 x i16> %src0, %not.src1 + ret <4 x i16> %or +} From f2942f9c26a39340f8604857c7c90e6ade02a381 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 Jul 2020 15:46:10 -0400 Subject: [PATCH 355/600] GlobalISel: Add node mappings for frameindex/blockaddress --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index d3ca771ad3adb..3bf9e63f29f35 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -52,6 +52,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv<G_FRAME_INDEX, frameindex>; +def : GINodeEquiv<G_BLOCK_ADDR, blockaddress>; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; From 6cd50e7b75ed39ba1218c724401117cb6e12748d Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 4 Aug 2020 14:55:12 -0400 Subject: [PATCH 356/600] [libc] Add implementations for isblank, iscntrl, isgraph, ispunct. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D85059 --- libc/config/linux/aarch64/entrypoints.txt | 4 +++ libc/config/linux/api.td | 4 +++ libc/config/linux/x86_64/entrypoints.txt | 4 +++ libc/spec/stdc.td | 20 ++++++++++++ libc/src/ctype/CMakeLists.txt | 36 ++++++++++++++++++++ libc/src/ctype/ctype_utils.h | 16 ++++----- libc/src/ctype/isalnum.cpp | 4 +-- libc/src/ctype/isblank.cpp | 22 +++++++++++++ libc/src/ctype/isblank.h | 18 ++++++++++ libc/src/ctype/iscntrl.cpp | 22 +++++++++++++ libc/src/ctype/iscntrl.h | 18 ++++++++++ libc/src/ctype/isgraph.cpp | 20 ++++++++++++ libc/src/ctype/isgraph.h | 18 ++++++++++ libc/src/ctype/ispunct.cpp | 22 +++++++++++++ libc/src/ctype/ispunct.h | 18 ++++++++++ libc/test/src/ctype/CMakeLists.txt | 40 +++++++++++++++++++++++ libc/test/src/ctype/isblank_test.cpp | 21 ++++++++++++ libc/test/src/ctype/iscntrl_test.cpp | 21 ++++++++++++ libc/test/src/ctype/isgraph_test.cpp | 21 ++++++++++++ libc/test/src/ctype/ispunct_test.cpp | 34 +++++++++++++++++++ 20 files changed, 371 insertions(+), 12 deletions(-) create mode 100644 libc/src/ctype/isblank.cpp create mode 100644 libc/src/ctype/isblank.h create mode 100644 libc/src/ctype/iscntrl.cpp create mode 100644 libc/src/ctype/iscntrl.h create mode 100644 libc/src/ctype/isgraph.cpp create mode 100644 libc/src/ctype/isgraph.h create mode 100644 libc/src/ctype/ispunct.cpp create mode 100644 libc/src/ctype/ispunct.h create mode 100644 libc/test/src/ctype/isblank_test.cpp create mode 100644 libc/test/src/ctype/iscntrl_test.cpp create mode 100644 libc/test/src/ctype/isgraph_test.cpp create mode 100644 libc/test/src/ctype/ispunct_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8314df89b0636..565fbf78fcb96 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -2,8 +2,12 @@ set(TARGET_LIBC_ENTRYPOINTS # ctype.h entrypoints libc.src.ctype.isalnum libc.src.ctype.isalpha + libc.src.ctype.isblank + libc.src.ctype.iscntrl libc.src.ctype.isdigit + libc.src.ctype.isgraph libc.src.ctype.islower + libc.src.ctype.ispunct libc.src.ctype.isupper # errno.h 
entrypoints diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 03d5d66e41e79..a57c703a699e7 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -90,8 +90,12 @@ def CTypeAPI : PublicAPI<"ctype.h"> { let Functions = [ "isalnum", "isalpha", + "isblank", + "iscntrl", "isdigit", + "isgraph", "islower", + "ispunct", "isupper", ]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 56a99d00d7847..3cc243e426c27 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -5,8 +5,12 @@ set(TARGET_LIBC_ENTRYPOINTS # ctype.h entrypoints libc.src.ctype.isalnum libc.src.ctype.isalpha + libc.src.ctype.isblank + libc.src.ctype.iscntrl libc.src.ctype.isdigit + libc.src.ctype.isgraph libc.src.ctype.islower + libc.src.ctype.ispunct libc.src.ctype.isupper # errno.h entrypoints diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 4fffc5cdc8579..1f14f76553593 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -56,16 +56,36 @@ def StdC : StandardSpec<"stdc"> { RetValSpec<IntType>, [ArgSpec<IntType>] >, + FunctionSpec< + "isblank", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, + FunctionSpec< + "iscntrl", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, FunctionSpec< "isdigit", RetValSpec<IntType>, [ArgSpec<IntType>] >, + FunctionSpec< + "isgraph", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, FunctionSpec< "islower", RetValSpec<IntType>, [ArgSpec<IntType>] >, + FunctionSpec< + "ispunct", + RetValSpec<IntType>, + [ArgSpec<IntType>] + >, FunctionSpec< "isupper", RetValSpec<IntType>, diff --git a/libc/src/ctype/CMakeLists.txt b/libc/src/ctype/CMakeLists.txt index 30995ab2f714c..c554e6cb78510 100644 --- a/libc/src/ctype/CMakeLists.txt +++ b/libc/src/ctype/CMakeLists.txt @@ -24,6 +24,22 @@ add_entrypoint_object( .ctype_utils ) +add_entrypoint_object( + isblank + SRCS + isblank.cpp + HDRS + isblank.h +) + +add_entrypoint_object( + iscntrl + SRCS + iscntrl.cpp + HDRS + iscntrl.h +) + add_entrypoint_object( isdigit SRCS @@ -34,6 +50,16 @@ add_entrypoint_object( .ctype_utils ) +add_entrypoint_object( + isgraph + SRCS + isgraph.cpp + HDRS + isgraph.h + DEPENDS + .ctype_utils +) + add_entrypoint_object( islower SRCS @@ -42,6 +68,16 @@ add_entrypoint_object( islower.h ) +add_entrypoint_object( + ispunct + SRCS + ispunct.cpp + HDRS + ispunct.h + DEPENDS + .ctype_utils +) + add_entrypoint_object( isupper SRCS diff --git a/libc/src/ctype/ctype_utils.h b/libc/src/ctype/ctype_utils.h index 4e8d3960bb704..787a19ebf1328 100644 --- a/libc/src/ctype/ctype_utils.h +++ b/libc/src/ctype/ctype_utils.h @@ -18,15 +18,13 @@ namespace internal { // of a function call by inlining them. // ------------------------------------------------------ -static inline int isdigit(int c) { - const unsigned ch = c; - return (ch - '0') < 10; -} - -static inline int isalpha(int c) { - const unsigned ch = c; - return (ch | 32) - 'a' < 26; -} +static inline int isdigit(unsigned ch) { return (ch - '0') < 10; } + +static inline int isalpha(unsigned ch) { return (ch | 32) - 'a' < 26; } + +static inline int isalnum(unsigned ch) { return isalpha(ch) || isdigit(ch); } + +static inline int isgraph(unsigned ch) { return 0x20 < ch && ch < 0x7f; } } // namespace internal } // namespace __llvm_libc diff --git a/libc/src/ctype/isalnum.cpp b/libc/src/ctype/isalnum.cpp index 08b6520e44267..54c4f80984195 100644 --- a/libc/src/ctype/isalnum.cpp +++ b/libc/src/ctype/isalnum.cpp @@ -15,8 +15,6 @@ namespace __llvm_libc { // TODO: Currently restricted to default locale. // These should be extended using locale information. 
-int LLVM_LIBC_ENTRYPOINT(isalnum)(int c) { - return internal::isalpha(c) || internal::isdigit(c); -} +int LLVM_LIBC_ENTRYPOINT(isalnum)(int c) { return internal::isalnum(c); } } // namespace __llvm_libc diff --git a/libc/src/ctype/isblank.cpp b/libc/src/ctype/isblank.cpp new file mode 100644 index 0000000000000..fa28d84c03bd0 --- /dev/null +++ b/libc/src/ctype/isblank.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of isblank------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isblank.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(isblank)(int c) { + const unsigned char ch = c; + return ch == ' ' || ch == '\t'; +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/isblank.h b/libc/src/ctype/isblank.h new file mode 100644 index 0000000000000..0554322d08251 --- /dev/null +++ b/libc/src/ctype/isblank.h @@ -0,0 +1,18 @@ +//===-- Implementation header for isblank -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISBLANK_H +#define LLVM_LIBC_SRC_CTYPE_ISBLANK_H + +namespace __llvm_libc { + +int isblank(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISBLANK_H diff --git a/libc/src/ctype/iscntrl.cpp b/libc/src/ctype/iscntrl.cpp new file mode 100644 index 0000000000000..06ee7cc0d9703 --- /dev/null +++ b/libc/src/ctype/iscntrl.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of iscntrl------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/iscntrl.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(iscntrl)(int c) { + const unsigned char ch = c; + return ch < 0x20 || ch == 0x7f; +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/iscntrl.h b/libc/src/ctype/iscntrl.h new file mode 100644 index 0000000000000..26f094053a28a --- /dev/null +++ b/libc/src/ctype/iscntrl.h @@ -0,0 +1,18 @@ +//===-- Implementation header for iscntrl -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISCNTRL_H +#define LLVM_LIBC_SRC_CTYPE_ISCNTRL_H + +namespace __llvm_libc { + +int iscntrl(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISCNTRL_H diff --git a/libc/src/ctype/isgraph.cpp b/libc/src/ctype/isgraph.cpp new file mode 100644 index 0000000000000..c7a488cbfdeab --- /dev/null +++ b/libc/src/ctype/isgraph.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of isgraph------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isgraph.h" + +#include "src/__support/common.h" +#include "src/ctype/ctype_utils.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(isgraph)(int c) { return internal::isgraph(c); } + +} // namespace __llvm_libc diff --git a/libc/src/ctype/isgraph.h b/libc/src/ctype/isgraph.h new file mode 100644 index 0000000000000..421d0ffc4488b --- /dev/null +++ b/libc/src/ctype/isgraph.h @@ -0,0 +1,18 @@ +//===-- Implementation header for isgraph -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISGRAPH_H +#define LLVM_LIBC_SRC_CTYPE_ISGRAPH_H + +namespace __llvm_libc { + +int isgraph(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISGRAPH_H diff --git a/libc/src/ctype/ispunct.cpp b/libc/src/ctype/ispunct.cpp new file mode 100644 index 0000000000000..a810c6471e796 --- /dev/null +++ b/libc/src/ctype/ispunct.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of ispunct------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/ispunct.h" + +#include "src/__support/common.h" +#include "src/ctype/ctype_utils.h" + +namespace __llvm_libc { + +// TODO: Currently restricted to default locale. +// These should be extended using locale information. +int LLVM_LIBC_ENTRYPOINT(ispunct)(int c) { + return !internal::isalnum(c) && internal::isgraph(c); +} + +} // namespace __llvm_libc diff --git a/libc/src/ctype/ispunct.h b/libc/src/ctype/ispunct.h new file mode 100644 index 0000000000000..23cc08a0bac9c --- /dev/null +++ b/libc/src/ctype/ispunct.h @@ -0,0 +1,18 @@ +//===-- Implementation header for ispunct -------------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_CTYPE_ISPUNCT_H +#define LLVM_LIBC_SRC_CTYPE_ISPUNCT_H + +namespace __llvm_libc { + +int ispunct(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_CTYPE_ISPUNCT_H diff --git a/libc/test/src/ctype/CMakeLists.txt b/libc/test/src/ctype/CMakeLists.txt index c9959465c697c..3adf5739d72a5 100644 --- a/libc/test/src/ctype/CMakeLists.txt +++ b/libc/test/src/ctype/CMakeLists.txt @@ -20,6 +20,26 @@ add_libc_unittest( libc.src.ctype.isalpha ) +add_libc_unittest( + isblank + SUITE + libc_ctype_unittests + SRCS + isblank_test.cpp + DEPENDS + libc.src.ctype.isblank +) + +add_libc_unittest( + iscntrl + SUITE + libc_ctype_unittests + SRCS + iscntrl_test.cpp + DEPENDS + libc.src.ctype.iscntrl +) + add_libc_unittest( isdigit SUITE @@ -30,6 +50,16 @@ add_libc_unittest( libc.src.ctype.isdigit ) +add_libc_unittest( + isgraph + SUITE + libc_ctype_unittests + SRCS + isgraph_test.cpp + DEPENDS + libc.src.ctype.isgraph +) + add_libc_unittest( islower SUITE @@ -40,6 +70,16 @@ add_libc_unittest( libc.src.ctype.islower ) +add_libc_unittest( + ispunct + SUITE + libc_ctype_unittests + SRCS + ispunct_test.cpp + DEPENDS + libc.src.ctype.ispunct +) + add_libc_unittest( isupper SUITE diff --git a/libc/test/src/ctype/isblank_test.cpp b/libc/test/src/ctype/isblank_test.cpp new file mode 100644 index 0000000000000..f024ef67f9f62 --- /dev/null +++ b/libc/test/src/ctype/isblank_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for isblank----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isblank.h" +#include "utils/UnitTest/Test.h" + +TEST(IsBlank, DefaultLocale) { + // Loops through all characters, verifying that space and horizontal tab + // return a non-zero integer and everything else returns zero. + for (int ch = 0; ch < 255; ++ch) { + if (ch == ' ' || ch == '\t') + EXPECT_NE(__llvm_libc::isblank(ch), 0); + else + EXPECT_EQ(__llvm_libc::isblank(ch), 0); + } +} diff --git a/libc/test/src/ctype/iscntrl_test.cpp b/libc/test/src/ctype/iscntrl_test.cpp new file mode 100644 index 0000000000000..5af7457cbda44 --- /dev/null +++ b/libc/test/src/ctype/iscntrl_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for iscntrl----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/iscntrl.h" +#include "utils/UnitTest/Test.h" + +TEST(IsCntrl, DefaultLocale) { + // Loops through all characters, verifying that control characters + // return a non-zero integer, all others return zero. 
+ for (int ch = 0; ch < 255; ++ch) { + if ((0 <= ch && ch <= 0x1f /*US*/) || ch == 0x7f /*DEL*/) + EXPECT_NE(__llvm_libc::iscntrl(ch), 0); + else + EXPECT_EQ(__llvm_libc::iscntrl(ch), 0); + } +} diff --git a/libc/test/src/ctype/isgraph_test.cpp b/libc/test/src/ctype/isgraph_test.cpp new file mode 100644 index 0000000000000..1ed1ec7145f66 --- /dev/null +++ b/libc/test/src/ctype/isgraph_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for isgraph----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/isgraph.h" +#include "utils/UnitTest/Test.h" + +TEST(IsGraph, DefaultLocale) { + // Loops through all characters, verifying that graphical characters + // return a non-zero integer, everything else returns zero. + for (int ch = 0; ch < 255; ++ch) { + if ('!' <= ch && ch <= '~') // A-Z, a-z, 0-9, punctuation. + EXPECT_NE(__llvm_libc::isgraph(ch), 0); + else + EXPECT_EQ(__llvm_libc::isgraph(ch), 0); + } +} diff --git a/libc/test/src/ctype/ispunct_test.cpp b/libc/test/src/ctype/ispunct_test.cpp new file mode 100644 index 0000000000000..07e83fb36c7cd --- /dev/null +++ b/libc/test/src/ctype/ispunct_test.cpp @@ -0,0 +1,34 @@ +//===-- Unittests for ispunct----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/ctype/ispunct.h" +#include "utils/UnitTest/Test.h" + +// Helper function to mark the sections of the ASCII table that are +// punctuation characters. These are listed below: +// Decimal | Symbol +// ----------------------------------------- +// 33 - 47 | ! " # $ % & ' ( ) * + , - . / +// 58 - 64 | : ; < = > ? @ +// 91 - 96 | [ \ ] ^ _ ` +// 123 - 126 | { | } ~ +static inline int is_punctuation_character(int c) { + return ('!' <= c && c <= '/') || (':' <= c && c <= '@') || + ('[' <= c && c <= '`') || ('{' <= c && c <= '~'); +} + +TEST(IsPunct, DefaultLocale) { + // Loops through all characters, verifying that punctuation characters + // return a non-zero integer, and everything else returns zero. + for (int ch = 0; ch < 255; ++ch) { + if (is_punctuation_character(ch)) + EXPECT_NE(__llvm_libc::ispunct(ch), 0); + else + EXPECT_EQ(__llvm_libc::ispunct(ch), 0); + } +} From 3e16e2152cd1fb3914d4da47d83d5e023dd3f2cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 29 Jul 2020 09:48:26 -0400 Subject: [PATCH 357/600] GlobalISel: Handle llvm.localescape This one is pretty easy and shrinks the list of unhandled intrinsics. I'm not sure how relevant the insert point is. Using the insert position of EntryBuilder will place this after constants. SelectionDAG seems to end up emitting these after argument copies and before anything else, but I don't think it really matters. This also ends up emitting these in the opposite order from SelectionDAG, but I don't think that matters either. This also needs a fix to stop the later passes dropping this as a dead instruction. 
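For reference, a minimal sketch of the source-level pattern that produces this intrinsic pair (MSVC-style SEH with a filter that reads a local of its parent frame; the function and variable names here are made up for illustration and are not from this patch):

  // Built for an MSVC target with -fms-extensions. Because the filter
  // expression reads 'caught' from the parent's frame, clang outlines the
  // filter into its own function, emits llvm.localescape for 'caught' in
  // parent(), and emits llvm.localrecover in the outlined filter so it can
  // address the parent's stack slot.
  static volatile int *bad_ptr; // assumed to be null at run time
  int parent(void) {
    int caught = 0;
    __try {
      *bad_ptr = 1; // faults
    } __except (caught = 1, 1 /* EXCEPTION_EXECUTE_HANDLER */) {
    }
    return caught;
  }
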
DeadMachineInstructionElim's version of isDead special cases LOCAL_ESCAPE for some reason, and I'm not sure why it's excluded from MachineInstr::isLabel (or why isDead doesn't check it). I also noticed DeadMachineInstructionElim never considers inline asm as dead, but GlobalISel will drop asm with no constraints. --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 28 ++++++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 8 +++ .../GlobalISel/irtranslator-localescape.ll | 70 +++++++++++++++++++ .../GlobalISel/labels-are-not-dead.mir | 34 +++++++++ 4 files changed, 140 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index be669eca0f6fa..ac867b44b574e 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" @@ -1658,6 +1659,33 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; } + case Intrinsic::localescape: { + MachineBasicBlock &EntryMBB = MF->front(); + StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName()); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + + int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg)); + MCSymbol *FrameAllocSym = + MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName, + Idx); + + // This should be inserted at the start of the entry block. + auto LocalEscape = + MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + + EntryMBB.insert(EntryMBB.begin(), LocalEscape); + } + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 4a7513f23c6b9..7fc738adb3392 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -180,6 +180,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg, bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { + // FIXME: This logical is mostly duplicated with + // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in + // MachineInstr::isLabel? (or why isDead doesn't check it). + + // Don't delete frame allocation labels. + if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) + return false; + + // If we can move an instruction, we can remove it. Otherwise, it has + // a side-effect of some sort. 
bool SawStore = false; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll new file mode 100644 index 0000000000000..60eaea4e287bf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-localescape.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -mtriple=arm64-windows -stop-after=irtranslator -o - %s | FileCheck %s + +define void @local_escape() { + ; CHECK-LABEL: name: local_escape + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: LOCAL_ESCAPE , %stack.1.b + ; CHECK: LOCAL_ESCAPE , %stack.0.a + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.a) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.b) + ; CHECK: RET_ReallyLR + %a = alloca i32 + %b = alloca i32, i32 2 + call void (...) @llvm.localescape(i32* %a, i32* %b) + store i32 42, i32* %a + store i32 13, i32* %b + ret void +} + +; Try some instructions before the localescape, and use a null +define void @local_escape_insert_point() { + ; CHECK-LABEL: name: local_escape_insert_point + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: LOCAL_ESCAPE , %stack.1.b + ; CHECK: LOCAL_ESCAPE , %stack.0.a + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.b + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.a) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.b) + ; CHECK: RET_ReallyLR + %a = alloca i32 + %b = alloca i32, i32 2 + store i32 42, i32* %a + store i32 13, i32* %b + call void (...) @llvm.localescape(i32* %a, i32* null, i32* %b) + ret void +} + +declare void @foo([128 x i32]*) + +; Check a cast of an alloca +define void @local_escape_strip_ptr_cast() { + ; CHECK-LABEL: name: local_escape_strip_ptr_cast + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: LOCAL_ESCAPE , %stack.0.a + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4 into %ir.cast) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x0 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %a = alloca [128 x i32] + %cast = bitcast [128 x i32]* %a to i32* + store i32 42, i32* %cast + call void (...) @llvm.localescape(i32* %cast, i32* null) + call void @foo([128 x i32]* %a) + ret void +} + +declare void @llvm.localescape(...) 
#0 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir b/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir new file mode 100644 index 0000000000000..ae7c7d3d6c6dd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/labels-are-not-dead.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -run-pass=legalizer %s -o - | FileCheck %s + +# The LOCAL_ESCAPE instructions should not be deleted as dead. + +--- +name: no_erase_local_escape +tracksRegLiveness: true +stack: + - { id: 0, size: 4, alignment: 4 } + - { id: 1, size: 8, alignment: 4 } +body: | + bb.0: + ; CHECK-LABEL: name: no_erase_local_escape + ; CHECK: LOCAL_ESCAPE , %stack.0 + ; CHECK: LOCAL_ESCAPE , %stack.1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store 4) + ; CHECK: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store 4) + ; CHECK: RET_ReallyLR + LOCAL_ESCAPE , %stack.0 + LOCAL_ESCAPE , %stack.1 + %2:_(s32) = G_CONSTANT i32 42 + %3:_(s32) = G_CONSTANT i32 13 + %0:_(p0) = G_FRAME_INDEX %stack.0 + %1:_(p0) = G_FRAME_INDEX %stack.1 + G_STORE %2(s32), %0(p0) :: (store 4) + G_STORE %3(s32), %1(p0) :: (store 4) + RET_ReallyLR + +... From 0729a772806e5ae38603c164c2f60e5e9f9e65e5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 12:22:47 -0700 Subject: [PATCH 358/600] [llvm-symbolizer][test] Fix pdb/pdb.test after D83530 This is a Windows only test which requires HAVE_DIA_SDK, so I failed to notice it. --- llvm/test/tools/llvm-symbolizer/pdb/pdb.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-symbolizer/pdb/pdb.test b/llvm/test/tools/llvm-symbolizer/pdb/pdb.test index df0e2320268d4..d9e42416ad562 100644 --- a/llvm/test/tools/llvm-symbolizer/pdb/pdb.test +++ b/llvm/test/tools/llvm-symbolizer/pdb/pdb.test @@ -8,24 +8,24 @@ RUN: echo 0x4013F0 >> %t.input RUN: echo 0x401420 >> %t.input RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < %t.input \ RUN: | FileCheck %s -RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < %t.input \ +RUN: llvm-symbolizer --obj="%p/Inputs/test.exe" --no-demangle < %t.input \ RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE Test with native pdb reader. RUN: llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" < %t.input \ RUN: | FileCheck %s -RUN: llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" -demangle=false < %t.input \ +RUN: llvm-symbolizer --use-native-pdb-reader --obj="%p/Inputs/test.exe" --no-demangle < %t.input \ RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE Subtract ImageBase from all the offsets and run the test again with --relative-address. 
RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin]))' < %t.input \ -RUN: | llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false --relative-address \ +RUN: | llvm-symbolizer --obj="%p/Inputs/test.exe" --no-demangle --relative-address \ RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE RUN: %python -c 'import sys;print("\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin]))' < %t.input \ -RUN: | llvm-symbolizer -use-native-pdb-reader -obj="%p/Inputs/test.exe" -demangle=false --relative-address \ +RUN: | llvm-symbolizer --use-native-pdb-reader --obj="%p/Inputs/test.exe" --no-demangle --relative-address \ RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE CHECK: foo(void) From 3bfbc5df87cb0e736fe917e4a8d5166bc0b4ea79 Mon Sep 17 00:00:00 2001 From: Diego Caballero Date: Tue, 4 Aug 2020 11:22:19 -0700 Subject: [PATCH 359/600] [MLIR][Affine] Fix createPrivateMemRef in affine fusion Always define a remapping for the memref replacement (`indexRemap`) with the proper number of inputs, including all the `outerIVs`, so that the number of inputs and the operands provided for the map don't mismatch. Reviewed By: bondhugula, andydavis1 Differential Revision: https://reviews.llvm.org/D85177 --- mlir/lib/Transforms/LoopFusion.cpp | 12 ++++------- mlir/test/Transforms/loop-fusion.mlir | 29 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index dd7b7b83debda..ed79be02b8165 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -921,21 +921,17 @@ static Value createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst, // Build an AffineMap to remap access functions based on lower bound offsets. SmallVector remapExprs; remapExprs.reserve(rank); - unsigned zeroOffsetCount = 0; for (unsigned i = 0; i < rank; i++) { - if (auto constExpr = offsets[i].dyn_cast<AffineConstantExpr>()) - if (constExpr.getValue() == 0) - ++zeroOffsetCount; auto dimExpr = b.getAffineDimExpr(outerIVs.size() + i); auto remapExpr = simplifyAffineExpr(dimExpr - offsets[i], outerIVs.size() + rank, 0); remapExprs.push_back(remapExpr); } - auto indexRemap = zeroOffsetCount == rank - ? AffineMap() - : AffineMap::get(outerIVs.size() + rank, 0, remapExprs, - forOp.getContext()); + + auto indexRemap = + AffineMap::get(outerIVs.size() + rank, 0, remapExprs, forOp.getContext()); + + // Replace all users of 'oldMemRef' with 'newMemRef'. 
LogicalResult res = replaceAllMemRefUsesWith(oldMemRef, newMemRef, {}, indexRemap, diff --git a/mlir/test/Transforms/loop-fusion.mlir b/mlir/test/Transforms/loop-fusion.mlir index 7d91e780e9edb..b4eea34b41f1e 100644 --- a/mlir/test/Transforms/loop-fusion.mlir +++ b/mlir/test/Transforms/loop-fusion.mlir @@ -2634,3 +2634,32 @@ func @should_not_fuse_since_top_level_non_affine_users(%in0 : memref<32xf32>, // CHECK: affine.for // CHECK: mulf // CHECK: subf + +// ----- + +// MAXIMAL-LABEL: func @fuse_minor_affine_map +func @fuse_minor_affine_map(%in: memref<128xf32>, %out: memref<20x512xf32>) { + %tmp = alloc() : memref<128xf32> + + affine.for %arg4 = 0 to 128 { + %ld = affine.load %in[%arg4] : memref<128xf32> + affine.store %ld, %tmp[%arg4] : memref<128xf32> + } + + affine.for %arg3 = 0 to 20 { + affine.for %arg4 = 0 to 512 { + %ld = affine.load %tmp[%arg4 mod 128] : memref<128xf32> + affine.store %ld, %out[%arg3, %arg4] : memref<20x512xf32> + } + } + + return +} + +// TODO: The size of the private memref is not properly computed in the presence +// of the 'mod' operation. It should be memref<1xf32> instead of +// memref<128xf32>: https://bugs.llvm.org/show_bug.cgi?id=46973 +// MAXIMAL: alloc() : memref<128xf32> +// MAXIMAL: affine.for +// MAXIMAL-NEXT: affine.for +// MAXIMAL-NOT: affine.for From 6d218b4adb093ff2e9764febbbc89f429412006c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 29 Jul 2020 22:46:07 -0700 Subject: [PATCH 360/600] BPF: support type exist/size and enum exist/value relocations Four new CO-RE relocations are introduced: - TYPE_EXISTENCE: whether a typedef/record/enum type exists - TYPE_SIZE: the size of a typedef/record/enum type - ENUM_VALUE_EXISTENCE: whether an enum value of an enum type exists - ENUM_VALUE: the enum value of an enum type These additional relocations will make CO-RE bpf programs more adaptive for potential kernel internal data structure changes. 
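For illustration, here is a minimal C sketch of the source-level usage these relocations are generated from, reconstructed from the "Source:" comments in the tests added below. The builtin names and the flag values (0 for the existence variants, 1 for size/value) are exactly those exercised by the tests; resolving the relocations against the target kernel's BTF is assumed to be done by a CO-RE-aware loader such as libbpf.

  /* Compile with: clang -target bpf -S -O2 -g -emit-llvm t1.c */
  enum AA { VAL1 = -100 };
  typedef int (*func_t)(void);

  int test(void) {
    /* TYPE_EXISTENCE: patched to 1 if 'func_t' exists in the target BTF. */
    int exists = __builtin_preserve_type_info(*(func_t *)0, 0);
    /* TYPE_SIZE: byte size of the eventual base type of 'func_t'. */
    int size = __builtin_preserve_type_info(*(func_t *)0, 1);
    /* ENUM_VALUE: the (possibly relocated) value of enumerator 'VAL1'. */
    long long val = __builtin_preserve_enum_value(*(enum AA *)VAL1, 1);
    return exists + size + (int)val;
  }

At compile time the existence variants simply patch in 1, TYPE_SIZE patches in the size of the eventual base type, and ENUM_VALUE patches in the enumerator's value from the debug info (the two enum relocations are lowered to 64-bit LD_imm64 rather than MOV_ri). The emitted FieldReloc entries then let the loader rewrite these immediates for the running kernel.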
Differential Revision: https://reviews.llvm.org/D83878 --- .../Target/BPF/BPFAbstractMemberAccess.cpp | 115 ++++++++++++++++-- llvm/lib/Target/BPF/BPFCORE.h | 18 +++ llvm/lib/Target/BPF/BTFDebug.cpp | 24 ++-- llvm/lib/Target/BPF/BTFDebug.h | 2 +- .../BPF/CORE/intrinsic-typeinfo-enum-value.ll | 99 +++++++++++++++ .../BPF/CORE/intrinsic-typeinfo-type-exist.ll | 98 +++++++++++++++ .../CORE/intrinsic-typeinfo-type-size-1.ll | 98 +++++++++++++++ .../CORE/intrinsic-typeinfo-type-size-2.ll | 114 +++++++++++++++++ 8 files changed, 551 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-enum-value.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-exist.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-1.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-2.ll diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 16708c4d1ce6f..f6f9855fbe421 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -165,6 +165,8 @@ class BPFAbstractMemberAccess final : public ModulePass { Value *computeBaseAndAccessKey(CallInst *Call, CallInfo &CInfo, std::string &AccessKey, MDNode *&BaseMeta); + MDNode *computeAccessKey(CallInst *Call, CallInfo &CInfo, + std::string &AccessKey, bool &IsInt32Ret); uint64_t getConstant(const Value *IndexValue); bool transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo); }; @@ -285,6 +287,34 @@ bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst *Call, CInfo.AccessIndex = InfoKind; return true; } + if (GV->getName().startswith("llvm.bpf.preserve.type.info")) { + CInfo.Kind = BPFPreserveFieldInfoAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.type.info intrinsic"); + uint64_t Flag = getConstant(Call->getArgOperand(1)); + if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_TYPE_INFO_FLAG) + report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic"); + if (Flag == BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE) + CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_EXISTENCE; + else + CInfo.AccessIndex = BPFCoreSharedInfo::TYPE_SIZE; + return true; + } + if (GV->getName().startswith("llvm.bpf.preserve.enum.value")) { + CInfo.Kind = BPFPreserveFieldInfoAI; + CInfo.Metadata = Call->getMetadata(LLVMContext::MD_preserve_access_index); + if (!CInfo.Metadata) + report_fatal_error("Missing metadata for llvm.preserve.enum.value intrinsic"); + uint64_t Flag = getConstant(Call->getArgOperand(2)); + if (Flag >= BPFCoreSharedInfo::MAX_PRESERVE_ENUM_VALUE_FLAG) + report_fatal_error("Incorrect flag for llvm.bpf.preserve.enum.value intrinsic"); + if (Flag == BPFCoreSharedInfo::PRESERVE_ENUM_VALUE_EXISTENCE) + CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE; + else + CInfo.AccessIndex = BPFCoreSharedInfo::ENUM_VALUE; + return true; + } return false; } @@ -847,26 +877,92 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, return Base; } +MDNode *BPFAbstractMemberAccess::computeAccessKey(CallInst *Call, + CallInfo &CInfo, + std::string &AccessKey, + bool &IsInt32Ret) { + DIType *Ty = stripQualifiers(cast(CInfo.Metadata), false); + assert(!Ty->getName().empty()); + + int64_t PatchImm; + std::string AccessStr("0"); + if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_EXISTENCE) { + PatchImm = 
1; + } else if (CInfo.AccessIndex == BPFCoreSharedInfo::TYPE_SIZE) { + // typedef debuginfo type has size 0, get the eventual base type. + DIType *BaseTy = stripQualifiers(Ty, true); + PatchImm = BaseTy->getSizeInBits() / 8; + } else { + // ENUM_VALUE_EXISTENCE and ENUM_VALUE + IsInt32Ret = false; + + const auto *CE = cast(Call->getArgOperand(1)); + const GlobalVariable *GV = cast(CE->getOperand(0)); + assert(GV->hasInitializer()); + const ConstantDataArray *DA = cast(GV->getInitializer()); + assert(DA->isString()); + StringRef ValueStr = DA->getAsString(); + + // ValueStr format: : + size_t Separator = ValueStr.find_first_of(':'); + StringRef EnumeratorStr = ValueStr.substr(0, Separator); + + // Find enumerator index in the debuginfo + DIType *BaseTy = stripQualifiers(Ty, true); + const auto *CTy = cast(BaseTy); + assert(CTy->getTag() == dwarf::DW_TAG_enumeration_type); + int EnumIndex = 0; + for (const auto Element : CTy->getElements()) { + const auto *Enum = cast(Element); + if (Enum->getName() == EnumeratorStr) { + AccessStr = std::to_string(EnumIndex); + break; + } + EnumIndex++; + } + + if (CInfo.AccessIndex == BPFCoreSharedInfo::ENUM_VALUE) { + StringRef EValueStr = ValueStr.substr(Separator + 1); + PatchImm = std::stoll(std::string(EValueStr)); + } else { + PatchImm = 1; + } + } + + AccessKey = "llvm." + Ty->getName().str() + ":" + + std::to_string(CInfo.AccessIndex) + std::string(":") + + std::to_string(PatchImm) + std::string("$") + AccessStr; + + return Ty; +} + /// Call/Kind is the base preserve_*_access_index() call. Attempts to do /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, CallInfo &CInfo) { std::string AccessKey; MDNode *TypeMeta; - Value *Base = - computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta); - if (!Base) - return false; + Value *Base = nullptr; + bool IsInt32Ret; + + IsInt32Ret = CInfo.Kind == BPFPreserveFieldInfoAI; + if (CInfo.Kind == BPFPreserveFieldInfoAI && CInfo.Metadata) { + TypeMeta = computeAccessKey(Call, CInfo, AccessKey, IsInt32Ret); + } else { + Base = computeBaseAndAccessKey(Call, CInfo, AccessKey, TypeMeta); + if (!Base) + return false; + } BasicBlock *BB = Call->getParent(); GlobalVariable *GV; if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { IntegerType *VarType; - if (CInfo.Kind == BPFPreserveFieldInfoAI) + if (IsInt32Ret) VarType = Type::getInt32Ty(BB->getContext()); // 32bit return value else - VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr arith + VarType = Type::getInt64Ty(BB->getContext()); // 64bit ptr or enum value GV = new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage, NULL, AccessKey); @@ -879,8 +975,11 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, if (CInfo.Kind == BPFPreserveFieldInfoAI) { // Load the global variable which represents the returned field info. 
- auto *LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "", - Call); + LoadInst *LDInst; + if (IsInt32Ret) + LDInst = new LoadInst(Type::getInt32Ty(BB->getContext()), GV, "", Call); + else + LDInst = new LoadInst(Type::getInt64Ty(BB->getContext()), GV, "", Call); Call->replaceAllUsesWith(LDInst); Call->eraseFromParent(); return true; diff --git a/llvm/lib/Target/BPF/BPFCORE.h b/llvm/lib/Target/BPF/BPFCORE.h index af6425b16fa01..ebc60baea4dcd 100644 --- a/llvm/lib/Target/BPF/BPFCORE.h +++ b/llvm/lib/Target/BPF/BPFCORE.h @@ -24,6 +24,10 @@ class BPFCoreSharedInfo { FIELD_RSHIFT_U64, BTF_TYPE_ID_LOCAL, BTF_TYPE_ID_REMOTE, + TYPE_EXISTENCE, + TYPE_SIZE, + ENUM_VALUE_EXISTENCE, + ENUM_VALUE, MAX_FIELD_RELOC_KIND, }; @@ -35,6 +39,20 @@ class BPFCoreSharedInfo { MAX_BTF_TYPE_ID_FLAG, }; + enum PreserveTypeInfo : uint32_t { + PRESERVE_TYPE_INFO_EXISTENCE = 0, + PRESERVE_TYPE_INFO_SIZE, + + MAX_PRESERVE_TYPE_INFO_FLAG, + }; + + enum PreserveEnumValue : uint32_t { + PRESERVE_ENUM_VALUE_EXISTENCE = 0, + PRESERVE_ENUM_VALUE, + + MAX_PRESERVE_ENUM_VALUE_FLAG, + }; + /// The attribute attached to globals representing a field access static constexpr StringRef AmaAttr = "btf_ama"; /// The attribute attached to globals representing a type id diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 13999d800a800..709e599cd6b8f 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -994,12 +994,13 @@ void BTFDebug::generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId, FieldReloc.OffsetNameOff = addString(IndexPattern); FieldReloc.RelocKind = std::stoull(std::string(RelocKindStr)); - PatchImms[GVar] = std::stoul(std::string(PatchImmStr)); + PatchImms[GVar] = std::make_pair(std::stoll(std::string(PatchImmStr)), + FieldReloc.RelocKind); } else { StringRef RelocStr = AccessPattern.substr(FirstDollar + 1); FieldReloc.OffsetNameOff = addString("0"); FieldReloc.RelocKind = std::stoull(std::string(RelocStr)); - PatchImms[GVar] = RootId; + PatchImms[GVar] = std::make_pair(RootId, FieldReloc.RelocKind); } FieldRelocTable[SecNameOff].push_back(FieldReloc); } @@ -1209,14 +1210,21 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { auto *GVar = dyn_cast(GVal); if (GVar) { // Emit "mov ri, " - uint32_t Imm; + int64_t Imm; + uint32_t Reloc; if (GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr) || - GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr)) - Imm = PatchImms[GVar]; - else + GVar->hasAttribute(BPFCoreSharedInfo::TypeIdAttr)) { + Imm = PatchImms[GVar].first; + Reloc = PatchImms[GVar].second; + } else { return false; + } - OutMI.setOpcode(BPF::MOV_ri); + if (Reloc == BPFCoreSharedInfo::ENUM_VALUE_EXISTENCE || + Reloc == BPFCoreSharedInfo::ENUM_VALUE) + OutMI.setOpcode(BPF::LD_imm64); + else + OutMI.setOpcode(BPF::MOV_ri); OutMI.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); OutMI.addOperand(MCOperand::createImm(Imm)); return true; @@ -1230,7 +1238,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { const GlobalValue *GVal = MO.getGlobal(); auto *GVar = dyn_cast(GVal); if (GVar && GVar->hasAttribute(BPFCoreSharedInfo::AmaAttr)) { - uint32_t Imm = PatchImms[GVar]; + uint32_t Imm = PatchImms[GVar].first; OutMI.setOpcode(MI->getOperand(1).getImm()); if (MI->getOperand(0).isImm()) OutMI.addOperand(MCOperand::createImm(MI->getOperand(0).getImm())); diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 2f39f665299a5..db5b5633f6d90 100644 --- 
a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -251,7 +251,7 @@ class BTFDebug : public DebugHandlerBase { StringMap> FileContent; std::map> DataSecEntries; std::vector StructTypes; - std::map PatchImms; + std::map> PatchImms; std::map>> FixupDerivedTypes; std::setProtoFunctions; diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-enum-value.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-enum-value.ll new file mode 100644 index 0000000000000..3a77f791fb6f9 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-enum-value.ll @@ -0,0 +1,99 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck %s +; +; Source: +; enum AA { VAL1 = -100, VAL2 = 0xffff8000 }; +; typedef enum { VAL10 = 0xffffFFFF80000000 } __BB; +; int test() { +; return __builtin_preserve_enum_value(*(enum AA *)VAL1, 0) + +; __builtin_preserve_enum_value(*(enum AA *)VAL2, 1) + +; __builtin_preserve_enum_value(*(__BB *)VAL10, 1); +; } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm t1.c + +@0 = private unnamed_addr constant [10 x i8] c"VAL1:-100\00", align 1 +@1 = private unnamed_addr constant [16 x i8] c"VAL2:4294934528\00", align 1 +@2 = private unnamed_addr constant [18 x i8] c"VAL10:-2147483648\00", align 1 + +; Function Attrs: nounwind readnone +define dso_local i32 @test() local_unnamed_addr #0 !dbg !18 { +entry: + %0 = tail call i64 @llvm.bpf.preserve.enum.value(i32 0, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @0, i64 0, i64 0), i64 0), !dbg !23, !llvm.preserve.access.index !3 + %1 = tail call i64 @llvm.bpf.preserve.enum.value(i32 1, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @1, i64 0, i64 0), i64 1), !dbg !24, !llvm.preserve.access.index !3 + %add = add i64 %1, %0, !dbg !25 + %2 = tail call i64 @llvm.bpf.preserve.enum.value(i32 2, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @2, i64 0, i64 0), i64 1), !dbg !26, !llvm.preserve.access.index !13 + %add1 = add i64 %add, %2, !dbg !27 + %conv = trunc i64 %add1 to i32, !dbg !23 + ret i32 %conv, !dbg !28 +} + +; CHECK: r{{[0-9]+}} = 1 ll +; CHECK: r{{[0-9]+}} = 4294934528 ll +; CHECK: r{{[0-9]+}} = -2147483648 ll +; CHECK: exit + +; CHECK: .long 16 # BTF_KIND_ENUM(id = 4) +; CHECK: .long 57 # BTF_KIND_TYPEDEF(id = 5) + +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .ascii "AA" # string offset=16 +; CHECK: .byte 48 # string offset=29 +; CHECK: .byte 49 # string offset=55 +; CHECK: .ascii "__BB" # string offset=57 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 29 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 55 +; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 29 +; CHECK-NEXT: .long 11 + +; Function Attrs: nounwind readnone +declare i64 @llvm.bpf.preserve.enum.value(i32, i8*, i64) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + 
+!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!14, !15, !16} +!llvm.ident = !{!17} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !12, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t1.c", directory: "/tmp/home/yhs/tmp1") +!2 = !{!3, !8} +!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA", file: !1, line: 1, baseType: !4, size: 64, elements: !5) +!4 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) +!5 = !{!6, !7} +!6 = !DIEnumerator(name: "VAL1", value: -100) +!7 = !DIEnumerator(name: "VAL2", value: 4294934528) +!8 = !DICompositeType(tag: DW_TAG_enumeration_type, file: !1, line: 2, baseType: !9, size: 64, elements: !10) +!9 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!10 = !{!11} +!11 = !DIEnumerator(name: "VAL10", value: 18446744071562067968, isUnsigned: true) +!12 = !{!13} +!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "__BB", file: !1, line: 2, baseType: !8) +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)"} +!18 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 3, type: !19, scopeLine: 3, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !22) +!19 = !DISubroutineType(types: !20) +!20 = !{!21} +!21 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!22 = !{} +!23 = !DILocation(line: 4, column: 10, scope: !18) +!24 = !DILocation(line: 5, column: 10, scope: !18) +!25 = !DILocation(line: 4, column: 61, scope: !18) +!26 = !DILocation(line: 6, column: 10, scope: !18) +!27 = !DILocation(line: 5, column: 61, scope: !18) +!28 = !DILocation(line: 4, column: 3, scope: !18) diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-exist.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-exist.ll new file mode 100644 index 0000000000000..1d5dcbb9ffa35 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-exist.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck %s +; +; Source: +; enum AA { VAL = 100 }; +; typedef int (*func_t)(void); +; struct s2 { int a[10]; }; +; int test() { +; return __builtin_preserve_type_info(*(func_t *)0, 0) + +; __builtin_preserve_type_info(*(struct s2 *)0, 0) + +; __builtin_preserve_type_info(*(enum AA *)0, 0); +; } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm t1.c + +; Function Attrs: nounwind readnone +define dso_local i32 @test() local_unnamed_addr #0 !dbg !17 { +entry: + %0 = tail call i32 @llvm.bpf.preserve.type.info(i32 0, i64 0), !dbg !19, !llvm.preserve.access.index !8 + %1 = tail call i32 @llvm.bpf.preserve.type.info(i32 1, i64 0), !dbg !20, !llvm.preserve.access.index !21 + %add = add i32 %1, %0, !dbg !27 + %2 = tail call i32 @llvm.bpf.preserve.type.info(i32 2, i64 0), !dbg !28, !llvm.preserve.access.index !3 + %add1 = add i32 %add, %2, !dbg !29 + ret i32 %add1, !dbg !30 +} + +; CHECK: r{{[0-9]+}} = 1 +; CHECK: r{{[0-9]+}} = 1 +; CHECK: r{{[0-9]+}} = 1 +; CHECK: exit + +; CHECK: .long 16 # BTF_KIND_TYPEDEF(id 
= 4) +; CHECK: .long 49 # BTF_KIND_STRUCT(id = 7) +; CHECK: .long 74 # BTF_KIND_ENUM(id = 10) + +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .ascii "func_t" # string offset=16 +; CHECK: .byte 48 # string offset=23 +; CHECK: .ascii "s2" # string offset=49 +; CHECK: .ascii "AA" # string offset=74 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 8 + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.preserve.type.info(i32, i64) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14, !15} +!llvm.ident = !{!16} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !7, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t1.c", directory: "/tmp/home/yhs/tmp1") +!2 = !{!3} +!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA", file: !1, line: 1, baseType: !4, size: 32, elements: !5) +!4 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!5 = !{!6} +!6 = !DIEnumerator(name: "VAL", value: 100, isUnsigned: true) +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "func_t", file: !1, line: 2, baseType: !9) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{i32 7, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)"} +!17 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !10, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!18 = !{} +!19 = !DILocation(line: 5, column: 10, scope: !17) +!20 = !DILocation(line: 6, column: 10, scope: !17) +!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2", file: !1, line: 3, size: 320, elements: !22) +!22 = !{!23} +!23 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !21, file: !1, line: 3, baseType: !24, size: 320) +!24 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 320, elements: !25) +!25 = !{!26} +!26 = !DISubrange(count: 10) +!27 = !DILocation(line: 5, column: 56, scope: !17) +!28 = !DILocation(line: 7, column: 10, scope: !17) +!29 = !DILocation(line: 6, column: 59, scope: !17) +!30 = !DILocation(line: 5, column: 3, scope: !17) diff --git 
a/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-1.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-1.ll new file mode 100644 index 0000000000000..ee80a1bd560dc --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-1.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck %s +; +; Source: +; enum AA { VAL = 100 }; +; typedef int (*func_t)(void); +; struct s2 { int a[10]; }; +; int test() { +; return __builtin_preserve_type_info(*(func_t *)0, 1) + +; __builtin_preserve_type_info(*(struct s2 *)0, 1) + +; __builtin_preserve_type_info(*(enum AA *)0, 1); +; } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm t1.c + +; Function Attrs: nounwind readnone +define dso_local i32 @test() local_unnamed_addr #0 !dbg !17 { +entry: + %0 = tail call i32 @llvm.bpf.preserve.type.info(i32 0, i64 1), !dbg !19, !llvm.preserve.access.index !8 + %1 = tail call i32 @llvm.bpf.preserve.type.info(i32 1, i64 1), !dbg !20, !llvm.preserve.access.index !21 + %add = add i32 %1, %0, !dbg !27 + %2 = tail call i32 @llvm.bpf.preserve.type.info(i32 2, i64 1), !dbg !28, !llvm.preserve.access.index !3 + %add1 = add i32 %add, %2, !dbg !29 + ret i32 %add1, !dbg !30 +} + +; CHECK: r{{[0-9]+}} = 8 +; CHECK: r{{[0-9]+}} = 40 +; CHECK: r{{[0-9]+}} = 4 +; CHECK: exit + +; CHECK: .long 16 # BTF_KIND_TYPEDEF(id = 4) +; CHECK: .long 49 # BTF_KIND_STRUCT(id = 7) +; CHECK: .long 74 # BTF_KIND_ENUM(id = 10) + +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .ascii "func_t" # string offset=16 +; CHECK: .byte 48 # string offset=23 +; CHECK: .ascii "s2" # string offset=49 +; CHECK: .ascii "AA" # string offset=74 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.preserve.type.info(i32, i64) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14, !15} +!llvm.ident = !{!16} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !7, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t1.c", directory: "/tmp/home/yhs/tmp1") +!2 = !{!3} +!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA", file: !1, line: 1, baseType: !4, size: 32, elements: !5) +!4 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!5 = !{!6} +!6 = !DIEnumerator(name: "VAL", value: 100, isUnsigned: true) +!7 = !{!8} +!8 = !DIDerivedType(tag: 
DW_TAG_typedef, name: "func_t", file: !1, line: 2, baseType: !9) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{i32 7, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)"} +!17 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !10, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!18 = !{} +!19 = !DILocation(line: 5, column: 10, scope: !17) +!20 = !DILocation(line: 6, column: 10, scope: !17) +!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2", file: !1, line: 3, size: 320, elements: !22) +!22 = !{!23} +!23 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !21, file: !1, line: 3, baseType: !24, size: 320) +!24 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 320, elements: !25) +!25 = !{!26} +!26 = !DISubrange(count: 10) +!27 = !DILocation(line: 5, column: 56, scope: !17) +!28 = !DILocation(line: 7, column: 10, scope: !17) +!29 = !DILocation(line: 6, column: 59, scope: !17) +!30 = !DILocation(line: 5, column: 3, scope: !17) diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-2.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-2.ll new file mode 100644 index 0000000000000..35f2ae5ce660d --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-typeinfo-type-size-2.ll @@ -0,0 +1,114 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; RUN: llc -march=bpfel -mattr=+alu32 -filetype=asm -o - %s | FileCheck %s +; +; Source: +; enum AA { VAL = 100 }; +; typedef int (*func_t)(void); +; struct s2 { int a[10]; }; +; int test() { +; func_t f; +; struct s2 s; +; enum AA a; +; return __builtin_preserve_type_info(f, 1) + +; __builtin_preserve_type_info(s, 1) + +; __builtin_preserve_type_info(a, 1); +; } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm t1.c + +; Function Attrs: nounwind readnone +define dso_local i32 @test() local_unnamed_addr #0 !dbg !17 { +entry: + call void @llvm.dbg.declare(metadata [10 x i32]* undef, metadata !20, metadata !DIExpression()), !dbg !28 + call void @llvm.dbg.declare(metadata i32 ()** undef, metadata !19, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.declare(metadata i32* undef, metadata !27, metadata !DIExpression()), !dbg !30 + %0 = tail call i32 @llvm.bpf.preserve.type.info(i32 0, i64 1), !dbg !31, !llvm.preserve.access.index !8 + %1 = tail call i32 @llvm.bpf.preserve.type.info(i32 1, i64 1), !dbg !32, !llvm.preserve.access.index !21 + %add = add i32 %1, %0, !dbg !33 + %2 = tail call i32 @llvm.bpf.preserve.type.info(i32 2, i64 1), !dbg !34, !llvm.preserve.access.index !3 + %add1 = add i32 %add, %2, !dbg !35 + ret i32 %add1, !dbg !36 +} + +; CHECK: r{{[0-9]+}} = 8 +; CHECK: r{{[0-9]+}} = 40 +; CHECK: r{{[0-9]+}} = 4 +; CHECK: exit + +; CHECK: .long 16 # BTF_KIND_TYPEDEF(id = 4) +; CHECK: .long 49 # BTF_KIND_STRUCT(id = 7) +; CHECK: .long 74 # BTF_KIND_ENUM(id = 10) + +; CHECK: .ascii ".text" # string offset=10 +; CHECK: .ascii "func_t" # string offset=16 +; CHECK: .byte 48 # string offset=23 +; CHECK: .ascii "s2" # string offset=49 +; CHECK: .ascii "AA" # string offset=74 + +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 
10 # Field reloc section string offset=10 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 23 +; CHECK-NEXT: .long 9 + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.preserve.type.info(i32, i64) #2 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14, !15} +!llvm.ident = !{!16} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !7, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "t1.c", directory: "/tmp/home/yhs/tmp1") +!2 = !{!3} +!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "AA", file: !1, line: 1, baseType: !4, size: 32, elements: !5) +!4 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!5 = !{!6} +!6 = !DIEnumerator(name: "VAL", value: 100, isUnsigned: true) +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "func_t", file: !1, line: 2, baseType: !9) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{i32 7, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git d8b1394a0f4bbf57c254f69f8d3aa5381a89b5cd)"} +!17 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !10, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!18 = !{!19, !20, !27} +!19 = !DILocalVariable(name: "f", scope: !17, file: !1, line: 5, type: !8) +!20 = !DILocalVariable(name: "s", scope: !17, file: !1, line: 6, type: !21) +!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2", file: !1, line: 3, size: 320, elements: !22) +!22 = !{!23} +!23 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !21, file: !1, line: 3, baseType: !24, size: 320) +!24 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 320, elements: !25) +!25 = !{!26} +!26 = !DISubrange(count: 10) +!27 = !DILocalVariable(name: "a", scope: !17, file: !1, line: 7, type: !3) +!28 = !DILocation(line: 6, column: 13, scope: !17) +!29 = !DILocation(line: 5, column: 10, scope: !17) +!30 = !DILocation(line: 7, column: 11, scope: !17) +!31 = !DILocation(line: 8, column: 10, scope: !17) +!32 = !DILocation(line: 9, column: 10, scope: !17) +!33 = 
!DILocation(line: 8, column: 45, scope: !17) +!34 = !DILocation(line: 10, column: 10, scope: !17) +!35 = !DILocation(line: 9, column: 45, scope: !17) +!36 = !DILocation(line: 8, column: 3, scope: !17) From 0f2b47b6da0be5e5a597a274bd5c2ae18cc406bd Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Tue, 4 Aug 2020 14:26:23 -0500 Subject: [PATCH 361/600] [FastISel] Don't transform FSUB(-0, X) -> FNEG(X) in FastISel This corresponds with the SelectionDAGISel change in D84056. Also, rename some poorly named tests in CodeGen/X86/fast-isel-fneg.ll with NFC. Differential Revision: https://reviews.llvm.org/D85149 --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 7 +- llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll | 22 ------ llvm/test/CodeGen/X86/fast-isel-fneg.ll | 79 ++++---------------- 3 files changed, 17 insertions(+), 91 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index fc6c3a145f132..1b924037c3be0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1845,13 +1845,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: return selectBinaryOp(I, ISD::SUB); - case Instruction::FSub: { - // FNeg is currently represented in LLVM IR as a special case of FSub. - Value *X; - if (match(I, m_FNeg(m_Value(X)))) - return selectFNeg(I, X); + case Instruction::FSub: return selectBinaryOp(I, ISD::FSUB); - } case Instruction::Mul: return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: diff --git a/llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll b/llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll deleted file mode 100644 index e55b3afb812be..0000000000000 --- a/llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll +++ /dev/null @@ -1,22 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-apple-darwin10 -stop-after=finalize-isel | FileCheck %s - -; Make sure we output the right kill flag for the xor conversion. 
- -define void @goo(double* %x, double* %y) nounwind { -; CHECK: %[[REG2:.*]]:gr64 = COPY $rsi -; CHECK-NEXT: %[[REG0:.*]]:gr64 = COPY $rdi -; CHECK-NEXT: %[[REG1:.*]]:gr64 = COPY killed %[[REG0]] -; CHECK-NEXT: %[[REG3:.*]]:gr64 = COPY killed %[[REG2]] -; CHECK-NEXT: %[[REG10:.*]]:fr64 = MOVSDrm_alt %[[REG1]], 1, $noreg, 0, $noreg :: (load 8 from %ir.x) -; CHECK-NEXT: %[[REG6:.*]]:gr64 = MOVSDto64rr killed %[[REG10]] -; CHECK-NEXT: %[[REG7:.*]]:gr64 = MOV64ri -9223372036854775808 -; CHECK-NEXT: %[[REG8:.*]]:gr64 = XOR64rr killed %[[REG6]], killed %[[REG7]], implicit-def $eflags -; CHECK-NEXT: %[[REG9:.*]]:fr64 = MOV64toSDrr killed %[[REG8]] -; CHECK-NEXT: MOVSDmr %[[REG3]], 1, $noreg, 0, $noreg, killed %[[REG9]] :: (store 8 into %ir.y) -; CHECK-NEXT: RETQ - %a = load double, double* %x - %b = fsub double -0.0, %a - store double %b, double* %y - ret void -} diff --git a/llvm/test/CodeGen/X86/fast-isel-fneg.ll b/llvm/test/CodeGen/X86/fast-isel-fneg.ll index beb454ece26b7..d575a277cf0f9 100644 --- a/llvm/test/CodeGen/X86/fast-isel-fneg.ll +++ b/llvm/test/CodeGen/X86/fast-isel-fneg.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-apple-darwin10 | FileCheck %s ; RUN: llc < %s -fast-isel -mtriple=i686-- -mattr=+sse2 | FileCheck --check-prefix=SSE2 %s -define double @doo(double %x) nounwind { -; CHECK-LABEL: doo: +define double @fneg_f64(double %x) nounwind { +; CHECK-LABEL: fneg_f64: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq %xmm0, %rax ; CHECK-NEXT: movabsq $-9223372036854775808, %rcx ## imm = 0x8000000000000000 @@ -11,7 +11,7 @@ define double @doo(double %x) nounwind { ; CHECK-NEXT: movq %rcx, %xmm0 ; CHECK-NEXT: retq ; -; SSE2-LABEL: doo: +; SSE2-LABEL: fneg_f64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushl %ebp ; SSE2-NEXT: movl %esp, %ebp @@ -24,19 +24,19 @@ define double @doo(double %x) nounwind { ; SSE2-NEXT: movl %ebp, %esp ; SSE2-NEXT: popl %ebp ; SSE2-NEXT: retl - %y = fsub double -0.0, %x + %y = fneg double %x ret double %y } -define float @foo(float %x) nounwind { -; CHECK-LABEL: foo: +define float @fneg_f32(float %x) nounwind { +; CHECK-LABEL: fneg_f32: ; CHECK: ## %bb.0: ; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: xorl $2147483648, %eax ## imm = 0x80000000 ; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: retq ; -; SSE2-LABEL: foo: +; SSE2-LABEL: fneg_f32: ; SSE2: # %bb.0: ; SSE2-NEXT: pushl %eax ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -45,12 +45,12 @@ define float @foo(float %x) nounwind { ; SSE2-NEXT: flds (%esp) ; SSE2-NEXT: popl %eax ; SSE2-NEXT: retl - %y = fsub float -0.0, %x + %y = fneg float %x ret float %y } -define void @goo(double* %x, double* %y) nounwind { -; CHECK-LABEL: goo: +define void @fneg_f64_mem(double* %x, double* %y) nounwind { +; CHECK-LABEL: fneg_f64_mem: ; CHECK: ## %bb.0: ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movq %xmm0, %rax @@ -60,7 +60,7 @@ define void @goo(double* %x, double* %y) nounwind { ; CHECK-NEXT: movq %xmm0, (%rsi) ; CHECK-NEXT: retq ; -; SSE2-LABEL: goo: +; SSE2-LABEL: fneg_f64_mem: ; SSE2: # %bb.0: ; SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -69,13 +69,13 @@ define void @goo(double* %x, double* %y) nounwind { ; SSE2-NEXT: movsd %xmm0, (%eax) ; SSE2-NEXT: retl %a = load double, double* %x - %b = fsub double -0.0, %a + %b = fneg double %a store double %b, double* %y ret void } -define void @loo(float* %x, float* %y) nounwind { -; CHECK-LABEL: loo: +define void @fneg_f32_mem(float* %x, float* %y) nounwind { +; CHECK-LABEL: fneg_f32_mem: ; CHECK: 
## %bb.0: ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movd %xmm0, %eax @@ -84,7 +84,7 @@ define void @loo(float* %x, float* %y) nounwind { ; CHECK-NEXT: movd %xmm0, (%rsi) ; CHECK-NEXT: retq ; -; SSE2-LABEL: loo: +; SSE2-LABEL: fneg_f32_mem: ; SSE2: # %bb.0: ; SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -95,54 +95,7 @@ define void @loo(float* %x, float* %y) nounwind { ; SSE2-NEXT: movd %xmm0, (%eax) ; SSE2-NEXT: retl %a = load float, float* %x - %b = fsub float -0.0, %a + %b = fneg float %a store float %b, float* %y ret void } - -define double @too(double %x) nounwind { -; CHECK-LABEL: too: -; CHECK: ## %bb.0: -; CHECK-NEXT: movq %xmm0, %rax -; CHECK-NEXT: movabsq $-9223372036854775808, %rcx ## imm = 0x8000000000000000 -; CHECK-NEXT: xorq %rax, %rcx -; CHECK-NEXT: movq %rcx, %xmm0 -; CHECK-NEXT: retq -; -; SSE2-LABEL: too: -; SSE2: # %bb.0: -; SSE2-NEXT: pushl %ebp -; SSE2-NEXT: movl %esp, %ebp -; SSE2-NEXT: andl $-8, %esp -; SSE2-NEXT: subl $8, %esp -; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: xorps {{\.LCPI.*}}, %xmm0 -; SSE2-NEXT: movlps %xmm0, (%esp) -; SSE2-NEXT: fldl (%esp) -; SSE2-NEXT: movl %ebp, %esp -; SSE2-NEXT: popl %ebp -; SSE2-NEXT: retl - %y = fneg double %x - ret double %y -} - -define float @zoo(float %x) nounwind { -; CHECK-LABEL: zoo: -; CHECK: ## %bb.0: -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorl $2147483648, %eax ## imm = 0x80000000 -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: retq -; -; SSE2-LABEL: zoo: -; SSE2: # %bb.0: -; SSE2-NEXT: pushl %eax -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: xorps {{\.LCPI.*}}, %xmm0 -; SSE2-NEXT: movss %xmm0, (%esp) -; SSE2-NEXT: flds (%esp) -; SSE2-NEXT: popl %eax -; SSE2-NEXT: retl - %y = fneg float %x - ret float %y -} From 1d6a724aa1c11a37ff083cf637f91852e96ce11f Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 4 Aug 2020 11:46:26 -0700 Subject: [PATCH 362/600] [MLIR] Change FunctionType::get() and TupleType::get() to use TypeRange - Moved TypeRange into its own header/cpp file, and add hashing support. 
- Change FunctionType::get() and TupleType::get() to use TypeRange Differential Revision: https://reviews.llvm.org/D85075 --- flang/lib/Lower/RTBuilder.h | 2 +- mlir/include/mlir/IR/Builders.h | 4 +- mlir/include/mlir/IR/OperationSupport.h | 99 +---------- mlir/include/mlir/IR/StandardTypes.h | 4 +- mlir/include/mlir/IR/TypeRange.h | 181 +++++++++++++++++++++ mlir/include/mlir/IR/Types.h | 7 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 8 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 3 +- mlir/lib/IR/Builders.cpp | 5 +- mlir/lib/IR/CMakeLists.txt | 1 + mlir/lib/IR/OperationSupport.cpp | 39 ----- mlir/lib/IR/StandardTypes.cpp | 5 +- mlir/lib/IR/TypeDetail.h | 10 +- mlir/lib/IR/TypeRange.cpp | 50 ++++++ mlir/lib/IR/Types.cpp | 2 +- 15 files changed, 257 insertions(+), 163 deletions(-) create mode 100644 mlir/include/mlir/IR/TypeRange.h create mode 100644 mlir/lib/IR/TypeRange.cpp diff --git a/flang/lib/Lower/RTBuilder.h b/flang/lib/Lower/RTBuilder.h index 2f66fa8efac0f..3855f6816d6e6 100644 --- a/flang/lib/Lower/RTBuilder.h +++ b/flang/lib/Lower/RTBuilder.h @@ -168,7 +168,7 @@ constexpr TypeBuilderFunc getModel() { return [](mlir::MLIRContext *context) -> mlir::Type { // FIXME: a namelist group must be some well-defined data structure, use a // tuple as a proxy for the moment - return mlir::TupleType::get(llvm::None, context); + return mlir::TupleType::get(context); }; } template <> diff --git a/mlir/include/mlir/IR/Builders.h b/mlir/include/mlir/IR/Builders.h index 4256727905f57..c27585a6e3437 100644 --- a/mlir/include/mlir/IR/Builders.h +++ b/mlir/include/mlir/IR/Builders.h @@ -73,8 +73,8 @@ class Builder { IntegerType getI64Type(); IntegerType getIntegerType(unsigned width); IntegerType getIntegerType(unsigned width, bool isSigned); - FunctionType getFunctionType(ArrayRef inputs, ArrayRef results); - TupleType getTupleType(ArrayRef elementTypes); + FunctionType getFunctionType(TypeRange inputs, TypeRange results); + TupleType getTupleType(TypeRange elementTypes); NoneType getNoneType(); /// Get or construct an instance of the type 'ty' with provided arguments. diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index e3afaf3161540..23a37cc2e2a9e 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -17,6 +17,7 @@ #include "mlir/IR/Attributes.h" #include "mlir/IR/Identifier.h" #include "mlir/IR/Location.h" +#include "mlir/IR/TypeRange.h" #include "mlir/IR/Types.h" #include "mlir/IR/Value.h" #include "mlir/Support/InterfaceSupport.h" @@ -624,104 +625,6 @@ class OpPrintingFlags { // Operation Value-Iterators //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// TypeRange - -/// This class provides an abstraction over the various different ranges of -/// value types. In many cases, this prevents the need to explicitly materialize -/// a SmallVector/std::vector. This class should be used in places that are not -/// suitable for a more derived type (e.g. ArrayRef) or a template range -/// parameter. 
-class TypeRange - : public llvm::detail::indexed_accessor_range_base< - TypeRange, - llvm::PointerUnion, Type, - Type, Type> { -public: - using RangeBaseT::RangeBaseT; - TypeRange(ArrayRef types = llvm::None); - explicit TypeRange(OperandRange values); - explicit TypeRange(ResultRange values); - explicit TypeRange(ValueRange values); - explicit TypeRange(ArrayRef values); - explicit TypeRange(ArrayRef values) - : TypeRange(ArrayRef(values.data(), values.size())) {} - template - TypeRange(ValueTypeRange values) - : TypeRange(ValueRangeT(values.begin().getCurrent(), - values.end().getCurrent())) {} - template , Arg>::value>> - TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} - TypeRange(std::initializer_list types) - : TypeRange(ArrayRef(types)) {} - -private: - /// The owner of the range is either: - /// * A pointer to the first element of an array of values. - /// * A pointer to the first element of an array of types. - /// * A pointer to the first element of an array of operands. - using OwnerT = llvm::PointerUnion; - - /// See `llvm::detail::indexed_accessor_range_base` for details. - static OwnerT offset_base(OwnerT object, ptrdiff_t index); - /// See `llvm::detail::indexed_accessor_range_base` for details. - static Type dereference_iterator(OwnerT object, ptrdiff_t index); - - /// Allow access to `offset_base` and `dereference_iterator`. - friend RangeBaseT; -}; - -//===----------------------------------------------------------------------===// -// ValueTypeRange - -/// This class implements iteration on the types of a given range of values. -template -class ValueTypeIterator final - : public llvm::mapped_iterator { - static Type unwrap(Value value) { return value.getType(); } - -public: - using reference = Type; - - /// Provide a const dereference method. - Type operator*() const { return unwrap(*this->I); } - - /// Initializes the type iterator to the specified value iterator. - ValueTypeIterator(ValueIteratorT it) - : llvm::mapped_iterator(it, &unwrap) {} -}; - -/// This class implements iteration on the types of a given range of values. -template -class ValueTypeRange final - : public llvm::iterator_range< - ValueTypeIterator> { -public: - using llvm::iterator_range< - ValueTypeIterator>::iterator_range; - template - ValueTypeRange(Container &&c) : ValueTypeRange(c.begin(), c.end()) {} - - /// Compare this range with another. - template - bool operator==(const OtherT &other) const { - return llvm::size(*this) == llvm::size(other) && - std::equal(this->begin(), this->end(), other.begin()); - } - template - bool operator!=(const OtherT &other) const { - return !(*this == other); - } -}; - -template -inline bool operator==(ArrayRef lhs, const ValueTypeRange &rhs) { - return lhs.size() == static_cast(llvm::size(rhs)) && - std::equal(lhs.begin(), lhs.end(), rhs.begin()); -} - //===----------------------------------------------------------------------===// // OperandRange diff --git a/mlir/include/mlir/IR/StandardTypes.h b/mlir/include/mlir/IR/StandardTypes.h index 1ac24359cbb62..3daf226603a83 100644 --- a/mlir/include/mlir/IR/StandardTypes.h +++ b/mlir/include/mlir/IR/StandardTypes.h @@ -632,10 +632,10 @@ class TupleType /// Get or create a new TupleType with the provided element types. Assumes the /// arguments define a well-formed type. - static TupleType get(ArrayRef elementTypes, MLIRContext *context); + static TupleType get(TypeRange elementTypes, MLIRContext *context); /// Get or create an empty tuple type. 
- static TupleType get(MLIRContext *context) { return get({}, context); } + static TupleType get(MLIRContext *context); /// Return the elements types for this tuple. ArrayRef getTypes() const; diff --git a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h new file mode 100644 index 0000000000000..8e41ad1665f9d --- /dev/null +++ b/mlir/include/mlir/IR/TypeRange.h @@ -0,0 +1,181 @@ +//===- TypeRange.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TypeRange and ValueTypeRange classes. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_IR_TYPERANGE_H +#define MLIR_IR_TYPERANGE_H + +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" +#include "llvm/ADT/PointerUnion.h" + +namespace mlir { +class OperandRange; +class ResultRange; +class Type; +class Value; +class ValueRange; +template +class ValueTypeRange; + +//===----------------------------------------------------------------------===// +// TypeRange + +/// This class provides an abstraction over the various different ranges of +/// value types. In many cases, this prevents the need to explicitly materialize +/// a SmallVector/std::vector. This class should be used in places that are not +/// suitable for a more derived type (e.g. ArrayRef) or a template range +/// parameter. +class TypeRange + : public llvm::detail::indexed_accessor_range_base< + TypeRange, + llvm::PointerUnion, Type, + Type, Type> { +public: + using RangeBaseT::RangeBaseT; + TypeRange(ArrayRef types = llvm::None); + explicit TypeRange(OperandRange values); + explicit TypeRange(ResultRange values); + explicit TypeRange(ValueRange values); + explicit TypeRange(ArrayRef values); + explicit TypeRange(ArrayRef values) + : TypeRange(ArrayRef(values.data(), values.size())) {} + template + TypeRange(ValueTypeRange values) + : TypeRange(ValueRangeT(values.begin().getCurrent(), + values.end().getCurrent())) {} + template , Arg>::value>> + TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} + TypeRange(std::initializer_list types) + : TypeRange(ArrayRef(types)) {} + +private: + /// The owner of the range is either: + /// * A pointer to the first element of an array of values. + /// * A pointer to the first element of an array of types. + /// * A pointer to the first element of an array of operands. + using OwnerT = llvm::PointerUnion; + + /// See `llvm::detail::indexed_accessor_range_base` for details. + static OwnerT offset_base(OwnerT object, ptrdiff_t index); + /// See `llvm::detail::indexed_accessor_range_base` for details. + static Type dereference_iterator(OwnerT object, ptrdiff_t index); + + /// Allow access to `offset_base` and `dereference_iterator`. + friend RangeBaseT; +}; + +/// Make TypeRange hashable. +inline ::llvm::hash_code hash_value(TypeRange arg) { + return ::llvm::hash_combine_range(arg.begin(), arg.end()); +} + +//===----------------------------------------------------------------------===// +// ValueTypeRange + +/// This class implements iteration on the types of a given range of values. 
+template +class ValueTypeIterator final + : public llvm::mapped_iterator { + static Type unwrap(Value value) { return value.getType(); } + +public: + using reference = Type; + + /// Provide a const dereference method. + Type operator*() const { return unwrap(*this->I); } + + /// Initializes the type iterator to the specified value iterator. + ValueTypeIterator(ValueIteratorT it) + : llvm::mapped_iterator(it, &unwrap) {} +}; + +/// This class implements iteration on the types of a given range of values. +template +class ValueTypeRange final + : public llvm::iterator_range< + ValueTypeIterator> { +public: + using llvm::iterator_range< + ValueTypeIterator>::iterator_range; + template + ValueTypeRange(Container &&c) : ValueTypeRange(c.begin(), c.end()) {} + + /// Compare this range with another. + template + bool operator==(const OtherT &other) const { + return llvm::size(*this) == llvm::size(other) && + std::equal(this->begin(), this->end(), other.begin()); + } + template + bool operator!=(const OtherT &other) const { + return !(*this == other); + } +}; + +template +inline bool operator==(ArrayRef lhs, const ValueTypeRange &rhs) { + return lhs.size() == static_cast(llvm::size(rhs)) && + std::equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +} // namespace mlir + +namespace llvm { + +// Provide DenseMapInfo for TypeRange. +template <> +struct DenseMapInfo { + static mlir::TypeRange getEmptyKey() { + return mlir::TypeRange(getEmptyKeyPointer(), 0); + } + + static mlir::TypeRange getTombstoneKey() { + return mlir::TypeRange(getTombstoneKeyPointer(), 0); + } + + static unsigned getHashValue(mlir::TypeRange val) { return hash_value(val); } + + static bool isEqual(mlir::TypeRange lhs, mlir::TypeRange rhs) { + if (isEmptyKey(rhs)) + return isEmptyKey(lhs); + if (isTombstoneKey(rhs)) + return isTombstoneKey(lhs); + return lhs == rhs; + } + +private: + static const mlir::Type *getEmptyKeyPointer() { + return DenseMapInfo::getEmptyKey(); + } + + static const mlir::Type *getTombstoneKeyPointer() { + return DenseMapInfo::getTombstoneKey(); + } + + static bool isEmptyKey(mlir::TypeRange range) { + if (const auto *type = range.getBase().dyn_cast()) + return type == getEmptyKeyPointer(); + return false; + } + + static bool isTombstoneKey(mlir::TypeRange range) { + if (const auto *type = range.getBase().dyn_cast()) + return type == getTombstoneKeyPointer(); + return false; + } +}; + +} // namespace llvm + +#endif // MLIR_IR_TYPERANGE_H diff --git a/mlir/include/mlir/IR/Types.h b/mlir/include/mlir/IR/Types.h index 83636585c499f..ed63f696a84ca 100644 --- a/mlir/include/mlir/IR/Types.h +++ b/mlir/include/mlir/IR/Types.h @@ -21,6 +21,7 @@ class IndexType; class IntegerType; class MLIRContext; class TypeStorage; +class TypeRange; namespace detail { struct FunctionTypeStorage; @@ -259,21 +260,17 @@ class FunctionType public: using Base::Base; - static FunctionType get(ArrayRef inputs, ArrayRef results, + static FunctionType get(TypeRange inputs, TypeRange results, MLIRContext *context); // Input types. unsigned getNumInputs() const { return getSubclassData(); } - Type getInput(unsigned i) const { return getInputs()[i]; } - ArrayRef getInputs() const; // Result types. unsigned getNumResults() const; - Type getResult(unsigned i) const { return getResults()[i]; } - ArrayRef getResults() const; /// Methods for support type inquiry through isa, cast, and dyn_cast. 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index cc2200e84da57..47129d7bd6151 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -549,15 +549,13 @@ static void printCallOp(OpAsmPrinter &p, CallOp &op) { else p << op.getOperand(0); - p << '(' << op.getOperands().drop_front(isDirect ? 0 : 1) << ')'; + auto args = op.getOperands().drop_front(isDirect ? 0 : 1); + p << '(' << args << ')'; p.printOptionalAttrDict(op.getAttrs(), {"callee"}); // Reconstruct the function MLIR function type from operand and result types. - SmallVector argTypes( - llvm::drop_begin(op.getOperandTypes(), isDirect ? 0 : 1)); - p << " : " - << FunctionType::get(argTypes, op.getResultTypes(), op.getContext()); + << FunctionType::get(args.getTypes(), op.getResultTypes(), op.getContext()); } // ::= `llvm.call` (function-id | ssa-use) `(` ssa-use-list `)` diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index a67e79ac4a7ce..a78e2427b2fe0 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -749,8 +749,7 @@ static LogicalResult verify(CallOp op) { } FunctionType CallOp::getCalleeType() { - SmallVector argTypes(getOperandTypes()); - return FunctionType::get(argTypes, getResultTypes(), getContext()); + return FunctionType::get(getOperandTypes(), getResultTypes(), getContext()); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index d89158ea5d873..69b1a0efb58d7 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -67,12 +67,11 @@ IntegerType Builder::getIntegerType(unsigned width, bool isSigned) { width, isSigned ? 
IntegerType::Signed : IntegerType::Unsigned, context); } -FunctionType Builder::getFunctionType(ArrayRef inputs, - ArrayRef results) { +FunctionType Builder::getFunctionType(TypeRange inputs, TypeRange results) { return FunctionType::get(inputs, results, context); } -TupleType Builder::getTupleType(ArrayRef elementTypes) { +TupleType Builder::getTupleType(TypeRange elementTypes) { return TupleType::get(elementTypes, context); } diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt index d90db0832f565..553408f6fb368 100644 --- a/mlir/lib/IR/CMakeLists.txt +++ b/mlir/lib/IR/CMakeLists.txt @@ -22,6 +22,7 @@ add_mlir_library(MLIRIR StandardTypes.cpp SymbolTable.cpp Types.cpp + TypeRange.cpp TypeUtilities.cpp Value.cpp Verifier.cpp diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp index ef2b377cb1f32..b477a8a239003 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -360,45 +360,6 @@ Operation *detail::TrailingOpResult::getOwner() { // Operation Value-Iterators //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// TypeRange - -TypeRange::TypeRange(ArrayRef types) - : TypeRange(types.data(), types.size()) {} -TypeRange::TypeRange(OperandRange values) - : TypeRange(values.begin().getBase(), values.size()) {} -TypeRange::TypeRange(ResultRange values) - : TypeRange(values.getBase()->getResultTypes().slice(values.getStartIndex(), - values.size())) {} -TypeRange::TypeRange(ArrayRef values) - : TypeRange(values.data(), values.size()) {} -TypeRange::TypeRange(ValueRange values) : TypeRange(OwnerT(), values.size()) { - detail::ValueRangeOwner owner = values.begin().getBase(); - if (auto *op = reinterpret_cast(owner.ptr.dyn_cast())) - this->base = op->getResultTypes().drop_front(owner.startIndex).data(); - else if (auto *operand = owner.ptr.dyn_cast()) - this->base = operand; - else - this->base = owner.ptr.get(); -} - -/// See `llvm::detail::indexed_accessor_range_base` for details. -TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { - if (auto *value = object.dyn_cast()) - return {value + index}; - if (auto *operand = object.dyn_cast()) - return {operand + index}; - return {object.dyn_cast() + index}; -} -/// See `llvm::detail::indexed_accessor_range_base` for details. -Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { - if (auto *value = object.dyn_cast()) - return (value + index)->getType(); - if (auto *operand = object.dyn_cast()) - return (operand + index)->get().getType(); - return object.dyn_cast()[index]; -} - //===----------------------------------------------------------------------===// // OperandRange diff --git a/mlir/lib/IR/StandardTypes.cpp b/mlir/lib/IR/StandardTypes.cpp index 2d1f8d8eb6f05..70b00cf8963a5 100644 --- a/mlir/lib/IR/StandardTypes.cpp +++ b/mlir/lib/IR/StandardTypes.cpp @@ -638,10 +638,13 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t, /// Get or create a new TupleType with the provided element types. Assumes the /// arguments define a well-formed type. -TupleType TupleType::get(ArrayRef elementTypes, MLIRContext *context) { +TupleType TupleType::get(TypeRange elementTypes, MLIRContext *context) { return Base::get(context, StandardTypes::Tuple, elementTypes); } +/// Get or create an empty tuple type. +TupleType TupleType::get(MLIRContext *context) { return get({}, context); } + /// Return the elements types for this tuple. 
ArrayRef TupleType::getTypes() const { return getImpl()->getTypes(); } diff --git a/mlir/lib/IR/TypeDetail.h b/mlir/lib/IR/TypeDetail.h index 72f1585be2d07..783983473a388 100644 --- a/mlir/lib/IR/TypeDetail.h +++ b/mlir/lib/IR/TypeDetail.h @@ -15,7 +15,9 @@ #include "mlir/IR/AffineMap.h" #include "mlir/IR/Identifier.h" #include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeRange.h" #include "llvm/ADT/bit.h" #include "llvm/Support/TrailingObjects.h" @@ -105,7 +107,7 @@ struct FunctionTypeStorage : public TypeStorage { inputsAndResults(inputsAndResults) {} /// The hash key used for uniquing. - using KeyTy = std::pair, ArrayRef>; + using KeyTy = std::pair; bool operator==(const KeyTy &key) const { return key == KeyTy(getInputs(), getResults()); } @@ -113,7 +115,7 @@ struct FunctionTypeStorage : public TypeStorage { /// Construction. static FunctionTypeStorage *construct(TypeStorageAllocator &allocator, const KeyTy &key) { - ArrayRef inputs = key.first, results = key.second; + TypeRange inputs = key.first, results = key.second; // Copy the inputs and results into the bump pointer. SmallVector types; @@ -320,13 +322,13 @@ struct ComplexTypeStorage : public TypeStorage { struct TupleTypeStorage final : public TypeStorage, public llvm::TrailingObjects { - using KeyTy = ArrayRef; + using KeyTy = TypeRange; TupleTypeStorage(unsigned numTypes) : TypeStorage(numTypes) {} /// Construction. static TupleTypeStorage *construct(TypeStorageAllocator &allocator, - ArrayRef key) { + TypeRange key) { // Allocate a new storage instance. auto byteSize = TupleTypeStorage::totalSizeToAlloc(key.size()); auto rawMem = allocator.allocate(byteSize, alignof(TupleTypeStorage)); diff --git a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp new file mode 100644 index 0000000000000..f3f6fb54c707b --- /dev/null +++ b/mlir/lib/IR/TypeRange.cpp @@ -0,0 +1,50 @@ +//===- TypeRange.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/TypeRange.h" +#include "mlir/IR/Operation.h" +using namespace mlir; + +//===----------------------------------------------------------------------===// +// TypeRange + +TypeRange::TypeRange(ArrayRef types) + : TypeRange(types.data(), types.size()) {} +TypeRange::TypeRange(OperandRange values) + : TypeRange(values.begin().getBase(), values.size()) {} +TypeRange::TypeRange(ResultRange values) + : TypeRange(values.getBase()->getResultTypes().slice(values.getStartIndex(), + values.size())) {} +TypeRange::TypeRange(ArrayRef values) + : TypeRange(values.data(), values.size()) {} +TypeRange::TypeRange(ValueRange values) : TypeRange(OwnerT(), values.size()) { + detail::ValueRangeOwner owner = values.begin().getBase(); + if (auto *op = reinterpret_cast(owner.ptr.dyn_cast())) + this->base = op->getResultTypes().drop_front(owner.startIndex).data(); + else if (auto *operand = owner.ptr.dyn_cast()) + this->base = operand; + else + this->base = owner.ptr.get(); +} + +/// See `llvm::detail::indexed_accessor_range_base` for details. 
+TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { + if (const auto *value = object.dyn_cast()) + return {value + index}; + if (auto *operand = object.dyn_cast()) + return {operand + index}; + return {object.dyn_cast() + index}; +} +/// See `llvm::detail::indexed_accessor_range_base` for details. +Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { + if (const auto *value = object.dyn_cast()) + return (value + index)->getType(); + if (auto *operand = object.dyn_cast()) + return (operand + index)->get().getType(); + return object.dyn_cast()[index]; +} diff --git a/mlir/lib/IR/Types.cpp b/mlir/lib/IR/Types.cpp index 25902c2863bb4..fea2cc6648e3c 100644 --- a/mlir/lib/IR/Types.cpp +++ b/mlir/lib/IR/Types.cpp @@ -34,7 +34,7 @@ void Type::setSubclassData(unsigned val) { impl->setSubclassData(val); } // FunctionType //===----------------------------------------------------------------------===// -FunctionType FunctionType::get(ArrayRef inputs, ArrayRef results, +FunctionType FunctionType::get(TypeRange inputs, TypeRange results, MLIRContext *context) { return Base::get(context, Type::Kind::Function, inputs, results); } From 47f7174ffa71d339c1a65d1dd9a2ac5ff2abc95d Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Wed, 18 Mar 2020 08:07:33 -0700 Subject: [PATCH 363/600] [WebAssembly] Use "signed char" instead of "char" in SIMD intrinsics. This allows people to use `int8_t` instead of `char`, -funsigned-char, and generally decouples SIMD from the specialness of `char`. And it makes intrinsics like `__builtin_wasm_add_saturate_s_i8x16` and `__builtin_wasm_add_saturate_u_i8x16` use signed and unsigned element types, respectively. Differential Revision: https://reviews.llvm.org/D85074 --- .../clang/Basic/BuiltinsWebAssembly.def | 58 +++++++++---------- clang/lib/Headers/wasm_simd128.h | 17 +++--- clang/test/CodeGen/builtins-wasm.c | 34 +++++------ 3 files changed, 54 insertions(+), 55 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 39f29740cf56d..c0ac74686cf1c 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -66,67 +66,67 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64_f64, "LLid", "nc", "nontrappi TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64_f64, "LLid", "nc", "nontrapping-fptoint") // SIMD builtins -TARGET_BUILTIN(__builtin_wasm_swizzle_v8x16, "V16cV16cV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_swizzle_v8x16, "V16ScV16ScV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i8x16, "iV16cIi", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16cIi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i8x16, "iV16ScIi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i8x16, "iV16UcIUi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_s_i16x8, "iV8sIi", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i16x8, "iV8sIi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extract_lane_u_i16x8, "iV8UsIUi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_i32x4, "iV4iIi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_i64x2, "LLiV2LLiIi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_f32x4, "fV4fIi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extract_lane_f64x2, "dV2dIi", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_replace_lane_i8x16, "V16cV16cIii", "nc", 
"simd128") +TARGET_BUILTIN(__builtin_wasm_replace_lane_i8x16, "V16ScV16ScIii", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_replace_lane_i16x8, "V8sV8sIii", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_replace_lane_i32x4, "V4iV4iIii", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_replace_lane_i64x2, "V2LLiV2LLiIiLLi", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_replace_lane_f32x4, "V4fV4fIif", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_replace_lane_f64x2, "V2dV2dIid", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_add_saturate_s_i8x16, "V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_add_saturate_u_i8x16, "V16cV16cV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_add_saturate_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_add_saturate_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_add_saturate_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_add_saturate_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_add_saturate_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_saturate_s_i8x16, "V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i8x16, "V16cV16cV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_sub_saturate_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_sub_saturate_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_sub_saturate_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_abs_i8x16, "V16cV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_abs_i8x16, "V16ScV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_i16x8, "V8sV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_i32x4, "V4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_min_s_i8x16, "V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_min_u_i8x16, "V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_max_s_i8x16, "V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_max_u_i8x16, "V16cV16cV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_max_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_max_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_min_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_min_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_max_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_max_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_min_s_i32x4, "V4iV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_min_u_i32x4, "V4iV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_min_u_i32x4, "V4UiV4UiV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_s_i32x4, "V4iV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_max_u_i32x4, "V4iV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_max_u_i32x4, "V4UiV4UiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, 
"V16cV16cV16c", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16cV16cV16cIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_any_true_i8x16, "iV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_any_true_i16x8, "iV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_any_true_i32x4, "iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_any_true_i64x2, "iV2LLi", "nc", "unimplemented-simd128") -TARGET_BUILTIN(__builtin_wasm_all_true_i8x16, "iV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_all_true_i8x16, "iV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_all_true_i16x8, "iV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_all_true_i32x4, "iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_all_true_i64x2, "iV2LLi", "nc", "unimplemented-simd128") -TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "iV16c", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_bitmask_i8x16, "iV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "iV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "iV4i", "nc", "simd128") @@ -164,10 +164,10 @@ TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-s TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16ScV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16UcV8UsV8Us", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4UiV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4ii*", "nU", "simd128") TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLi*", "nU", "simd128") diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index 967008b555f42..b15264607d938 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -18,8 +18,7 @@ typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16))); // Internal types determined by clang builtin definitions typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); -typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); -typedef signed char __s8x16 +typedef signed char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); typedef unsigned char __u8x16 __attribute__((__vector_size__(16), __aligned__(16))); @@ -340,17 +339,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double __a) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a == (__s8x16)__b); + 
return (v128_t)((__i8x16)__a == (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a != (__s8x16)__b); + return (v128_t)((__i8x16)__a != (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a < (__s8x16)__b); + return (v128_t)((__i8x16)__a < (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, @@ -360,7 +359,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a > (__s8x16)__b); + return (v128_t)((__i8x16)__a > (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, @@ -370,7 +369,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a <= (__s8x16)__b); + return (v128_t)((__i8x16)__a <= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, @@ -380,7 +379,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a >= (__s8x16)__b); + return (v128_t)((__i8x16)__a >= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t __a, @@ -602,7 +601,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t __a, int32_t __b) { - return (v128_t)((__s8x16)__a >> __b); + return (v128_t)((__i8x16)__a >> __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a, diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 14e0d0ac65ed2..01e9273e0fb63 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -3,7 +3,7 @@ // RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD // SIMD convenience types -typedef char i8x16 __attribute((vector_size(16))); +typedef signed char i8x16 __attribute((vector_size(16))); typedef short i16x8 __attribute((vector_size(16))); typedef int i32x4 __attribute((vector_size(16))); typedef long long i64x2 __attribute((vector_size(16))); @@ -201,7 +201,7 @@ int extract_lane_s_i8x16(i8x16 v) { // WEBASSEMBLY-NEXT: ret } -int extract_lane_u_i8x16(i8x16 v) { +int extract_lane_u_i8x16(u8x16 v) { return __builtin_wasm_extract_lane_u_i8x16(v, 13); // WEBASSEMBLY: extractelement <16 x i8> %v, i32 13 // WEBASSEMBLY-NEXT: zext @@ -215,7 +215,7 @@ int extract_lane_s_i16x8(i16x8 v) { // WEBASSEMBLY-NEXT: ret } -int extract_lane_u_i16x8(i16x8 v) { +int extract_lane_u_i16x8(u16x8 v) { return __builtin_wasm_extract_lane_u_i16x8(v, 7); // WEBASSEMBLY: extractelement <8 x i16> %v, i32 7 // WEBASSEMBLY-NEXT: zext @@ -291,7 +291,7 @@ i8x16 add_saturate_s_i8x16(i8x16 x, i8x16 y) { // WEBASSEMBLY-NEXT: ret } -i8x16 add_saturate_u_i8x16(i8x16 x, i8x16 y) { +u8x16 add_saturate_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_add_saturate_u_i8x16(x, y); // WEBASSEMBLY: call <16 x i8> @llvm.uadd.sat.v16i8( // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x 
i8> %y) @@ -305,7 +305,7 @@ i16x8 add_saturate_s_i16x8(i16x8 x, i16x8 y) { // WEBASSEMBLY-NEXT: ret } -i16x8 add_saturate_u_i16x8(i16x8 x, i16x8 y) { +u16x8 add_saturate_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_add_saturate_u_i16x8(x, y); // WEBASSEMBLY: call <8 x i16> @llvm.uadd.sat.v8i16( // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) @@ -319,7 +319,7 @@ i8x16 sub_saturate_s_i8x16(i8x16 x, i8x16 y) { // WEBASSEMBLY-NEXT: ret } -i8x16 sub_saturate_u_i8x16(i8x16 x, i8x16 y) { +u8x16 sub_saturate_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_sub_saturate_u_i8x16(x, y); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.sub.saturate.unsigned.v16i8( // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) @@ -357,7 +357,7 @@ i8x16 min_s_i8x16(i8x16 x, i8x16 y) { // WEBASSEMBLY-NEXT: ret <16 x i8> %1 } -i8x16 min_u_i8x16(i8x16 x, i8x16 y) { +u8x16 min_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_min_u_i8x16(x, y); // WEBASSEMBLY: %0 = icmp ult <16 x i8> %x, %y // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y @@ -371,7 +371,7 @@ i8x16 max_s_i8x16(i8x16 x, i8x16 y) { // WEBASSEMBLY-NEXT: ret <16 x i8> %1 } -i8x16 max_u_i8x16(i8x16 x, i8x16 y) { +u8x16 max_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_max_u_i8x16(x, y); // WEBASSEMBLY: %0 = icmp ugt <16 x i8> %x, %y // WEBASSEMBLY-NEXT: %1 = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y @@ -385,7 +385,7 @@ i16x8 min_s_i16x8(i16x8 x, i16x8 y) { // WEBASSEMBLY-NEXT: ret <8 x i16> %1 } -i16x8 min_u_i16x8(i16x8 x, i16x8 y) { +u16x8 min_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_min_u_i16x8(x, y); // WEBASSEMBLY: %0 = icmp ult <8 x i16> %x, %y // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y @@ -399,7 +399,7 @@ i16x8 max_s_i16x8(i16x8 x, i16x8 y) { // WEBASSEMBLY-NEXT: ret <8 x i16> %1 } -i16x8 max_u_i16x8(i16x8 x, i16x8 y) { +u16x8 max_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_max_u_i16x8(x, y); // WEBASSEMBLY: %0 = icmp ugt <8 x i16> %x, %y // WEBASSEMBLY-NEXT: %1 = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y @@ -413,7 +413,7 @@ i32x4 min_s_i32x4(i32x4 x, i32x4 y) { // WEBASSEMBLY-NEXT: ret <4 x i32> %1 } -i32x4 min_u_i32x4(i32x4 x, i32x4 y) { +u32x4 min_u_i32x4(u32x4 x, u32x4 y) { return __builtin_wasm_min_u_i32x4(x, y); // WEBASSEMBLY: %0 = icmp ult <4 x i32> %x, %y // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y @@ -427,7 +427,7 @@ i32x4 max_s_i32x4(i32x4 x, i32x4 y) { // WEBASSEMBLY-NEXT: ret <4 x i32> %1 } -i32x4 max_u_i32x4(i32x4 x, i32x4 y) { +u32x4 max_u_i32x4(u32x4 x, u32x4 y) { return __builtin_wasm_max_u_i32x4(x, y); // WEBASSEMBLY: %0 = icmp ugt <4 x i32> %x, %y // WEBASSEMBLY-NEXT: %1 = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y @@ -441,21 +441,21 @@ i16x8 sub_saturate_s_i16x8(i16x8 x, i16x8 y) { // WEBASSEMBLY-NEXT: ret } -i16x8 sub_saturate_u_i16x8(i16x8 x, i16x8 y) { +u16x8 sub_saturate_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_sub_saturate_u_i16x8(x, y); // WEBASSEMBLY: call <8 x i16> @llvm.wasm.sub.saturate.unsigned.v8i16( // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) // WEBASSEMBLY-NEXT: ret } -i8x16 avgr_u_i8x16(i8x16 x, i8x16 y) { +u8x16 avgr_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_avgr_u_i8x16(x, y); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8( // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) // WEBASSEMBLY-NEXT: ret } -i16x8 avgr_u_i16x8(i16x8 x, i16x8 y) { +u16x8 avgr_u_i16x8(u16x8 x, u16x8 y) { return __builtin_wasm_avgr_u_i16x8(x, y); // WEBASSEMBLY: call <8 x i16> 
@llvm.wasm.avgr.unsigned.v8i16(
  // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y)
@@ -716,7 +716,7 @@ i8x16 narrow_s_i8x16_i16x8(i16x8 low, i16x8 high) {
   // WEBASSEMBLY: ret
 }
 
-i8x16 narrow_u_i8x16_i16x8(i16x8 low, i16x8 high) {
+u8x16 narrow_u_i8x16_i16x8(u16x8 low, u16x8 high) {
   return __builtin_wasm_narrow_u_i8x16_i16x8(low, high);
   // WEBASSEMBLY: call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(
   // WEBASSEMBLY-SAME: <8 x i16> %low, <8 x i16> %high)
@@ -730,7 +730,7 @@ i16x8 narrow_s_i16x8_i32x4(i32x4 low, i32x4 high) {
   // WEBASSEMBLY: ret
 }
 
-i16x8 narrow_u_i16x8_i32x4(i32x4 low, i32x4 high) {
+u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) {
   return __builtin_wasm_narrow_u_i16x8_i32x4(low, high);
   // WEBASSEMBLY: call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(
   // WEBASSEMBLY-SAME: <4 x i32> %low, <4 x i32> %high)

From a4e537d9c47aa378a24636e2d90d208389ad93ab Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Thu, 30 Jul 2020 20:07:11 +0200
Subject: [PATCH 364/600] [libFuzzer] Fix endianness issue in ForEachNonZeroByte()

The usage pattern of the Bundle variable assumes the machine is little
endian, which is not the case on SystemZ. Fix by converting Bundle to
little-endian when necessary.

---
 compiler-rt/lib/fuzzer/FuzzerTracePC.h | 4 +++-
 compiler-rt/lib/fuzzer/FuzzerUtil.h    | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.h b/compiler-rt/lib/fuzzer/FuzzerTracePC.h
index 501f3b544971f..4601300cb9dcf 100644
--- a/compiler-rt/lib/fuzzer/FuzzerTracePC.h
+++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.h
@@ -194,10 +194,12 @@ size_t ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End,
   // Iterate by Step bytes at a time.
   for (; P < End; P += Step)
-    if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P))
+    if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P)) {
+      Bundle = HostToLE(Bundle);
       for (size_t I = 0; I < Step; I++, Bundle >>= 8)
         if (uint8_t V = Bundle & 0xff)
           Handle8bitCounter(FirstFeature, P - Begin + I, V);
+    }
 
   // Iterate by 1 byte until the end.
   for (; P < End; P++)
diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.h b/compiler-rt/lib/fuzzer/FuzzerUtil.h
index 4ae35838306d2..e90be085008ee 100644
--- a/compiler-rt/lib/fuzzer/FuzzerUtil.h
+++ b/compiler-rt/lib/fuzzer/FuzzerUtil.h
@@ -106,6 +106,12 @@ inline uint8_t *RoundDownByPage(uint8_t *P) {
   return reinterpret_cast<uint8_t *>(X);
 }
 
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+template <typename T> T HostToLE(T X) { return X; }
+#else
+template <typename T> T HostToLE(T X) { return Bswap(X); }
+#endif
+
 } // namespace fuzzer
 
 #endif // LLVM_FUZZER_UTIL_H

From 153df1373e5d3e0f16464233a3c17feb8a9eb3e9 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich
Date: Thu, 30 Jul 2020 20:08:08 +0200
Subject: [PATCH 365/600] [SanitizerCoverage] Fix types of __stop* and __start* symbols

If a section is supposed to hold elements of type T, then the
corresponding CreateSecStartEnd()'s Ty parameter represents T*.
Forwarding it to the GlobalVariable constructor causes the resulting
GlobalVariable's type to be T*, and its SSA value type to be T**, which
is one indirection too many. This issue is mostly masked by pointer
casts; however, the global variable still gets an incorrect alignment,
which causes SystemZ to choose wrong instructions to access the section.
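To make the type mix-up concrete, here is a minimal sketch of the wrong and
the fixed declarations. This is illustrative code, not part of the patch:
the function, the section name, and the variable names are made up.

  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"
  #include <utility>
  using namespace llvm;

  // Hypothetical demo: Ty is T* for a section that holds elements of type T.
  std::pair<GlobalVariable *, GlobalVariable *>
  makeSecStartDemo(Module &M, Type *Ty) {
    // Wrong: the global is declared *with* type T*, so the SSA value of the
    // __start_* symbol is T**, and the alignment is a pointer's, not T's.
    auto *TooIndirect =
        new GlobalVariable(M, Ty, /*isConstant=*/false,
                           GlobalVariable::ExternalLinkage, nullptr,
                           "__start_demo_sec");
    // Fixed: declare the global with the element type T; its address then
    // has type T* and the expected alignment for the section's elements.
    auto *Fixed =
        new GlobalVariable(M, Ty->getPointerElementType(), /*isConstant=*/false,
                           GlobalVariable::ExternalLinkage, nullptr,
                           "__start_demo_sec");
    return {TooIndirect, Fixed};
  }

The updated tests below show the observable effect: the
bitcast (i8** @__start___sancov_cntrs to i8*) wrappers disappear once the
globals carry the right type.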
--- .../Instrumentation/SanitizerCoverage.cpp | 17 ++++++++--------- .../SanitizerCoverage/inline-8bit-counters.ll | 2 +- .../SanitizerCoverage/inline-bool-flag.ll | 2 +- .../SanitizerCoverage/tracing.ll | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index b6a9df57e4315..0b27982acc02d 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -338,25 +338,24 @@ PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, std::pair ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, Type *Ty) { - GlobalVariable *SecStart = - new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, - getSectionStart(Section)); + GlobalVariable *SecStart = new GlobalVariable( + M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage, + nullptr, getSectionStart(Section)); SecStart->setVisibility(GlobalValue::HiddenVisibility); - GlobalVariable *SecEnd = - new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, - nullptr, getSectionEnd(Section)); + GlobalVariable *SecEnd = new GlobalVariable( + M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); SecEnd->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(M.getContext()); - Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty); if (!TargetTriple.isOSBinFormatCOFF()) - return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr); + return std::make_pair(SecStart, SecEnd); // Account for the fact that on windows-msvc __start_* symbols actually // point to a uint64_t before the start of the array. 
auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy); auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr, ConstantInt::get(IntptrTy, sizeof(uint64_t))); - return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr); + return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEnd); } Function *ModuleSanitizerCoverage::CreateInitCallsForSections( diff --git a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll index 775ce4fd772d3..4f905428769a4 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll @@ -12,4 +12,4 @@ entry: ; CHECK: store i8 %1, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__sancov_gen_, i64 0, i64 0), align 1, !nosanitize ret void } -; CHECK: call void @__sanitizer_cov_8bit_counters_init(i8* bitcast (i8** @__start___sancov_cntrs to i8*), i8* bitcast (i8** @__stop___sancov_cntrs to i8*)) +; CHECK: call void @__sanitizer_cov_8bit_counters_init(i8* @__start___sancov_cntrs, i8* @__stop___sancov_cntrs) diff --git a/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll b/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll index 8b05aac813c6a..c6845363a4062 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/inline-bool-flag.ll @@ -20,4 +20,4 @@ define void @foo() { entry: ret void } -; CHECK: call void @__sanitizer_cov_bool_flag_init(i1* bitcast (i1** @__start___sancov_bools to i1*), i1* bitcast (i1** @__stop___sancov_bools to i1*)) +; CHECK: call void @__sanitizer_cov_bool_flag_init(i1* @__start___sancov_bools, i1* @__stop___sancov_bools) diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll index 75a30d6b2b2b2..163271cdefa79 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll @@ -74,7 +74,7 @@ lj: ; CHECK_PC_GUARD: call void @longjmp ; CHECK_PC_GUARD: unreachable -; CHECK_PC_GUARD: call void @__sanitizer_cov_trace_pc_guard_init(i32* bitcast (i32** @__start___sancov_guards to i32*), i32* bitcast (i32** @__stop___sancov_guards to i32*)) +; CHECK_PC_GUARD: call void @__sanitizer_cov_trace_pc_guard_init(i32* @__start___sancov_guards, i32* @__stop___sancov_guards) ; CHECK_PC_GUARD_DARWIN-LABEL: define void @foo ; CHECK_PC_GUARD_DARWIN: call void @__sanitizer_cov_trace_pc_guard @@ -82,4 +82,4 @@ lj: ; CHECK_PC_GUARD_DARWIN: call void @__sanitizer_cov_trace_pc_guard ; CHECK_PC_GUARD_DARWIN-NOT: call void @__sanitizer_cov_trace_pc ; CHECK_PC_GUARD_DARWIN: ret void -; CHECK_PC_GUARD_DARWIN: call void @__sanitizer_cov_trace_pc_guard_init(i32* bitcast (i32** @"\01section$start$__DATA$__sancov_guards" to i32*), i32* bitcast (i32** @"\01section$end$__DATA$__sancov_guards" to i32*)) +; CHECK_PC_GUARD_DARWIN: call void @__sanitizer_cov_trace_pc_guard_init(i32* @"\01section$start$__DATA$__sancov_guards", i32* @"\01section$end$__DATA$__sancov_guards") From ea9b82da4180550c3ddc5e883c589474af9462b7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 30 Jul 2020 20:08:40 +0200 Subject: [PATCH 366/600] [libFuzzer] Enable for SystemZ * Add SystemZ to the list of supported architectures. * XFAIL a few tests. Coverage reporting is broken, and is not easy to fix (see comment in coverage.test). 
Interaction with sanitizers needs to be investigated more thoroughly, since they appear to reduce coverage in certain cases. --- compiler-rt/cmake/config-ix.cmake | 2 +- compiler-rt/test/fuzzer/coverage.test | 2 ++ compiler-rt/test/fuzzer/msan.test | 2 ++ compiler-rt/test/fuzzer/swap-cmp.test | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 74fef8933ef90..5f9e868de5fd8 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -305,7 +305,7 @@ else() endif() if(OS_NAME MATCHES "Linux") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${S390X}) elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) elseif(OS_NAME MATCHES "Android") diff --git a/compiler-rt/test/fuzzer/coverage.test b/compiler-rt/test/fuzzer/coverage.test index db15c7a66c6dd..07a10ba169f01 100644 --- a/compiler-rt/test/fuzzer/coverage.test +++ b/compiler-rt/test/fuzzer/coverage.test @@ -1,5 +1,7 @@ # FIXME: Disabled on Windows because -fPIC cannot be used to compile for Windows. UNSUPPORTED: windows +# FIXME: CreatePCArray() emits PLT stub addresses for entry blocks, which are ignored by TracePC::PrintCoverage(). +XFAIL: s390x RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/NullDerefTest.cpp -o %t-NullDerefTest RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/DSO1.cpp -fPIC %ld_flags_rpath_so1 -O0 -shared -o %dynamiclib1 RUN: %cpp_compiler -mllvm -use-unknown-locations=Disable %S/DSO2.cpp -fPIC %ld_flags_rpath_so2 -O0 -shared -o %dynamiclib2 diff --git a/compiler-rt/test/fuzzer/msan.test b/compiler-rt/test/fuzzer/msan.test index 2e0339bb8ff7b..ae1c449878657 100644 --- a/compiler-rt/test/fuzzer/msan.test +++ b/compiler-rt/test/fuzzer/msan.test @@ -1,3 +1,5 @@ +FIXME: Fails to find BINGO on s390x. +XFAIL: s390x REQUIRES: msan RUN: %msan_compiler %S/SimpleTestStdio.cpp -o %t RUN: not %run %t -seed=1 -runs=10000000 2>&1 | FileCheck %s --check-prefix=NO-REPORT diff --git a/compiler-rt/test/fuzzer/swap-cmp.test b/compiler-rt/test/fuzzer/swap-cmp.test index 7f7e2f60fa633..5c4112c356750 100644 --- a/compiler-rt/test/fuzzer/swap-cmp.test +++ b/compiler-rt/test/fuzzer/swap-cmp.test @@ -1,3 +1,5 @@ +# FIXME: Fails to find BINGO with ASAN on s390x, work fine without ASAN. 
+XFAIL: s390x
 RUN: %cpp_compiler %S/SwapCmpTest.cpp -o %t-SwapCmpTest
 CHECK: BINGO
 RUN: not %run %t-SwapCmpTest -seed=1 -runs=10000000 2>&1 | FileCheck %s

From bb33f925a673f3bb9793d2157c9d3d46d9ad7f25 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Tue, 4 Aug 2020 13:07:46 -0700
Subject: [PATCH 367/600] [lldb/Test] Add missing stdio.h includes

Fixes error: implicit declaration of function 'printf' is invalid in
C99 [-Werror,-Wimplicit-function-declaration]

---
 lldb/test/API/commands/target/basic/a.c      | 2 ++
 lldb/test/API/lang/c/global_variables/main.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/lldb/test/API/commands/target/basic/a.c b/lldb/test/API/commands/target/basic/a.c
index b8157275f1a98..9ce587e414537 100644
--- a/lldb/test/API/commands/target/basic/a.c
+++ b/lldb/test/API/commands/target/basic/a.c
@@ -1,3 +1,5 @@
+#include <stdio.h>
+
 int main(int argc, const char* argv[])
 {
     int *null_ptr = 0;
diff --git a/lldb/test/API/lang/c/global_variables/main.c b/lldb/test/API/lang/c/global_variables/main.c
index 864f094edd482..eb7720a6b0484 100644
--- a/lldb/test/API/lang/c/global_variables/main.c
+++ b/lldb/test/API/lang/c/global_variables/main.c
@@ -1,3 +1,5 @@
+#include <stdio.h>
+
 int g_common_1; // Not initialized on purpose to cause it to be undefined external in .o file
 int g_file_global_int = 42;
 static const int g_file_static_int = 2;

From bf2aa74e51997ee190f3b34dd26a1b564e59e267 Mon Sep 17 00:00:00 2001
From: Adrian Pop
Date: Tue, 4 Aug 2020 23:15:17 +0300
Subject: [PATCH 368/600] [OpenMP] support build on msys2/mingw with clang or gcc

RTM Adaptive Locks are supported on msys2/mingw for clang and gcc.

Differential Revision: https://reviews.llvm.org/D81776

---
 openmp/runtime/src/kmp_lock.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 8bf7ef2deb716..775693253db24 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -1706,7 +1706,8 @@ static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
 
 #if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \
     (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \
-    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
+    (KMP_COMPILER_CLANG && (KMP_MSVC_COMPAT || __MINGW32__)) || \
+    (KMP_COMPILER_GCC && __MINGW32__)
 
 #include <immintrin.h>
 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

From f0f68c6e6c5e0064c0196e4f1528e910a47766e0 Mon Sep 17 00:00:00 2001
From: AK <1894981+hiraditya@users.noreply.github.com>
Date: Tue, 4 Aug 2020 11:16:56 -0700
Subject: [PATCH 369/600] [HotColdSplit] Add test case for unlikely attribute in outlined function

Differential Revision: https://reviews.llvm.org/D85232

---
 .../Transforms/HotColdSplit/coldentrycount.ll | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
index d63acc188f544..89642763eede3 100644
--- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
+++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
@@ -1,13 +1,15 @@
+; REQUIRES: x86-registered-target
+; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -codegenprepare -S < %s | FileCheck %s
+
 ; Test to ensure that split cold function gets 0 entry count profile
 ; metadata when compiling with pgo.
-; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s - target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" -; CHECK-LABEL: @fun +; CHECK: define {{.*}} @fun{{.*}} ![[HOTPROF:[0-9]+]] {{.*}}section_prefix ![[LIKELY:[0-9]+]] ; CHECK: call void @fun.cold.1 + define void @fun() !prof !14 { entry: br i1 undef, label %if.then, label %if.else @@ -22,8 +24,12 @@ if.else: declare void @sink() cold -; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] +; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]] + +; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100} +; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !".hot"} ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0} +; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !".unlikely"} !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} @@ -41,3 +47,6 @@ declare void @sink() cold !12 = !{i32 999000, i64 100, i32 1} !13 = !{i32 999999, i64 1, i32 2} !14 = !{!"function_entry_count", i64 100} +!15 = !{!"function_section_prefix", !".hot"} +!16 = !{!"function_entry_count", i64 0} +!17 = !{!"function_section_prefix", !".unlikely"} From 95efea4b9310bb204a42fbf29abd4efa65647000 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 3 Aug 2020 15:59:14 -0700 Subject: [PATCH 370/600] [AArch64][SVE] Widen narrow sdiv/udiv operations. The SVE instruction set only supports sdiv/udiv for 32-bit and 64-bit integers. If we see an 8-bit or 16-bit divide, widen the operands to 32 bits, and narrow the result. Differential Revision: https://reviews.llvm.org/D85170 --- .../Target/AArch64/AArch64ISelLowering.cpp | 32 ++- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 1 + .../CodeGen/AArch64/llvm-ir-to-intrinsic.ll | 190 ++++++++++++++++++ 3 files changed, 221 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 402d7656ca215..03b33086e0c85 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3536,9 +3536,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); case ISD::SDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED); case ISD::UDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); + return LowerDIV(Op, DAG); case ISD::SMIN: return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED); case ISD::UMIN: @@ -8791,6 +8790,35 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, return SDValue(); } +SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + bool Signed = Op.getOpcode() == ISD::SDIV; + unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED; + if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64) + return LowerToPredicatedOp(Op, DAG, PredOpcode); + + // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit + // operations, and truncate the result. + EVT WidenedVT; + if (VT == MVT::nxv16i8) + WidenedVT = MVT::nxv8i16; + else if (VT == MVT::nxv8i16) + WidenedVT = MVT::nxv4i32; + else + llvm_unreachable("Unexpected Custom DIV operation"); + + SDLoc dl(Op); + unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO; + unsigned UnpkHi = Signed ? 
AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI; + SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0)); + SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1)); + SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0)); + SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1)); + SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo); + SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi); + return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi); +} + bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { // Currently no fixed length shuffles that require SVE are legal. if (useSVEForFixedLengthVectorVT(VT)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index c346debb823aa..72c9e69ce7b84 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -863,6 +863,7 @@ class AArch64TargetLowering : public TargetLowering { unsigned NewOp) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll index 9f3a77c8fe92b..a2ab019247e2e 100644 --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -5,6 +5,50 @@ ; SDIV ; +define @sdiv_i8( %a, %b) { +; CHECK-LABEL: sdiv_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpkhi z2.h, z1.b +; CHECK-NEXT: sunpkhi z3.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z1.h, z1.b +; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: sunpkhi z4.s, z2.h +; CHECK-NEXT: sunpkhi z5.s, z3.h +; CHECK-NEXT: sunpklo z2.s, z2.h +; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: sunpkhi z5.s, z1.h +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %div = sdiv %a, %b + ret %div +} + +define @sdiv_i16( %a, %b) { +; CHECK-LABEL: sdiv_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpkhi z2.s, z1.h +; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %div = sdiv %a, %b + ret %div +} + define @sdiv_i32( %a, %b) { ; CHECK-LABEL: sdiv_i32: ; CHECK: // %bb.0: @@ -63,6 +107,57 @@ define @sdiv_split_i64( %a, @srem_i8( %a, %b) { +; CHECK-LABEL: srem_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpkhi z2.h, z1.b +; CHECK-NEXT: sunpkhi z3.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z4.h, z1.b +; CHECK-NEXT: sunpklo z5.h, z0.b +; CHECK-NEXT: sunpkhi z6.s, z2.h +; CHECK-NEXT: sunpkhi z7.s, z3.h +; CHECK-NEXT: sunpklo z2.s, z2.h +; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sdivr 
z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: sunpkhi z7.s, z4.h +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: sunpkhi z3.s, z5.h +; CHECK-NEXT: sunpklo z4.s, z4.h +; CHECK-NEXT: sunpklo z5.s, z5.h +; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z7.s +; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uzp1 z2.h, z2.h, z6.h +; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h +; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b +; CHECK-NEXT: sub z0.b, z0.b, z2.b +; CHECK-NEXT: ret + %div = srem %a, %b + ret %div +} + +define @srem_i16( %a, %b) { +; CHECK-LABEL: srem_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpkhi z2.s, z1.h +; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: sunpklo z4.s, z1.h +; CHECK-NEXT: sunpklo z5.s, z0.h +; CHECK-NEXT: movprfx z3, z5 +; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h +; CHECK-NEXT: sub z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %div = srem %a, %b + ret %div +} + define @srem_i32( %a, %b) { ; CHECK-LABEL: srem_i32: ; CHECK: // %bb.0: @@ -93,6 +188,50 @@ define @srem_i64( %a, %b ; UDIV ; +define @udiv_i8( %a, %b) { +; CHECK-LABEL: udiv_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.h, z1.b +; CHECK-NEXT: uunpkhi z3.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: uunpkhi z4.s, z2.h +; CHECK-NEXT: uunpkhi z5.s, z3.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uunpkhi z5.s, z1.h +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h +; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b +; CHECK-NEXT: ret + %div = udiv %a, %b + ret %div +} + +define @udiv_i16( %a, %b) { +; CHECK-LABEL: udiv_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %div = udiv %a, %b + ret %div +} + define @udiv_i32( %a, %b) { ; CHECK-LABEL: udiv_i32: ; CHECK: // %bb.0: @@ -152,6 +291,57 @@ define @udiv_split_i64( %a, @urem_i8( %a, %b) { +; CHECK-LABEL: urem_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z2.h, z1.b +; CHECK-NEXT: uunpkhi z3.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uunpklo z4.h, z1.b +; CHECK-NEXT: uunpklo z5.h, z0.b +; CHECK-NEXT: uunpkhi z6.s, z2.h +; CHECK-NEXT: uunpkhi z7.s, z3.h +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: uunpkhi z7.s, z4.h +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: uunpkhi z3.s, z5.h +; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z7.s +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uzp1 z2.h, z2.h, z6.h +; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h +; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z2.b, p0/m, z2.b, z1.b +; CHECK-NEXT: sub z0.b, z0.b, z2.b +; CHECK-NEXT: ret + %div = urem %a, %b + 
ret <vscale x 16 x i8> %div
+}
+
+define <vscale x 8 x i16> @urem_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urem_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z2.s, z1.h
+; CHECK-NEXT: uunpkhi z3.s, z0.h
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
+; CHECK-NEXT: uunpklo z4.s, z1.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: movprfx z3, z5
+; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+  %div = urem <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %div
+}
+
 define <vscale x 4 x i32> @urem_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: urem_i32:
 ; CHECK: // %bb.0:

From e7af98680ad52ffc6d308ef63667ac6fb4bb16a7 Mon Sep 17 00:00:00 2001
From: Yifan Shen
Date: Tue, 4 Aug 2020 13:31:44 -0700
Subject: [PATCH 371/600] [lldb-vscode] Add Syntax Highlighting to Disassembly View

When lldb cannot find the source file and the IDE therefore renders a
disassembly view, add syntax highlighting for constants, registers, and
final line comments for a better debugging experience.

The original plain disassembly view looks like: {F12401687}

An ideal view is like the attached screenshot: {F12401515}

In this diff, the mimeType is a media type that describes how to format
the content in the response to a source request. Elements in the
disassembly view, like constants, registers, and final line comments,
are colored for highlighting. Built-in support in the VSCode IDE for
syntax highlighting will identify which mimeType to apply and render
the disassembly view as expected.

Reviewed By: wallace, clayborg

Differential Revision: https://reviews.llvm.org/D84555

---
 lldb/tools/lldb-vscode/lldb-vscode.cpp       |  1 +
 lldb/tools/lldb-vscode/package.json          | 18 ++++
 lldb/tools/lldb-vscode/syntaxes/arm.disasm   | 45 +++++++++
 lldb/tools/lldb-vscode/syntaxes/arm64.disasm | 91 +++++++++++++++++++
 .../lldb-vscode/syntaxes/disassembly.json    | 64 +++++++++++++
 lldb/tools/lldb-vscode/syntaxes/x86.disasm   | 28 ++++++
 6 files changed, 247 insertions(+)
 create mode 100644 lldb/tools/lldb-vscode/syntaxes/arm.disasm
 create mode 100644 lldb/tools/lldb-vscode/syntaxes/arm64.disasm
 create mode 100644 lldb/tools/lldb-vscode/syntaxes/disassembly.json
 create mode 100644 lldb/tools/lldb-vscode/syntaxes/x86.disasm

diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp
index 27ee832677d72..355a5ebf356bb 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp
@@ -2188,6 +2188,7 @@ void request_source(const llvm::json::Object &request) {
   } else {
     response["success"] = llvm::json::Value(false);
   }
+  EmplaceSafeString(body, "mimeType", "text/x-lldb.disassembly");
   response.try_emplace("body", std::move(body));
   g_vsc.SendJSON(llvm::json::Value(std::move(response)));
 }
diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json
index f4408d3607d9e..29ca06dd17d63 100644
--- a/lldb/tools/lldb-vscode/package.json
+++ b/lldb/tools/lldb-vscode/package.json
@@ -32,6 +32,24 @@
     "vsce": "^1.36.3"
   },
   "contributes": {
+    "languages": [
+      {
+        "id": "lldb.disassembly",
+        "aliases": [
+          "Disassembly"
+        ],
+        "extensions": [
+          ".disasm"
+        ]
+      }
+    ],
+    "grammars": [
+      {
+        "language": "lldb.disassembly",
+        "scopeName": "source.disassembly",
+        "path": "./syntaxes/disassembly.json"
+      }
+    ],
     "debuggers": [
       {
         "type": "lldb-vscode",
diff --git a/lldb/tools/lldb-vscode/syntaxes/arm.disasm b/lldb/tools/lldb-vscode/syntaxes/arm.disasm
new file mode 100644
index 0000000000000..436a78bfc2127
--- /dev/null
+++
b/lldb/tools/lldb-vscode/syntaxes/arm.disasm @@ -0,0 +1,45 @@ +(lldb) +libIGL.so`igl::RenderPipelineDesc::TargetDesc::ColorAttachment::operator==: +libIGL.so[0x7694] <+0>: ldr r2, [r1] +libIGL.so[0x7696] <+2>: ldr r3, [r0] +libIGL.so[0x7698] <+4>: cmp r3, r2 +libIGL.so[0x769a] <+6>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x769c] <+8>: ldrb r2, [r1, #0x5] +libIGL.so[0x769e] <+10>: ldrb r3, [r0, #0x5] +libIGL.so[0x76a0] <+12>: cmp r3, r2 +libIGL.so[0x76a2] <+14>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76a4] <+16>: ldr r2, [r1, #0x8] +libIGL.so[0x76a6] <+18>: ldr r3, [r0, #0x8] +libIGL.so[0x76a8] <+20>: cmp r3, r2 +libIGL.so[0x76aa] <+22>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76ac] <+24>: ldr r2, [r1, #0xc] +libIGL.so[0x76ae] <+26>: ldr r3, [r0, #0xc] +libIGL.so[0x76b0] <+28>: cmp r3, r2 +libIGL.so[0x76b2] <+30>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76b4] <+32>: ldr r2, [r1, #0x10] +libIGL.so[0x76b6] <+34>: ldr r3, [r0, #0x10] +libIGL.so[0x76b8] <+36>: cmp r3, r2 +libIGL.so[0x76ba] <+38>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76bc] <+40>: ldr r2, [r1, #0x14] +libIGL.so[0x76be] <+42>: ldr r3, [r0, #0x14] +libIGL.so[0x76c0] <+44>: cmp r3, r2 +libIGL.so[0x76c2] <+46>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76c4] <+48>: ldr r2, [r1, #0x18] +libIGL.so[0x76c6] <+50>: ldr r3, [r0, #0x18] +libIGL.so[0x76c8] <+52>: cmp r3, r2 +libIGL.so[0x76ca] <+54>: bne 0x76da ; <+70> at RenderPipelineState.cpp +libIGL.so[0x76cc] <+56>: ldr r1, [r1, #0x1c] +libIGL.so[0x76ce] <+58>: ldr r0, [r0, #0x1c] +libIGL.so[0x76d0] <+60>: subs r0, r0, r1 +libIGL.so[0x76d2] <+62>: clz r0, r0 +libIGL.so[0x76d6] <+66>: lsrs r0, r0, #0x5 +libIGL.so[0x76d8] <+68>: bx lr +libIGL.so[0x76da] <+70>: movs r0, #0x0 +libIGL.so[0x76dc] <+72>: bx lr +(lldb) disassemble --name _ZN3igl20VertexInputStateDesc28sizeForVertexAttributeFormatENS_21VertexAttributeFormatE +libIGL.so`igl::VertexInputStateDesc::sizeForVertexAttributeFormat: +libIGL.so[0x787c] <+0>: ldr r1, [pc, #0x8] ; <+12> at VertexInputState.cpp +libIGL.so[0x787e] <+2>: add r1, pc +libIGL.so[0x7880] <+4>: ldr.w r0, [r1, r0, lsl #2] +libIGL.so[0x7884] <+8>: bx lr +libIGL.so[0x7886] <+10>: nop \ No newline at end of file diff --git a/lldb/tools/lldb-vscode/syntaxes/arm64.disasm b/lldb/tools/lldb-vscode/syntaxes/arm64.disasm new file mode 100644 index 0000000000000..dfe201d907dd8 --- /dev/null +++ b/lldb/tools/lldb-vscode/syntaxes/arm64.disasm @@ -0,0 +1,91 @@ +(lldb) disassemble --name __android_log_config_read +liblog.so`::__android_log_config_read(): +liblog.so[0x6014] <+0>: stp x22, x21, [sp, #-0x30]! 
+liblog.so[0x6018] <+4>: stp x20, x19, [sp, #0x10] +liblog.so[0x601c] <+8>: stp x29, x30, [sp, #0x20] +liblog.so[0x6020] <+12>: add x29, sp, #0x20 ; =0x20 +liblog.so[0x6024] <+16>: adrp x8, 15 +liblog.so[0x6028] <+20>: ldr x8, [x8, #0x230] +liblog.so[0x602c] <+24>: ldr w8, [x8] +liblog.so[0x6030] <+28>: cbz w8, 0x6038 ; <+36> at config_read.cpp +liblog.so[0x6034] <+32>: tbz w8, #0x0, 0x6168 ; <+340> at config_read.cpp:65:1 +liblog.so[0x6038] <+36>: adrp x20, 15 +liblog.so[0x603c] <+40>: adrp x21, 15 +liblog.so[0x6040] <+44>: ldr x20, [x20, #0x238] +liblog.so[0x6044] <+48>: ldr x21, [x21, #0x240] +liblog.so[0x6048] <+52>: mov w19, wzr +liblog.so[0x604c] <+56>: ldr x22, [x20] +liblog.so[0x6050] <+60>: cmp x22, x20 +liblog.so[0x6054] <+64>: b.eq 0x609c ; <+136> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 80 at config_read.cpp:61 +liblog.so[0x6058] <+68>: ldr x8, [x22] +liblog.so[0x605c] <+72>: cmp x22, x8 +liblog.so[0x6060] <+76>: b.eq 0x60b0 ; <+156> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 100 at config_read.cpp:61 +liblog.so[0x6064] <+80>: ldr x8, [x22, #0x18] +liblog.so[0x6068] <+84>: cbz x8, 0x60d0 ; <+188> at config_read.cpp +liblog.so[0x606c] <+88>: mov w0, w19 +liblog.so[0x6070] <+92>: blr x8 +liblog.so[0x6074] <+96>: tbz w0, #0x1f, 0x608c ; <+120> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 64 at config_read.cpp:61 +liblog.so[0x6078] <+100>: ldr x8, [x21, #0x18] +liblog.so[0x607c] <+104>: cbz x8, 0x60c0 ; <+172> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:61 +liblog.so[0x6080] <+108>: mov w0, w19 +liblog.so[0x6084] <+112>: blr x8 +liblog.so[0x6088] <+116>: tbz w0, #0x1f, 0x60c0 ; <+172> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:61 +liblog.so[0x608c] <+120>: ldr x22, [x22] +liblog.so[0x6090] <+124>: cmp x22, x20 +liblog.so[0x6094] <+128>: b.ne 0x6058 ; <+68> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 12 at config_read.cpp:61 +liblog.so[0x6098] <+132>: b 0x60b0 ; <+156> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 100 at config_read.cpp:61 +liblog.so[0x609c] <+136>: ldr x8, [x21, #0x18] +liblog.so[0x60a0] <+140>: cbz x8, 0x60c0 ; <+172> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:61 +liblog.so[0x60a4] <+144>: mov w0, w19 +liblog.so[0x60a8] <+148>: blr x8 +liblog.so[0x60ac] <+152>: tbz w0, #0x1f, 0x60c0 ; <+172> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:61 +liblog.so[0x60b0] <+156>: add w19, w19, #0x1 ; =0x1 +liblog.so[0x60b4] <+160>: cmp w19, #0x8 ; =0x8 +liblog.so[0x60b8] <+164>: b.lo 0x604c ; <+56> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) at config_read.cpp:61 +liblog.so[0x60bc] <+168>: b 0x60d0 ; <+188> at config_read.cpp +liblog.so[0x60c0] <+172>: ldr x8, [x20, #0x8] +liblog.so[0x60c4] <+176>: stp x20, x8, [x21] +liblog.so[0x60c8] <+180>: str x21, [x8] +liblog.so[0x60cc] <+184>: str x21, [x20, #0x8] +liblog.so[0x60d0] <+188>: adrp x20, 15 +liblog.so[0x60d4] <+192>: adrp x21, 15 +liblog.so[0x60d8] <+196>: ldr x20, [x20, #0x248] +liblog.so[0x60dc] <+200>: ldr x21, [x21, #0x250] +liblog.so[0x60e0] <+204>: mov w19, wzr +liblog.so[0x60e4] <+208>: ldr x22, [x20] +liblog.so[0x60e8] <+212>: cmp x22, x20 +liblog.so[0x60ec] <+216>: b.eq 
0x6134 ; <+288> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 80 at config_read.cpp:62 +liblog.so[0x60f0] <+220>: ldr x8, [x22] +liblog.so[0x60f4] <+224>: cmp x22, x8 +liblog.so[0x60f8] <+228>: b.eq 0x6148 ; <+308> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 100 at config_read.cpp:62 +liblog.so[0x60fc] <+232>: ldr x8, [x22, #0x18] +liblog.so[0x6100] <+236>: cbz x8, 0x6168 ; <+340> at config_read.cpp:65:1 +liblog.so[0x6104] <+240>: mov w0, w19 +liblog.so[0x6108] <+244>: blr x8 +liblog.so[0x610c] <+248>: tbz w0, #0x1f, 0x6124 ; <+272> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 64 at config_read.cpp:62 +liblog.so[0x6110] <+252>: ldr x8, [x21, #0x18] +liblog.so[0x6114] <+256>: cbz x8, 0x6158 ; <+324> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:62 +liblog.so[0x6118] <+260>: mov w0, w19 +liblog.so[0x611c] <+264>: blr x8 +liblog.so[0x6120] <+268>: tbz w0, #0x1f, 0x6158 ; <+324> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:62 +liblog.so[0x6124] <+272>: ldr x22, [x22] +liblog.so[0x6128] <+276>: cmp x22, x20 +liblog.so[0x612c] <+280>: b.ne 0x60f0 ; <+220> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 12 at config_read.cpp:62 +liblog.so[0x6130] <+284>: b 0x6148 ; <+308> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 100 at config_read.cpp:62 +liblog.so[0x6134] <+288>: ldr x8, [x21, #0x18] +liblog.so[0x6138] <+292>: cbz x8, 0x6158 ; <+324> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:62 +liblog.so[0x613c] <+296>: mov w0, w19 +liblog.so[0x6140] <+300>: blr x8 +liblog.so[0x6144] <+304>: tbz w0, #0x1f, 0x6158 ; <+324> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) + 116 at config_read.cpp:62 +liblog.so[0x6148] <+308>: add w19, w19, #0x1 ; =0x1 +liblog.so[0x614c] <+312>: cmp w19, #0x8 ; =0x8 +liblog.so[0x6150] <+316>: b.lo 0x60e4 ; <+208> [inlined] __android_log_add_transport(listnode*, android_log_transport_read*) at config_read.cpp:62 +liblog.so[0x6154] <+320>: b 0x6168 ; <+340> at config_read.cpp:65:1 +liblog.so[0x6158] <+324>: ldr x8, [x20, #0x8] +liblog.so[0x615c] <+328>: stp x20, x8, [x21] +liblog.so[0x6160] <+332>: str x21, [x8] +liblog.so[0x6164] <+336>: str x21, [x20, #0x8] +liblog.so[0x6168] <+340>: ldp x29, x30, [sp, #0x20] +liblog.so[0x616c] <+344>: ldp x20, x19, [sp, #0x10] +liblog.so[0x6170] <+348>: ldp x22, x21, [sp], #0x30 +liblog.so[0x6174] <+352>: ret diff --git a/lldb/tools/lldb-vscode/syntaxes/disassembly.json b/lldb/tools/lldb-vscode/syntaxes/disassembly.json new file mode 100644 index 0000000000000..cd086fe2f49f5 --- /dev/null +++ b/lldb/tools/lldb-vscode/syntaxes/disassembly.json @@ -0,0 +1,64 @@ +{ + "name": "Disassembly", + "scopeName": "source.disassembly", + "uuid": "9ade615f-5d82-4ac5-b22f-a1998c356ebe", + "patterns": [ + { + "comment": "x86 Address, bytes and opcode", + "name": "meta.instruction", + "match": "^([A-Za-z0-9]+):\\s([A-Z0-9]{2}\\s)+>?\\s+(\\w+)", + "captures": { + "1": {"name": "constant.numeric"}, + "3": {"name": "keyword.opcode"} + } + }, + { + "comment": "ARM Address, bytes and opcode", + "name": "meta.instruction", + "match": "^libIGL.so\\[([A-Za-z0-9]+)\\]\\s+(\\<\\+[0-9]*\\>):\\s+([A-Za-z]+.?[A-Za-z]*)", + "captures": { + "1": {"name": "constant.numeric"}, + "3": {"name": 
"keyword.opcode"} + } + }, + { + "comment": "ARM64 Address, bytes and opcode", + "name": "meta.instruction", + "match": "^liblog.so\\[([A-Za-z0-9]+)\\]\\s+(\\<\\+[0-9]*\\>):\\s+([A-Za-z]+.?[A-Za-z]*)", + "captures": { + "1": {"name": "constant.numeric"}, + "3": {"name": "keyword.opcode"} + } + }, + { + "comment": "Numeric constant", + "name": "constant.numeric", + "match": "(\\$|\\b)((0x)|[0-9])[A-Za-z0-9]+\\b" + }, + { + "comment": "x86 Register", + "name": "variable.language", + "match": "%[A-Za-z][A-Za-z0-9]*" + }, + { + "comment": "ARM Register", + "name": "variable.language", + "match": "r\\d+" + }, + { + "comment": "ARM Register Shortnames", + "name": "variable.language", + "match": "(fp|sp|lr|pc|wzr|xzr)" + }, + { + "comment": "ARM64 Register", + "name": "variable.language", + "match": "(x|w)[0-9]+" + }, + { + "comment": "End of line comment", + "name": "comment.line.semicolon", + "match": ";.*$" + } + ] +} diff --git a/lldb/tools/lldb-vscode/syntaxes/x86.disasm b/lldb/tools/lldb-vscode/syntaxes/x86.disasm new file mode 100644 index 0000000000000..d86a798cb9828 --- /dev/null +++ b/lldb/tools/lldb-vscode/syntaxes/x86.disasm @@ -0,0 +1,28 @@ +0x100008000: <0> popq %rdi +0x100008001: <1> pushq $0x0 +0x100008003: <3> movq %rsp, %rbp +0x100008006: <6> andq $-0x10, %rsp +0x10000800A: <10> subq $0x10, %rsp +0x10000800E: <14> movl 0x8(%rbp), %esi +0x100008011: <17> leaq 0x10(%rbp), %rdx +0x100008015: <21> leaq -0x101c(%rip), %rcx +0x10000801C: <28> leaq -0x8(%rbp), %r8 +0x100008020: <32> callq 0x100008062 # dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) +0x100008025: <37> movq -0x8(%rbp), %rdi +0x100008029: <41> cmpq $0x0, %rdi +0x10000802D: <45> jne 0x10000803f # <+63> +0x10000802F: <47> movq %rbp, %rsp +0x100008032: <50> addq $0x8, %rsp +0x100008036: <54> movq $0x0, %rbp +0x10000803D: <61> jmpq *%rax +0x10000803F: <63> addq $0x10, %rsp +0x100008043: <67> pushq %rdi +0x100008044: <68> movq 0x8(%rbp), %rdi +0x100008048: <72> leaq 0x10(%rbp), %rsi +0x10000804C: <76> leaq 0x8(%rsi,%rdi,8), %rdx +0x100008051: <81> movq %rdx, %rcx +0x100008054: <84> movq (%rcx), %r8 +0x100008057: <87> addq $0x8, %rcx +0x10000805B: <91> testq %r8, %r8 +0x10000805E: <94> jne 0x100008054 # <+84> +0x100008060: <96> jmpq *%rax From f8fb7835d6a5e2a75d412e1482fc1c039efef1f0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 20 Jul 2020 08:26:33 -0400 Subject: [PATCH 372/600] GlobalISel: Add utilty for getting function argument live ins Get the argument register and ensure there's a copy to the virtual register. AMDGPU and AArch64 have similarish code to get the livein value, and I also want to use this in multiple places. This is a bit more aggressive about setting the register class than the original function, but that's probably OK. I think we're missing a few verifier checks for function live ins. I noticed AArch64's calling convention code is not actually adding liveins to functions, only the entry block (which apparently might not matter that much?). There should probably be a verifier check that entry block live ins are also live into the function. We also might need a verifier check that the copy to the livein virtual register is in the entry block. 
--- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 11 ++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 34 ++++++++++ .../GISel/AArch64InstructionSelector.cpp | 13 ++-- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 62 +++---------------- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 5 -- .../select-returnaddress-liveins.mir | 10 +-- .../irtranslator-call-implicit-args.ll | 8 +-- .../AMDGPU/GlobalISel/irtranslator-call.ll | 10 +-- .../GlobalISel/legalize-addrspacecast.mir | 6 +- 9 files changed, 77 insertions(+), 82 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 35add316b5b67..a44e936ef5d64 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -190,6 +190,17 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO); +/// Return a virtual register corresponding to the incoming argument register \p +/// PhysReg. This register is expected to have class \p RC, and optional type \p +/// RegTy. This assumes all references to the register will use the same type. +/// +/// If there is an existing live-in argument register, it will be returned. +/// This will also ensure there is a valid copy from \p PhysReg to the virtual +/// register in the entry block. +Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy = LLT()); + /// Return the least common multiple type of \p OrigTy and \p TargetTy, by changing the /// number of vector elements or scalar bitwidth. The intent is a /// G_MERGE_VALUES, G_BUILD_VECTOR, or G_CONCAT_VECTORS can be constructed from diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 7fc738adb3392..b59064ecf868b 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -497,6 +497,40 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, return Align(1); } +Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF, + const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy) { + DebugLoc DL; // FIXME: Is no location the right choice? + MachineBasicBlock &EntryMBB = MF.front(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + Register LiveIn = MRI.getLiveInVirtReg(PhysReg); + if (LiveIn) { + MachineInstr *Def = MRI.getVRegDef(LiveIn); + if (Def) { + // FIXME: Should the verifier check this is in the entry block? + assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block"); + return LiveIn; + } + + // It's possible the incoming argument register and copy were added during + // lowering, but later deleted due to being/becoming dead. If this happens, + // re-insert the copy. + } else { + // The live-in register was not present, so add it.
+ LiveIn = MF.addLiveIn(PhysReg, &RC); + if (RegTy.isValid()) + MRI.setType(LiveIn, RegTy); + } + + BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn) + .addReg(PhysReg); + if (!EntryMBB.isLiveIn(PhysReg)) + EntryMBB.addLiveIn(PhysReg); + return LiveIn; +} + Optional llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 9f7950851f65c..8721a535154af 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -4784,16 +4784,15 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, I.eraseFromParent(); return true; } + MFI.setReturnAddressIsTaken(true); - MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass); + // Insert the copy from LR/X30 into the entry block, before it can be // clobbered by anything. - MachineBasicBlock &EntryBlock = *MF.begin(); - if (!EntryBlock.isLiveIn(AArch64::LR)) - EntryBlock.addLiveIn(AArch64::LR); - MachineIRBuilder EntryBuilder(MF); - EntryBuilder.setInstr(*EntryBlock.begin()); - EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); + Register LiveInLR = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, + AArch64::GPR64spRegClass); + MIRBuilder.buildCopy(DstReg, LiveInLR); + MFReturnAddr = DstReg; I.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index b40870024cc49..33992cacddb71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2494,53 +2494,6 @@ static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, return &UseMI; } -Register AMDGPULegalizerInfo::insertLiveInCopy(MachineIRBuilder &B, - MachineRegisterInfo &MRI, - Register LiveIn, - Register PhyReg) const { - assert(PhyReg.isPhysical() && "Physical register expected"); - - // Insert the live-in copy, if required, by defining destination virtual - // register. - // FIXME: It seems EmitLiveInCopies isn't called anywhere? 
- if (!MRI.getVRegDef(LiveIn)) { - // FIXME: Should have scoped insert pt - MachineBasicBlock &OrigInsBB = B.getMBB(); - auto OrigInsPt = B.getInsertPt(); - - MachineBasicBlock &EntryMBB = B.getMF().front(); - EntryMBB.addLiveIn(PhyReg); - B.setInsertPt(EntryMBB, EntryMBB.begin()); - B.buildCopy(LiveIn, PhyReg); - - B.setInsertPt(OrigInsBB, OrigInsPt); - } - - return LiveIn; -} - -Register AMDGPULegalizerInfo::getLiveInRegister(MachineIRBuilder &B, - MachineRegisterInfo &MRI, - Register PhyReg, LLT Ty, - bool InsertLiveInCopy) const { - assert(PhyReg.isPhysical() && "Physical register expected"); - - // Get or create virtual live-in regester - Register LiveIn = MRI.getLiveInVirtReg(PhyReg); - if (!LiveIn) { - LiveIn = MRI.createGenericVirtualRegister(Ty); - MRI.addLiveIn(PhyReg, LiveIn); - } - - // When the actual true copy required is from virtual register to physical - // register (to be inserted later), live-in copy insertion from physical - // to register virtual register is not required - if (!InsertLiveInCopy) - return LiveIn; - - return insertLiveInCopy(B, MRI, LiveIn, PhyReg); -} - bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg, const TargetRegisterClass *ArgRC, @@ -2549,9 +2502,8 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, assert(SrcReg.isPhysical() && "Physical register expected"); assert(DstReg.isVirtual() && "Virtual register expected"); - MachineRegisterInfo &MRI = *B.getMRI(); - Register LiveIn = getLiveInRegister(B, MRI, SrcReg, ArgTy); - + Register LiveIn = getFunctionLiveInPhysReg(B.getMF(), B.getTII(), SrcReg, *ArgRC, + ArgTy); if (Arg->isMasked()) { // TODO: Should we try to emit this once in the entry block? const LLT S32 = LLT::scalar(32); @@ -4195,6 +4147,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( return true; } +// TODO: Move to selection bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -4206,12 +4159,13 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, // Pass queue pointer to trap handler as input, and insert trap instruction // Reference: https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi MachineRegisterInfo &MRI = *B.getMRI(); - Register SGPR01(AMDGPU::SGPR0_SGPR1); - Register LiveIn = getLiveInRegister( - B, MRI, SGPR01, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64), - /*InsertLiveInCopy=*/false); + + Register LiveIn = + MRI.createGenericVirtualRegister(LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64)); if (!loadInputValue(LiveIn, B, AMDGPUFunctionArgInfo::QUEUE_PTR)) return false; + + Register SGPR01(AMDGPU::SGPR0_SGPR1); B.buildCopy(SGPR01, LiveIn); B.buildInstr(AMDGPU::S_TRAP) .addImm(GCNSubtarget::TrapIDLLVMTrap) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 332d675c1a88e..99191487f90d8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -86,11 +86,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; - Register getLiveInRegister(MachineIRBuilder &B, MachineRegisterInfo &MRI, - Register PhyReg, LLT Ty, - bool InsertLiveInCopy = true) const; - Register insertLiveInCopy(MachineIRBuilder &B, MachineRegisterInfo &MRI, - Register LiveIn, Register PhyReg) const; bool loadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg, 
const TargetRegisterClass *ArgRC, LLT ArgTy) const; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir index 433c7848433fb..745752dcc3429 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-returnaddress-liveins.mir @@ -17,10 +17,11 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $w0, $x0, $lr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $lr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $lr ; CHECK: B %bb.1 ; CHECK: bb.1: - ; CHECK: $x0 = COPY [[COPY]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; CHECK: $x0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $x0 ; LR should be added as a livein to the entry block. @@ -44,10 +45,11 @@ body: | ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $w0, $x0, $lr - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $lr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $lr ; CHECK: B %bb.1 ; CHECK: bb.1: - ; CHECK: $x0 = COPY [[COPY]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; CHECK: $x0 = COPY [[COPY1]] ; CHECK: RET_ReallyLR implicit $x0 ; We should not have LR listed as a livein twice. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index aa0850a5dbe5b..0cb51aef3fa1a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -115,7 +115,7 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.0): ; GFX900: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GFX900: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; GFX900: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX900: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; GFX900: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -153,7 +153,7 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908-LABEL: name: test_func_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.0): ; GFX908: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GFX908: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; GFX908: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX908: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; GFX908: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -373,7 +373,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): ; GFX900: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GFX900: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; GFX900: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX900: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; GFX900: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -498,7 +498,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; 
GFX908-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): ; GFX908: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GFX908: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; GFX908: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; GFX908: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; GFX908: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 6b29697ca086e..b3a8aac96df94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -142,7 +142,7 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_func_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -4369,7 +4369,7 @@ define void @stack_12xv3i32() #0 { ; CHECK-LABEL: name: stack_12xv3i32 ; CHECK: bb.1.entry: ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -4510,7 +4510,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -4651,7 +4651,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 @@ -4792,7 +4792,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 395d34a00081d..8798e9f858263 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -171,7 +171,7 @@ body: | liveins: $vgpr0 ; VI-LABEL: name: test_addrspacecast_p5_to_p0 - ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; VI: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 @@ -254,7 +254,7 @@ body: | liveins: $vgpr0 ; VI-LABEL: name: test_addrspacecast_p3_to_p0 - ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; VI: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 @@ -459,7 +459,7 @@ body: | liveins: $vgpr0_vgpr1 ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0 - ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; VI: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 ; VI: [[COPY1:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY1]](<2 x p3>) ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 From ba3d84d82b750296c11e843365aa85962a561ad4 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 4 Aug 2020 14:00:30 -0700 Subject: [PATCH 373/600] [lldb/Test] Skip tests that try to get the remote environment We don't support getting the remote environment. The gdb remote protocol has no packet for that. --- lldb/test/API/python_api/sbenvironment/TestSBEnvironment.py | 2 ++ lldb/test/API/python_api/sbplatform/TestSBPlatform.py | 1 + 2 files changed, 3 insertions(+) diff --git a/lldb/test/API/python_api/sbenvironment/TestSBEnvironment.py b/lldb/test/API/python_api/sbenvironment/TestSBEnvironment.py index 6389854ce58f5..62c3ac2e50224 100644 --- a/lldb/test/API/python_api/sbenvironment/TestSBEnvironment.py +++ b/lldb/test/API/python_api/sbenvironment/TestSBEnvironment.py @@ -31,6 +31,7 @@ def assertEqualEntries(self, env, entries): @add_test_categories(['pyapi']) + @skipIfRemote # Remote environment not supported. def test_platform_environment(self): env = self.dbg.GetSelectedPlatform().GetEnvironment() # We assume at least PATH is set @@ -67,6 +68,7 @@ def test_launch_info(self): @add_test_categories(['pyapi']) + @skipIfRemote # Remote environment not supported. def test_target_environment(self): env = self.dbg.GetSelectedTarget().GetEnvironment() # There is no target, so env should be empty diff --git a/lldb/test/API/python_api/sbplatform/TestSBPlatform.py b/lldb/test/API/python_api/sbplatform/TestSBPlatform.py index 4735f6ea3b492..3fa4c10b401e1 100644 --- a/lldb/test/API/python_api/sbplatform/TestSBPlatform.py +++ b/lldb/test/API/python_api/sbplatform/TestSBPlatform.py @@ -9,6 +9,7 @@ class SBPlatformAPICase(TestBase): NO_DEBUG_INFO_TESTCASE = True @add_test_categories(['pyapi']) + @skipIfRemote # Remote environment not supported. 
def test_run(self): self.build() plat = lldb.SBPlatform.GetHostPlatform() From 89011fc3c965811bbc3b6a0770c9d94bb264ad3f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 19 Jul 2020 21:26:02 -0400 Subject: [PATCH 374/600] AMDGPU/GlobalISel: Select llvm.returnaddress --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 50 +++++++ .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 + .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 3 +- .../GlobalISel/inst-select-returnaddress.mir | 122 ++++++++++++++++++ llvm/test/CodeGen/AMDGPU/returnaddress.ll | 24 +++- 5 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 43f5e534411c9..0beff5c5f571b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -872,6 +872,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { return selectBallot(I); case Intrinsic::amdgcn_reloc_constant: return selectRelocConstant(I); + case Intrinsic::returnaddress: + return selectReturnAddress(I); default: return selectImpl(I, *CoverageInfo); } @@ -1077,6 +1079,54 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { + MachineBasicBlock *MBB = I.getParent(); + MachineFunction &MF = *MBB->getParent(); + const DebugLoc &DL = I.getDebugLoc(); + + MachineOperand &Dst = I.getOperand(0); + Register DstReg = Dst.getReg(); + unsigned Depth = I.getOperand(2).getImm(); + + const TargetRegisterClass *RC + = TRI.getConstrainedRegClassForOperand(Dst, *MRI); + if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) || + !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) + return false; + + MachineBasicBlock &EntryMBB = MF.front(); + + // Check for kernel and shader functions + if (Depth != 0 || + MF.getInfo()->isEntryFunction()) { + BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) + .addImm(0); + I.eraseFromParent(); + return true; + } + + Register ReturnAddrReg = TRI.getReturnAddressReg(MF); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + // There is a call to @llvm.returnaddress in this function + MFI.setReturnAddressIsTaken(true); + + // Get the return address reg and mark it as an implicit live-in + Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg); + if (!LiveIn) { + LiveIn = MF.addLiveIn(ReturnAddrReg, RC); + BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn) + .addReg(ReturnAddrReg); + if (!EntryMBB.isLiveIn(ReturnAddrReg)) + EntryMBB.addLiveIn(ReturnAddrReg); + } + + BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg) + .addReg(LiveIn); + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const { // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick // SelectionDAG uses for wave32 vs wave64. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 1a0a9cefacf98..c86f5f786295c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -109,6 +109,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool selectIntrinsicIcmp(MachineInstr &MI) const; bool selectBallot(MachineInstr &I) const; bool selectRelocConstant(MachineInstr &I) const; + bool selectReturnAddress(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; bool selectEndCfIntrinsic(MachineInstr &MI) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 9674474cd3cf9..f71e1be577136 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4023,7 +4023,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_kernarg_segment_ptr: case Intrinsic::amdgcn_s_getpc: case Intrinsic::amdgcn_groupstaticsize: - case Intrinsic::amdgcn_reloc_constant: { + case Intrinsic::amdgcn_reloc_constant: + case Intrinsic::returnaddress: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir new file mode 100644 index 0000000000000..72927e7dbef76 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir @@ -0,0 +1,122 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: return_address_already_live_in_copy +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%0' } + +body: | + bb.0: + liveins: $sgpr30_sgpr31 + ; CHECK-LABEL: name: return_address_already_live_in_copy + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + %0:sgpr(p0) = COPY $sgpr30_sgpr31 + %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0, implicit %1 +... + +--- +name: return_address_already_block_live_in_copy_not_mf_life_in +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr30_sgpr31 + ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_life_in + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]] + %0:sgpr(p0) = COPY $sgpr30_sgpr31 + %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0, implicit %1 +... + +--- +name: return_address_no_live_in +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + + ; CHECK-LABEL: name: return_address_no_live_in + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_ENDPGM 0, implicit [[COPY]] + %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0 +... 
+ +--- +name: return_address_no_live_in_non_entry_block +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + ; CHECK-LABEL: name: return_address_no_live_in_non_entry_block + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_BRANCH %bb.1 + ; CHECK: bb.1: + ; CHECK: S_ENDPGM 0, implicit [[COPY]] + bb.0: + G_BR %bb.1 + + bb.1: + %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0 +... + +--- +name: return_address_multi_use +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + ; CHECK-LABEL: name: return_address_multi_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_BRANCH %bb.1 + ; CHECK: bb.1: + ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]] + bb.0: + %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + G_BR %bb.1 + + bb.1: + %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0, implicit %1 +... + +--- +name: return_address_kernel_is_null +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + +body: | + bb.0: + ; CHECK-LABEL: name: return_address_kernel_is_null + ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]] + %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0 + S_ENDPGM 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress.ll b/llvm/test/CodeGen/AMDGPU/returnaddress.ll index 1db6e3e0a8575..7937ba63c43b5 100644 --- a/llvm/test/CodeGen/AMDGPU/returnaddress.ll +++ b/llvm/test/CodeGen/AMDGPU/returnaddress.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; Test with zero frame ; GCN-LABEL: {{^}}func1 @@ -25,7 +26,7 @@ entry: ; Test with amdgpu_kernel ; GCN-LABEL: {{^}}func3 ; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 +; GCN: v_mov_b32_e32 v1, {{v0|0}} define amdgpu_kernel void @func3(i8** %out) nounwind { entry: %tmp = tail call i8* @llvm.returnaddress(i32 0) @@ -36,7 +37,7 @@ entry: ; Test with use outside the entry-block ; GCN-LABEL: {{^}}func4 ; GCN: v_mov_b32_e32 v0, 0 -; GCN: v_mov_b32_e32 v1, v0 +; GCN: v_mov_b32_e32 v1, {{v0|0}} define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind { entry: %cmp = icmp ne i32 %val, 0 @@ -61,5 +62,22 @@ entry: unreachable } +declare void @callee() + +; GCN-LABEL: {{^}}multi_use: +; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30 +; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31 +; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN: s_swappc_b64 +; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @multi_use() nounwind { +entry: + %ret0 = tail call i8* @llvm.returnaddress(i32 0) + store volatile i8* %ret0, i8* addrspace(1)* undef + call void @callee() + %ret1 = tail call i8* @llvm.returnaddress(i32 0) + store volatile i8* %ret1, i8* addrspace(1)* undef + ret void +} declare i8* @llvm.returnaddress(i32) nounwind readnone From 
65b6dbf9395f8c0b2560f7ffe8dac4b0b32dc2d4 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 4 Aug 2020 14:32:07 -0700 Subject: [PATCH 375/600] [llvm][NFC] Moved implementation of TrainingLogger outside of its decl Also renamed a method - printTensor - to print; and added comments. --- .../Analysis/DevelopmentModeInlineAdvisor.cpp | 95 +++++++++++-------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp index 32fd16810dc35..35a2b3a7829cb 100644 --- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp +++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp @@ -71,54 +71,19 @@ struct InlineEvent { /// lines up with how TF SequenceExample represents it. class TrainingLogger final { public: - TrainingLogger() { - for (size_t I = 0; I < NumberOfFeatures; ++I) { - Features.push_back(InlineFeatures()); - } - } + TrainingLogger(); /// Log one inlining event. void logInlineEvent(const InlineEvent &Event, - const MLModelRunner &ModelRunner) { - for (size_t I = 0; I < NumberOfFeatures; ++I) { - Features[I].push_back(ModelRunner.getFeature(I)); - } - Decisions.push_back(Event.AdvisedDecision); - Effects.push_back(Event.Effect); - Rewards.push_back(Event.Reward); - DefaultDecisions.push_back(Event.DefaultDecision); - } + const MLModelRunner &ModelRunner); - void printTensor(raw_fd_ostream &OutFile) { - if (DefaultDecisions.empty()) - return; - OutFile << "feature_lists: {\n"; - - for (size_t I = 0; I < Features.size(); I++) { - writeTensor(OutFile, FeatureNameMap.at(I), Features[I]); - } - writeTensor(OutFile, DefaultDecisionName, DefaultDecisions); - writeTensor(OutFile, DecisionName, Decisions); - writeTensor(OutFile, RewardName, Rewards); - - OutFile << "}\n"; - } + /// Print the stored tensors. + void print(raw_fd_ostream &OutFile); private: template void writeTensor(raw_fd_ostream &OutFile, StringRef TensorName, - const std::vector &Tensor) { - OutFile << " feature_list: {\n"; - OutFile << " key: " - << "\"" << TensorName << "\" "; - OutFile << "value: {\n"; - for (const auto &Feature : Tensor) { - OutFile << " feature: { int64_list: { value: [" << Feature - << "] } }\n"; - } - OutFile << " }\n"; - OutFile << " }\n"; - } + const std::vector &Tensor); std::vector Features; std::vector DefaultDecisions; @@ -307,6 +272,54 @@ class ModelUnderTrainingRunner final : public MLModelRunner { }; } // namespace +TrainingLogger::TrainingLogger() { + for (size_t I = 0; I < NumberOfFeatures; ++I) { + Features.push_back(InlineFeatures()); + } +} + +/// Log one inlining event. 
+void TrainingLogger::logInlineEvent(const InlineEvent &Event, + const MLModelRunner &ModelRunner) { + for (size_t I = 0; I < NumberOfFeatures; ++I) { + Features[I].push_back(ModelRunner.getFeature(I)); + } + Decisions.push_back(Event.AdvisedDecision); + Effects.push_back(Event.Effect); + Rewards.push_back(Event.Reward); + DefaultDecisions.push_back(Event.DefaultDecision); +} + +void TrainingLogger::print(raw_fd_ostream &OutFile) { + if (DefaultDecisions.empty()) + return; + OutFile << "feature_lists: {\n"; + + for (size_t I = 0; I < Features.size(); I++) { + writeTensor(OutFile, FeatureNameMap.at(I), Features[I]); + } + writeTensor(OutFile, DefaultDecisionName, DefaultDecisions); + writeTensor(OutFile, DecisionName, Decisions); + writeTensor(OutFile, RewardName, Rewards); + + OutFile << "}\n"; +} + +template +void TrainingLogger::writeTensor(raw_fd_ostream &OutFile, StringRef TensorName, + const std::vector &Tensor) { + OutFile << " feature_list: {\n"; + OutFile << " key: " + << "\"" << TensorName << "\" "; + OutFile << "value: {\n"; + for (const auto &Feature : Tensor) { + OutFile << " feature: { int64_list: { value: [" << Feature + << "] } }\n"; + } + OutFile << " }\n"; + OutFile << " }\n"; +} + DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor( Module &M, ModuleAnalysisManager &MAM, std::unique_ptr ModelRunner, @@ -324,7 +337,7 @@ DevelopmentModeMLInlineAdvisor::~DevelopmentModeMLInlineAdvisor() { return; std::error_code ErrorCode; raw_fd_ostream OutFile(TrainingLog, ErrorCode); - Logger.printTensor(OutFile); + Logger.print(OutFile); } size_t From 486e84dfa4923ba506e715098699fc17aa7f9e40 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 20 Jul 2020 17:10:42 -0400 Subject: [PATCH 376/600] AMDGPU/GlobalISel: Use live in helper function for returnaddress --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 0beff5c5f571b..d3e792b5832b5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1094,8 +1094,6 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) return false; - MachineBasicBlock &EntryMBB = MF.front(); - // Check for kernel and shader functions if (Depth != 0 || MF.getInfo()->isEntryFunction()) { @@ -1105,22 +1103,14 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { return true; } - Register ReturnAddrReg = TRI.getReturnAddressReg(MF); - MachineFrameInfo &MFI = MF.getFrameInfo(); // There is a call to @llvm.returnaddress in this function MFI.setReturnAddressIsTaken(true); // Get the return address reg and mark it as an implicit live-in - Register LiveIn = MRI->getLiveInVirtReg(ReturnAddrReg); - if (!LiveIn) { - LiveIn = MF.addLiveIn(ReturnAddrReg, RC); - BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(AMDGPU::COPY), LiveIn) - .addReg(ReturnAddrReg); - if (!EntryMBB.isLiveIn(ReturnAddrReg)) - EntryMBB.addLiveIn(ReturnAddrReg); - } - + Register ReturnAddrReg = TRI.getReturnAddressReg(MF); + Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg, + AMDGPU::SReg_64RegClass); BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg) .addReg(LiveIn); I.eraseFromParent(); From e8b7edafc3dd0ab85903eebdfdb3bb7cc2d66743 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 4 Aug 2020 
14:39:39 -0700 Subject: [PATCH 377/600] [lldb/Test] Add @skipIfRemote decorator to TestProcessList.py lldb-platform contains very minimal support for the qfProcessInfo packet, only allowing the simplest query to get most of the testsuite running, and returning very little information about the matched processes. --- lldb/test/API/commands/platform/process/list/TestProcessList.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/commands/platform/process/list/TestProcessList.py b/lldb/test/API/commands/platform/process/list/TestProcessList.py index 9fc84d4f26e0f..fe2ed74916ebe 100644 --- a/lldb/test/API/commands/platform/process/list/TestProcessList.py +++ b/lldb/test/API/commands/platform/process/list/TestProcessList.py @@ -18,6 +18,7 @@ class ProcessListTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True @skipIfWindows # https://bugs.llvm.org/show_bug.cgi?id=43702 + @skipIfRemote # rdar://problem/66542336 def test_process_list_with_args(self): """Test process list show process args""" self.build() From 09897b146a8a7cb934279e630c0027d5d4de7399 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Aug 2020 16:13:08 -0500 Subject: [PATCH 378/600] [RDF] Remove uses of RDFRegisters::normalize (deprecate) This function has been reduced to an identity function for some time. --- llvm/include/llvm/CodeGen/RDFRegisters.h | 6 +++++- llvm/lib/CodeGen/RDFRegisters.cpp | 4 ---- llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 2 +- llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 4afaf80e46595..e8ba0103777ab 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -110,7 +110,11 @@ namespace rdf { return RegMasks.get(Register::stackSlot2Index(R)); } - RegisterRef normalize(RegisterRef RR) const; + LLVM_ATTRIBUTE_DEPRECATED(RegisterRef normalize(RegisterRef RR), + "This function is now an identity function"); + RegisterRef normalize(RegisterRef RR) const { + return RR; + } bool alias(RegisterRef RA, RegisterRef RB) const { if (!isRegMaskId(RA.Reg)) diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index bd8661816e718..9f8d6b9f61ce2 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -94,10 +94,6 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, } } -RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { - return RR; -} - std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { // Do not include RR in the alias set.
std::set AS; diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index c718e5f2d9fbe..2cdfbe7845b63 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -246,7 +246,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr SA, for (NodeAddr DA : SA.Addr->members_if(DFG->IsDef, *DFG)) { LLVM_DEBUG(dbgs() << "\t\t[DefNode]: " << Print>(DA, *DFG) << "\n"); - RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG)); + RegisterRef DR = DA.Addr->getRegRef(*DFG); auto UseSet = LV->getAllReachedUses(DR, DA); diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp index 18fcc48bc9cdd..12aaabcc79645 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -369,7 +369,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( // Use RDF to find all the uses of `Def` rdf::NodeSet Uses; - RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG)); + RegisterRef DefReg = Def.Addr->getRegRef(DFG); for (auto UseID : L.getAllReachedUses(DefReg, Def)) { auto Use = DFG.addr(UseID); if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node From 4a47f1c4cedb497915bba14f5288708cedb62767 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 4 Aug 2020 14:17:30 -0700 Subject: [PATCH 379/600] [SelectionDAG][SVE] Support scalable vectors in getConstantFP() Differential Revision: https://reviews.llvm.org/D85249 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +++- llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7fdf8a82bae85..cd54834cf6ace 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1380,7 +1380,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isVector()) + if (VT.isScalableVector()) + Result = getSplatVector(VT, DL, Result); + else if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll index 043e4ea135f34..7a765002ac9f8 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -355,5 +355,22 @@ define @splat_nxv2f64_imm() { ret %2 } +define @splat_nxv4i32_fold( %x) { +; CHECK-LABEL: splat_nxv4i32_fold: +; CHECK: mov z0.s, #0 +; CHECK-NEXT: ret + %r = sub %x, %x + ret %r +} + + +define @splat_nxv4f32_fold( %x) { +; CHECK-LABEL: splat_nxv4f32_fold: +; CHECK: mov z0.s, #0 +; CHECK-NEXT: ret + %r = fsub nnan %x, %x + ret %r +} + ; +bf16 is required for the bfloat version. attributes #0 = { "target-features"="+sve,+bf16" } From f50b3ff02e3245b44b5452b409ca1df22e91be45 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 29 Jul 2020 13:54:07 -0700 Subject: [PATCH 380/600] [Hexagon] Use InstSimplify instead of ConstantProp This is the last remaining use of ConstantProp; migrate it to InstSimplify with the goal of removing ConstantProp. Add a -hexagon-instsimplify option to allow skipping instsimplify in tests that can't handle the extra optimization.
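As a usage note, tests that depend on constants surviving into ISel can opt out of the new simplification; the updated RUN lines below follow this pattern:

  ; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s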
Differential Revision: https://reviews.llvm.org/D85047 --- llvm/include/llvm/Transforms/Scalar.h | 7 +++++++ llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h | 4 ---- llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp | 7 ++++++- llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp | 1 + llvm/test/CodeGen/Hexagon/autohvx/isel-qfalse.ll | 2 +- llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-pair.ll | 2 +- llvm/test/CodeGen/Hexagon/bit-bitsplit-regclass.ll | 2 +- llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll | 2 +- llvm/test/CodeGen/Hexagon/isel-simplify-crash.ll | 2 +- llvm/test/CodeGen/Hexagon/loop_correctness.ll | 2 +- llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll | 2 +- llvm/test/CodeGen/Hexagon/packetize-impdef-1.ll | 2 +- llvm/test/CodeGen/Hexagon/store-vector-pred.ll | 2 +- llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll | 2 +- 14 files changed, 24 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 07d968efbcbbd..8dd59e018061b 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -530,6 +530,13 @@ Pass *createLoopSimplifyCFGPass(); // transformations. // Pass *createWarnMissedTransformationsPass(); + +//===----------------------------------------------------------------------===// +// +// This pass does instruction simplification on each +// instruction in a function. +// +FunctionPass *createInstSimplifyLegacyPass(); } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h index 0c30b62605366..f36695a8c2b73 100644 --- a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h +++ b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h @@ -36,10 +36,6 @@ class InstSimplifyPass : public PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -/// Create a legacy pass that does instruction simplification on each -/// instruction in a function. -FunctionPass *createInstSimplifyLegacyPass(); - } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_INSTSIMPLIFYPASS_H diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 03cc7d240aa66..9bcdc89f29567 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -101,6 +101,10 @@ static cl::opt EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Simplify the CFG after atomic expansion pass")); +static cl::opt EnableInstSimplify("hexagon-instsimplify", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Enable instsimplify")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. 
In particular, it seems that it is not possible to get @@ -312,7 +316,8 @@ void HexagonPassConfig::addIRPasses() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) { - addPass(createConstantPropagationPass()); + if (EnableInstSimplify) + addPass(createInstSimplifyLegacyPass()); addPass(createDeadCodeEliminationPass()); } diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp index e87b622ab19f7..7380c9df9b15d 100644 --- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp +++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-qfalse.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-qfalse.ll index 9324f524d7a39..03f9f81da3d19 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-qfalse.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-qfalse.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -hexagon-instsimplify=0 < %s | FileCheck %s ; Make sure we can select QFALSE. ; CHECK: vcmp.gt(v0.w,v0.w) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-pair.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-pair.ll index 4e49162dec465..fb3be22f8e260 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-pair.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-setcc-pair.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -hexagon-instsimplify=0 < %s | FileCheck %s ; Check that a setcc of a vector pair is handled (without crashing). ; CHECK: vcmp diff --git a/llvm/test/CodeGen/Hexagon/bit-bitsplit-regclass.ll b/llvm/test/CodeGen/Hexagon/bit-bitsplit-regclass.ll index 57f7c8636814d..82eb87fec3eaf 100644 --- a/llvm/test/CodeGen/Hexagon/bit-bitsplit-regclass.ll +++ b/llvm/test/CodeGen/Hexagon/bit-bitsplit-regclass.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -hexagon-instsimplify=0 < %s | FileCheck %s ; Check for successful compilation. ; CHECK: r{{[0-9]+}} = insert(r{{[0-9]+}},#1,#31) diff --git a/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll index 0834424ee4dcc..c44e7a863840e 100644 --- a/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll +++ b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s ; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered ; without crashing. diff --git a/llvm/test/CodeGen/Hexagon/isel-simplify-crash.ll b/llvm/test/CodeGen/Hexagon/isel-simplify-crash.ll index 33a9b0ea16aca..c13f59a7fcdb5 100644 --- a/llvm/test/CodeGen/Hexagon/isel-simplify-crash.ll +++ b/llvm/test/CodeGen/Hexagon/isel-simplify-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s ; This used to crash in SimplifyDemandedBits due to a type mismatch ; caused by a missing bitcast in vectorizing mul. 
diff --git a/llvm/test/CodeGen/Hexagon/loop_correctness.ll b/llvm/test/CodeGen/Hexagon/loop_correctness.ll index df2c3c5814b45..efe74c03fa1f2 100644 --- a/llvm/test/CodeGen/Hexagon/loop_correctness.ll +++ b/llvm/test/CodeGen/Hexagon/loop_correctness.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -O3 < %s | FileCheck %s +; RUN: llc -march=hexagon -O3 -hexagon-instsimplify=0 < %s | FileCheck %s ; CHECK-LABEL: f0: ; CHECK: loop0(.LBB{{[0-9]+}}_{{[0-9]+}},#3) diff --git a/llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll b/llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll index e41fcff85d60a..9f4ab9daefde3 100644 --- a/llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll +++ b/llvm/test/CodeGen/Hexagon/newvaluejump-kill.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -O3 -verify-machineinstrs -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O3 -verify-machineinstrs -hexagon-initial-cfg-cleanup=0 -hexagon-instsimplify=0 < %s | FileCheck %s ; ; Check that this testcase compiles successfully and that a new-value jump ; has been created. diff --git a/llvm/test/CodeGen/Hexagon/packetize-impdef-1.ll b/llvm/test/CodeGen/Hexagon/packetize-impdef-1.ll index 8cb5cffc0ed9e..47f9a860e80c9 100644 --- a/llvm/test/CodeGen/Hexagon/packetize-impdef-1.ll +++ b/llvm/test/CodeGen/Hexagon/packetize-impdef-1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-initial-cfg-cleanup=0 -hexagon-instsimplify=0 < %s | FileCheck %s ; REQUIRES: asserts ; Test that the compiler doesn't assert because IMPLICIT_DEF instructions are diff --git a/llvm/test/CodeGen/Hexagon/store-vector-pred.ll b/llvm/test/CodeGen/Hexagon/store-vector-pred.ll index 407a30cb738d5..a177f87ddfbd5 100644 --- a/llvm/test/CodeGen/Hexagon/store-vector-pred.ll +++ b/llvm/test/CodeGen/Hexagon/store-vector-pred.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s ; This test checks that store a vector predicate of type v128i1 is lowered ; without crashing. diff --git a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll index bcec15437e915..e5394d929bb1e 100644 --- a/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll +++ b/llvm/test/CodeGen/Hexagon/vect/zext-v4i1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-instsimplify=0 < %s | FileCheck %s ; Check that this compiles successfully. ; CHECK: vcmph.eq From bf82ff61a6dbfec5295d9f24e6b66dcb9a038c6c Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 3 Aug 2020 16:15:22 -0700 Subject: [PATCH 381/600] Teach SROA to handle allocas with more than one dbg.declare. It is technically legal for optimizations to create an alloca that is used by more than one dbg.declare, if one or both of them are inlined instances of aliasing variables. 
Differential Revision: https://reviews.llvm.org/D85172 --- llvm/lib/Transforms/Scalar/SROA.cpp | 26 +++++---- llvm/test/Transforms/SROA/dbg-inline.ll | 72 +++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/SROA/dbg-inline.ll diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index a95a7aba0d38d..93ea6e67ee432 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -4476,10 +4476,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. TinyPtrVector DbgDeclares = FindDbgAddrUses(&AI); - if (!DbgDeclares.empty()) { - auto *Var = DbgDeclares.front()->getVariable(); - auto *Expr = DbgDeclares.front()->getExpression(); - auto VarSize = Var->getSizeInBits(); + for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) { + auto *Expr = DbgDeclare->getExpression(); DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize(); @@ -4510,6 +4508,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } // The alloca may be larger than the variable. + auto VarSize = DbgDeclare->getVariable()->getSizeInBits(); if (VarSize) { if (Size > *VarSize) Size = *VarSize; @@ -4527,12 +4526,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } } - // Remove any existing intrinsics describing the same alloca. - for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) - OldDII->eraseFromParent(); + // Remove any existing intrinsics on the new alloca describing + // the variable fragment. + for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) { + auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS, + const DbgVariableIntrinsic *RHS) { + return LHS->getVariable() == RHS->getVariable() && + LHS->getDebugLoc()->getInlinedAt() == + RHS->getDebugLoc()->getInlinedAt(); + }; + if (SameVariableFragment(OldDII, DbgDeclare)) + OldDII->eraseFromParent(); + } - DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr, - DbgDeclares.front()->getDebugLoc(), &AI); + DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr, + DbgDeclare->getDebugLoc(), &AI); } } return Changed; diff --git a/llvm/test/Transforms/SROA/dbg-inline.ll b/llvm/test/Transforms/SROA/dbg-inline.ll new file mode 100644 index 0000000000000..85e4d29667e23 --- /dev/null +++ b/llvm/test/Transforms/SROA/dbg-inline.ll @@ -0,0 +1,72 @@ +; Test that SROA can deal with allocas that have more than one +; dbg.declare hanging off of it. 
+ +; RUN: opt < %s -sroa -S | FileCheck %s +source_filename = "/tmp/inlinesplit.cpp" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +%struct.pair = type { i64, i64 } + +; Function Attrs: noinline optnone ssp uwtable +define i64 @_Z1g4pair(i64 %p.coerce0, i64 %p.coerce1) #0 !dbg !8 { +entry: + %p = alloca %struct.pair, align 8 + %0 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0 + store i64 %p.coerce0, i64* %0, align 8 + %1 = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 1 + store i64 %p.coerce1, i64* %1, align 8 + ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[LOC:[0-9]+]] + ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[LOC]] + ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce0, metadata ![[INLINED_VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg ![[INLINED_LOC:[0-9]+]] + ; CHECK-DAG: call void @llvm.dbg.value(metadata i64 %p.coerce1, metadata ![[INLINED_VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg ![[INLINED_LOC]] + call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !17, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.declare(metadata %struct.pair* %p, metadata !21, metadata !DIExpression()), !dbg !23 + %a.i = getelementptr inbounds %struct.pair, %struct.pair* %p, i32 0, i32 0, !dbg !25 + %x2 = load i64, i64* %0, align 8, !dbg !25 + ret i64 %x2, !dbg !26 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #0 = { noinline ssp uwtable } +attributes #1 = { nounwind readnone speculatable willreturn } +attributes #2 = { argmemonly nounwind willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project 5110fd0343c2d06c8ae538741fbef13ece5e68de)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None, sysroot: "/") +!1 = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: "/Volumes/Data/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!8 = distinct !DISubprogram(name: "g", linkageName: "_Z1g4pair", scope: !9, file: !9, line: 9, type: !10, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!9 = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: "") +!10 = !DISubroutineType(types: !11) +!11 = !{!12, !13} +!12 = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "pair", file: !9, line: 1, size: 128, flags: DIFlagTypePassByValue, elements: !14, identifier: "_ZTS4pair") +!14 = !{!15, !16} +!15 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !13, file: !9, line: 1, baseType: !12, size: 64) +!16 = !DIDerivedType(tag: DW_TAG_member, name: "b", 
scope: !13, file: !9, line: 1, baseType: !12, size: 64, offset: 64) +!17 = !DILocalVariable(name: "p", arg: 1, scope: !8, file: !9, line: 9, type: !13) +; CHECK: ![[LOC]] = !DILocation +; CHECK-NOT: inlinedAt +; CHECK: = +!18 = !DILocation(line: 9, column: 27, scope: !8) +!19 = !DILocation(line: 10, column: 12, scope: !8) +!20 = !DILocation(line: 10, column: 10, scope: !8) +!21 = !DILocalVariable(name: "p", arg: 1, scope: !22, file: !9, line: 5, type: !13) +!22 = distinct !DISubprogram(name: "f", linkageName: "_ZL1f4pair", scope: !9, file: !9, line: 5, type: !10, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !2) +; CHECK: ![[INLINED_LOC]] = !DILocation({{.*}}inlinedAt +!23 = !DILocation(line: 5, column: 27, scope: !22, inlinedAt: !24) +!24 = distinct !DILocation(line: 10, column: 10, scope: !8) +!25 = !DILocation(line: 6, column: 12, scope: !22, inlinedAt: !24) +!26 = !DILocation(line: 10, column: 3, scope: !8) From 47fe1b63f449827e1171f944b07d0cbebad1de63 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Aug 2020 18:03:18 -0500 Subject: [PATCH 382/600] [RDF] Lower the sorting complexity in RDFLiveness::getAllReachingDefs The sorting is needed, because reaching defs are (logically) ordered, but are not collected in that order. This change will break up the single call to std::sort into a series of smaller sorts, each of which should use a cheaper comparison function than the original. --- llvm/lib/CodeGen/RDFLiveness.cpp | 116 +++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index 0bcd27f8ea452..67ebc2303f99d 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -23,8 +23,10 @@ // <10.1145/2086696.2086706>. // #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" @@ -108,7 +110,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, const RegisterAggr &DefRRs) { NodeList RDefs; // Return value. SetVector DefQ; - SetVector Owners; + DenseMap OrdMap; // Dead defs will be treated as if they were live, since they are actually // on the data-flow path. They cannot be ignored because even though they @@ -151,18 +153,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA)) if (NodeId RD = NodeAddr(S).Addr->getReachingDef()) DefQ.insert(RD); - } - - // Remove all non-phi defs that are not aliased to RefRR, and collect - // the owners of the remaining defs. - SetVector Defs; - for (NodeId N : DefQ) { - auto TA = DFG.addr(N); - bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef; - if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG))) - continue; - Defs.insert(TA.Id); - Owners.insert(TA.Addr->getOwner(DFG).Id); + // Don't visit sibling defs. They share the same reaching def (which + // will be visited anyway), but they define something not aliased to + // this ref. } // Return the MachineBasicBlock containing a given instruction. @@ -174,38 +167,81 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, NodeAddr BA = PA.Addr->getOwner(DFG); return BA.Addr->getCode(); }; - // Less(A,B) iff instruction A is further down in the dominator tree than B. 
-  auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+
+  SmallSet<NodeId,32> Defs;
+
+  // Remove all non-phi defs that are not aliased to RefRR, and segregate
+  // the remaining defs into buckets by containing block.
+  std::map<NodeId, NodeAddr<InstrNode*>> Owners;
+  std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
+  for (NodeId N : DefQ) {
+    auto TA = DFG.addr<DefNode*>(N);
+    bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+    if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
+      continue;
+    Defs.insert(TA.Id);
+    NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+    Owners[TA.Id] = IA;
+    Blocks[Block(IA)].push_back(IA.Id);
+  }
+
+  auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) {
     if (A == B)
       return false;
-    auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
-    MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
-    if (BA != BB)
-      return MDT.dominates(BB, BA);
-    // They are in the same block.
+    NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A);
+    NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B);
     bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
     bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
-    if (StmtA) {
-      if (!StmtB)   // OB is a phi and phis dominate statements.
-        return true;
-      MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
-      MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
-      // The order must be linear, so tie-break such equalities.
-      if (CA == CB)
-        return A < B;
-      return MDT.dominates(CB, CA);
-    } else {
-      // OA is a phi.
-      if (StmtB)
-        return false;
-      // Both are phis. There is no ordering between phis (in terms of
-      // the data-flow), so tie-break this via node id comparison.
+    if (StmtA && StmtB) {
+      const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+      const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+      assert(InA->getParent() == InB->getParent());
+      auto FA = OrdMap.find(InA);
+      if (FA != OrdMap.end())
+        return FA->second < OrdMap.find(InB)->second;
+      const MachineBasicBlock *BB = InA->getParent();
+      for (auto It = BB->begin(), E = BB->end(); It != E; ++It) {
+        if (It == InA->getIterator())
+          return true;
+        if (It == InB->getIterator())
+          return false;
+      }
+      llvm_unreachable("InA and InB should be in the same block");
+    }
+    // One of them is a phi node.
+    if (!StmtA && !StmtB) {
+      // Both are phis, which are unordered. Break the tie by id numbers.
       return A < B;
     }
+    // Only one of them is a phi. Phis always precede statements.
+    return !StmtA;
   };

-  std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
-  llvm::sort(Tmp, Less);
+  auto GetOrder = [&OrdMap] (MachineBasicBlock &B) {
+    uint32_t Pos = 0;
+    for (MachineInstr &In : B)
+      OrdMap.insert({&In, ++Pos});
+  };
+
+  // For each block, sort the nodes in it.
+  std::vector<MachineBasicBlock*> TmpBB;
+  for (auto &Bucket : Blocks) {
+    TmpBB.push_back(Bucket.first);
+    if (Bucket.second.size() > 2)
+      GetOrder(*Bucket.first);
+    std::sort(Bucket.second.begin(), Bucket.second.end(), Precedes);
+  }
+
+  // Sort the blocks with respect to dominance.
+  std::sort(TmpBB.begin(), TmpBB.end(), [this](auto A, auto B) {
+    return MDT.dominates(A, B);
+  });
+
+  std::vector<NodeId> TmpInst;
+  for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) {
+    auto &Bucket = Blocks[*I];
+    TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend());
+  }

   // The vector is a list of instructions, so that defs coming from
   // the same instruction don't need to be artificially ordered.
@@ -220,6 +256,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
   //   *d3<C>              If A \incl BuC, and B \incl AuC, then *d2 would be
   //                       covered if we added A first, and A would be covered
   //                       if we added B first.
+ // In this example we want both A and B, because we don't want to give + // either one priority over the other, since they belong to the same + // statement. RegisterAggr RRs(DefRRs); @@ -227,7 +266,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, return TA.Addr->getKind() == NodeAttrs::Def && Defs.count(TA.Id); }; - for (NodeId T : Tmp) { + + for (NodeId T : TmpInst) { if (!FullChain && RRs.hasCoverOf(RefRR)) break; auto TA = DFG.addr(T); From f0f467aeecfc615a5055d8f2edd903996c11727e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Aug 2020 17:46:38 -0500 Subject: [PATCH 383/600] [RDF] Cache register aliases in PhysicalRegisterInfo This improves performance of PhysicalRegisterInfo::makeRegRef. --- llvm/include/llvm/CodeGen/RDFRegisters.h | 8 ++++++++ llvm/lib/CodeGen/RDFRegisters.cpp | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index e8ba0103777ab..82388dc1e61fe 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -132,6 +132,10 @@ namespace rdf { return MaskInfos[Register::stackSlot2Index(MaskId)].Units; } + const BitVector &getUnitAliases(uint32_t U) const { + return AliasInfos[U].Regs; + } + RegisterRef mapTo(RegisterRef RR, unsigned R) const; const TargetRegisterInfo &getTRI() const { return TRI; } @@ -146,12 +150,16 @@ namespace rdf { struct MaskInfo { BitVector Units; }; + struct AliasInfo { + BitVector Regs; + }; const TargetRegisterInfo &TRI; IndexedSet RegMasks; std::vector RegInfos; std::vector UnitInfos; std::vector MaskInfos; + std::vector AliasInfos; bool aliasRR(RegisterRef RA, RegisterRef RB) const; bool aliasRM(RegisterRef RR, RegisterRef RM) const; diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index 9f8d6b9f61ce2..c76447d95444a 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -92,6 +92,15 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, } MaskInfos[M].Units = PU.flip(); } + + AliasInfos.resize(TRI.getNumRegUnits()); + for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { + BitVector AS(TRI.getNumRegs()); + for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R) + for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S) + AS.set(*S); + AliasInfos[U].Regs = AS; + } } std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { @@ -317,26 +326,17 @@ RegisterRef RegisterAggr::makeRegRef() const { if (U < 0) return RegisterRef(); - auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) { - for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R) - for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S) - Regs.set(*S); - }; - // Find the set of all registers that are aliased to all the units // in this aggregate. // Get all the registers aliased to the first unit in the bit vector. - BitVector Regs(PRI.getTRI().getNumRegs()); - AliasedRegs(U, Regs); + BitVector Regs = PRI.getUnitAliases(U); U = Units.find_next(U); // For each other unit, intersect it with the set of all registers // aliased that unit. 
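  // (With the AliasInfos table precomputed in the constructor, each step
  //  below is a single BitVector intersection instead of re-walking the
  //  register-unit roots and super-registers on every call.)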
  while (U >= 0) {
-    BitVector AR(PRI.getTRI().getNumRegs());
-    AliasedRegs(U, AR);
-    Regs &= AR;
+    Regs &= PRI.getUnitAliases(U);
    U = Units.find_next(U);
  }

From 4b25f672998fde5cc5bb02411e9268b2bb35655f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Tue, 4 Aug 2020 18:20:30 -0500
Subject: [PATCH 384/600] [RDF] Really remove remaining uses of
 PhysicalRegisterInfo::normalize

---
 llvm/lib/CodeGen/RDFGraph.cpp    | 5 -----
 llvm/lib/CodeGen/RDFLiveness.cpp | 6 +++---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index 437a6b0300967..cebb902f0a4a0 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -984,11 +984,6 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
     LaneBitmask M = AR.Mask & BR.Mask;
     return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
   }
-#ifndef NDEBUG
-//  RegisterRef NAR = PRI.normalize(AR);
-//  RegisterRef NBR = PRI.normalize(BR);
-//  assert(NAR.Reg != NBR.Reg);
-#endif
   // This isn't strictly correct, because the overlap may happen in the
   // part masked out.
   if (PRI.alias(AR, BR))
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 67ebc2303f99d..95d647fbf9ac3 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -514,7 +514,7 @@ void Liveness::computePhiInfo() {
       NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
       uint16_t F = A.Addr->getFlags();
       if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
-        RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
+        RegisterRef R = A.Addr->getRegRef(DFG);
         RealUses[R.Reg].insert({A.Id,R.Mask});
       }
       UN = A.Addr->getSibling();
@@ -659,7 +659,7 @@ void Liveness::computePhiInfo() {
     for (NodeAddr<UseNode*> UA : PUs) {
       std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
-      RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
+      RegisterRef UR = UA.Addr->getRegRef(DFG);
       for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
         bool Changed = false;
         const RegisterAggr &MidDefs = P.second;
@@ -1113,7 +1113,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
     for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
       if (UA.Addr->getFlags() & NodeAttrs::Undef)
         continue;
-      RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
+      RegisterRef RR = UA.Addr->getRegRef(DFG);
       for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
        if (getBlockWithRef(D.Id) != B)
          LiveIn[RR.Reg].insert({D.Id,RR.Mask});

From 00602ee7ef0bf6c68d690a2bd729c12b95c95c99 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Mon, 3 Aug 2020 16:12:19 -0700
Subject: [PATCH 385/600] BPF: simplify IR generation for
 __builtin_btf_type_id()

This patch simplifies IR generation for __builtin_btf_type_id().
For __builtin_btf_type_id(obj, flag), the IR builtin previously
looked like
  if (obj is an lvalue)
    llvm.bpf.btf.type.id(obj.ptr, 1, flag) !type
  else
    llvm.bpf.btf.type.id(obj, 0, flag) !type
The purpose of the 2nd argument is to differentiate
  __builtin_btf_type_id(obj, flag) where obj is an lvalue
vs.
  __builtin_btf_type_id(obj.ptr, flag)

Note that obj or obj.ptr is never used by the backend;
the `obj` argument is only used to derive the type.
This code sequence is subject to potential llvm CSE when
  - obj is the same, e.g., nullptr,
  - the flag is the same, and
  - only the metadata type is different, e.g., a typedef of
    struct "s" vs. struct "s" itself.
In that case we don't want CSE, since the metadata differs.

This patch changes the IR builtin to
  llvm.bpf.btf.type.id(seq_num, flag) !type
where seq_num is always increasing. This prevents the
unwanted llvm CSE.
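As a concrete illustration (this mirrors the new clang test added
below; the metadata rendering here is schematic):

  struct t1 { int a; };
  typedef struct t1 __t1;
  __builtin_btf_type_id(*(struct t1 *)0, 1);
  __builtin_btf_type_id(*(__t1 *)0, 1);

Previously both calls lowered to intrinsic calls with identical value
operands, differing only in the attached !llvm.preserve.access.index
metadata (struct "t1" vs. typedef "__t1"), so CSE could fold them into
one call and lose one of the two type ids. With this patch they lower
to

  call i32 @llvm.bpf.btf.type.id(i32 2, i64 1)   ; !type = struct t1
  call i32 @llvm.bpf.btf.type.id(i32 3, i64 1)   ; !type = typedef __t1

whose sequence numbers differ, so CSE cannot merge them.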
Also report an error if the type name is empty for remote relocation since remote relocation needs non-empty type name to do relocation against vmlinux. Differential Revision: https://reviews.llvm.org/D85174 --- clang/lib/CodeGen/CGBuiltin.cpp | 75 ++------------ clang/test/CodeGen/builtin-bpf-btf-type-id.c | 16 ++- llvm/include/llvm/IR/IntrinsicsBPF.td | 2 +- llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 13 ++- .../CodeGen/BPF/BTF/builtin-btf-type-id.ll | 97 +++++++++---------- 5 files changed, 80 insertions(+), 123 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 18911184aa414..1797dfa052346 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10961,68 +10961,7 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, {FieldAddr->getType()}); return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); } - case BPF::BI__builtin_btf_type_id: { - Value *FieldVal = nullptr; - - // The LValue cannot be converted Value in order to be used as the function - // parameter. If it is a structure, it is the "alloca" result of the LValue - // (a pointer) is used in the parameter. If it is a simple type, - // the value will be loaded from its corresponding "alloca" and used as - // the parameter. In our case, let us just get a pointer of the LValue - // since we do not really use the parameter. The purpose of parameter - // is to prevent the generated IR llvm.bpf.btf.type.id intrinsic call, - // which carries metadata, from being changed. - bool IsLValue = E->getArg(0)->isLValue(); - if (IsLValue) - FieldVal = EmitLValue(E->getArg(0)).getPointer(*this); - else - FieldVal = EmitScalarExpr(E->getArg(0)); - - if (!getDebugInfo()) { - CGM.Error(E->getExprLoc(), "using __builtin_btf_type_id() without -g"); - return nullptr; - } - - // Generate debuginfo type for the first argument. - llvm::DIType *DbgInfo = - getDebugInfo()->getOrCreateStandaloneType(E->getArg(0)->getType(), - E->getArg(0)->getExprLoc()); - - ConstantInt *Flag = cast(EmitScalarExpr(E->getArg(1))); - Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); - - // Built the IR for the btf_type_id intrinsic. - // - // In the above, we converted LValue argument to a pointer to LValue. - // For example, the following - // int v; - // C1: __builtin_btf_type_id(v, flag); - // will be converted to - // L1: llvm.bpf.btf.type.id(&v, flag) - // This makes it hard to differentiate from - // C2: __builtin_btf_type_id(&v, flag); - // to - // L2: llvm.bpf.btf.type.id(&v, flag) - // - // If both C1 and C2 are present in the code, the llvm may later - // on do CSE on L1 and L2, which will result in incorrect tagged types. - // - // The C1->L1 transformation only happens if the argument of - // __builtin_btf_type_id() is a LValue. So Let us put whether - // the argument is an LValue or not into generated IR. This should - // prevent potential CSE from causing debuginfo type loss. 
- // - // The generated IR intrinsics will hence look like - // L1: llvm.bpf.btf.type.id(&v, 1, flag) !di_type_for_{v}; - // L2: llvm.bpf.btf.type.id(&v, 0, flag) !di_type_for_{&v}; - Constant *CV = ConstantInt::get(IntTy, IsLValue); - llvm::Function *FnBtfTypeId = llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, - {FieldVal->getType(), CV->getType()}); - CallInst *Fn = Builder.CreateCall(FnBtfTypeId, {FieldVal, CV, FlagValue}); - Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); - return Fn; - } + case BPF::BI__builtin_btf_type_id: case BPF::BI__builtin_preserve_type_info: { if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using builtin function without -g"); @@ -11037,10 +10976,14 @@ Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); - llvm::Function *FnPreserveTypeInfo = llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); - CallInst *Fn = - Builder.CreateCall(FnPreserveTypeInfo, {SeqNumVal, FlagValue}); + llvm::Function *FnDecl; + if (BuiltinID == BPF::BI__builtin_btf_type_id) + FnDecl = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {}); + else + FnDecl = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); + CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue}); Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } diff --git a/clang/test/CodeGen/builtin-bpf-btf-type-id.c b/clang/test/CodeGen/builtin-bpf-btf-type-id.c index f60e3ca68a43c..5143deeff3c62 100644 --- a/clang/test/CodeGen/builtin-bpf-btf-type-id.c +++ b/clang/test/CodeGen/builtin-bpf-btf-type-id.c @@ -4,10 +4,22 @@ unsigned test1(int a) { return __builtin_btf_type_id(a, 0); } unsigned test2(int a) { return __builtin_btf_type_id(&a, 0); } +struct t1 { int a; }; +typedef struct t1 __t1; +unsigned test3() { + return __builtin_btf_type_id(*(struct t1 *)0, 1) + + __builtin_btf_type_id(*(__t1 *)0, 1); +} + // CHECK: define dso_local i32 @test1 -// CHECK: call i32 @llvm.bpf.btf.type.id.p0i32.i32(i32* %{{[0-9a-z.]+}}, i32 1, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[INT:[0-9]+]] +// CHECK: call i32 @llvm.bpf.btf.type.id(i32 0, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[INT:[0-9]+]] // CHECK: define dso_local i32 @test2 -// CHECK: call i32 @llvm.bpf.btf.type.id.p0i32.i32(i32* %{{[0-9a-z.]+}}, i32 0, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[INT_POINTER:[0-9]+]] +// CHECK: call i32 @llvm.bpf.btf.type.id(i32 1, i64 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[INT_POINTER:[0-9]+]] +// CHECK: define dso_local i32 @test3 +// CHECK: call i32 @llvm.bpf.btf.type.id(i32 2, i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_T1:[0-9]+]] +// CHECK: call i32 @llvm.bpf.btf.type.id(i32 3, i64 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[TYPEDEF_T1:[0-9]+]] // // CHECK: ![[INT]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed // CHECK: ![[INT_POINTER]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[INT]], size: 64 +// CHECK: ![[TYPEDEF_T1]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__t1" +// CHECK: ![[STRUCT_T1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1" diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index 
f25f631c9b147..0a0a7f3aca2f7 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -24,7 +24,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], [IntrNoMem, ImmArg>]>; def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">, - Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty], + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index c3cb7647aa794..271ae5642ef96 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -95,18 +95,24 @@ bool BPFPreserveDIType::doTransformation(Module &M) { std::string BaseName = "llvm.btf_type_id."; int Count = 0; for (auto Call : PreserveDITypeCalls) { - const ConstantInt *Flag = dyn_cast(Call->getArgOperand(2)); + const ConstantInt *Flag = dyn_cast(Call->getArgOperand(1)); assert(Flag); uint64_t FlagValue = Flag->getValue().getZExtValue(); if (FlagValue >= BPFCoreSharedInfo::MAX_BTF_TYPE_ID_FLAG) report_fatal_error("Incorrect flag for llvm.bpf.btf.type.id intrinsic"); + MDNode *MD = Call->getMetadata(LLVMContext::MD_preserve_access_index); + uint32_t Reloc; - if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC) + if (FlagValue == BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL_RELOC) { Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_LOCAL; - else + } else { Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE; + DIType *Ty = cast(MD); + if (Ty->getName().empty()) + report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc"); + } BasicBlock *BB = Call->getParent(); IntegerType *VarType = Type::getInt32Ty(BB->getContext()); @@ -116,7 +122,6 @@ bool BPFPreserveDIType::doTransformation(Module &M) { new GlobalVariable(M, VarType, false, GlobalVariable::ExternalLinkage, NULL, GVName); GV->addAttribute(BPFCoreSharedInfo::TypeIdAttr); - MDNode *MD = Call->getMetadata(LLVMContext::MD_preserve_access_index); GV->setMetadata(LLVMContext::MD_preserve_access_index, MD); // Load the global variable which represents the type info. 
diff --git a/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id.ll b/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id.ll index 4ea3b272f1eb6..b5d5cec3fbdfd 100644 --- a/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id.ll +++ b/llvm/test/CodeGen/BPF/BTF/builtin-btf-type-id.ll @@ -13,7 +13,7 @@ ; bpf_log(__builtin_btf_type_id(tmp__abc, 0), &tmp__abc, sizeof(tmp__abc)); ; } ; void prog2() { -; bpf_log(__builtin_btf_type_id(&tmp__abc, 1), &tmp__abc, sizeof(tmp__abc)); +; bpf_log(__builtin_btf_type_id(&tmp__abc, 0), &tmp__abc, sizeof(tmp__abc)); ; } ; void prog3() { ; bpf_log(__builtin_btf_type_id(tmp__abc.f1[3], 1), &tmp__abc, sizeof(tmp__abc)); @@ -21,25 +21,23 @@ ; Compilation flag: ; clang -target bpf -O2 -g -S -emit-llvm test.c -%struct.anon = type { [100 x i8], i32 } - @tmp__abc = dso_local global { <{ i8, i8, [98 x i8] }>, i32 } { <{ i8, i8, [98 x i8] }> <{ i8 1, i8 3, [98 x i8] zeroinitializer }>, i32 0 }, align 4, !dbg !0 ; Function Attrs: nounwind define dso_local void @prog1() local_unnamed_addr #0 !dbg !28 { entry: - %0 = tail call i32 @llvm.bpf.btf.type.id.p0s_struct.anons.i32(%struct.anon* bitcast ({ <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc to %struct.anon*), i32 1, i64 0), !dbg !31, !llvm.preserve.access.index !7 + %0 = tail call i32 @llvm.bpf.btf.type.id(i32 0, i64 0), !dbg !31, !llvm.preserve.access.index !7 %call = tail call i32 inttoptr (i64 999 to i32 (i32, i8*, i32)*)(i32 %0, i8* getelementptr inbounds ({ <{ i8, i8, [98 x i8] }>, i32 }, { <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc, i64 0, i32 0, i32 0), i32 104) #2, !dbg !32 ret void, !dbg !33 } ; Function Attrs: nounwind readnone -declare i32 @llvm.bpf.btf.type.id.p0s_struct.anons.i32(%struct.anon*, i32, i64) #1 +declare i32 @llvm.bpf.btf.type.id(i32, i64) #1 ; Function Attrs: nounwind define dso_local void @prog2() local_unnamed_addr #0 !dbg !34 { entry: - %0 = tail call i32 @llvm.bpf.btf.type.id.p0s_struct.anons.i32(%struct.anon* bitcast ({ <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc to %struct.anon*), i32 0, i64 1), !dbg !35, !llvm.preserve.access.index !6 + %0 = tail call i32 @llvm.bpf.btf.type.id(i32 1, i64 0), !dbg !35, !llvm.preserve.access.index !6 %call = tail call i32 inttoptr (i64 999 to i32 (i32, i8*, i32)*)(i32 %0, i8* getelementptr inbounds ({ <{ i8, i8, [98 x i8] }>, i32 }, { <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc, i64 0, i32 0, i32 0), i32 104) #2, !dbg !36 ret void, !dbg !37 } @@ -47,56 +45,55 @@ entry: ; Function Attrs: nounwind define dso_local void @prog3() local_unnamed_addr #0 !dbg !38 { entry: - %0 = tail call i32 @llvm.bpf.btf.type.id.p0i8.i32(i8* getelementptr inbounds ({ <{ i8, i8, [98 x i8] }>, i32 }, { <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc, i64 0, i32 0, i32 2, i64 1), i32 1, i64 1), !dbg !39, !llvm.preserve.access.index !11 + %0 = tail call i32 @llvm.bpf.btf.type.id(i32 2, i64 1), !dbg !39, !llvm.preserve.access.index !11 %call = tail call i32 inttoptr (i64 999 to i32 (i32, i8*, i32)*)(i32 %0, i8* getelementptr inbounds ({ <{ i8, i8, [98 x i8] }>, i32 }, { <{ i8, i8, [98 x i8] }>, i32 }* @tmp__abc, i64 0, i32 0, i32 0), i32 104) #2, !dbg !40 ret void, !dbg !41 } -; CHECK-LABEL: prog1 -; CHECK: r1 = 3 -; CHECK-LABEL: prog2 -; CHECK: r1 = 10 -; CHECK-LABEL: prog3 -; CHECK: r1 = 4 -; -; CHECK: .long 0 # BTF_KIND_STRUCT(id = 3) -; CHECK-NEXT: .long 67108866 # 0x4000002 -; CHECK-NEXT: .long 104 -; CHECK-NEXT: .long 13 -; CHECK-NEXT: .long 5 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 16 -; CHECK-NEXT: .long 7 -; CHECK-NEXT: .long 800 # 0x320 -; CHECK-NEXT: .long 19 # BTF_KIND_INT(id = 4) -; 
CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 16777224 # 0x1000008 -; CHECK: .long 0 # BTF_KIND_PTR(id = 10) -; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 3 +; CHECK-LABEL: prog1 +; CHECK: r1 = 3 +; CHECK-LABEL: prog2 +; CHECK: r1 = 10 +; CHECK-LABEL: prog3 +; CHECK: r1 = 4 -; CHECK: .long 16 # FieldReloc -; CHECK-NEXT: .long {{[0-9]+}} # Field reloc section string offset={{[0-9]+}} -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long {{[0-9]+}} -; CHECK-NEXT: .long 6 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 10 -; CHECK-NEXT: .long {{[0-9]+}} -; CHECK-NEXT: .long 7 -; CHECK-NEXT: .long .Ltmp{{[0-9]+}} -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long {{[0-9]+}} -; CHECK-NEXT: .long 7 +; CHECK: .long 0 # BTF_KIND_STRUCT(id = 3) +; CHECK-NEXT: .long 67108866 # 0x4000002 +; CHECK-NEXT: .long 104 +; CHECK-NEXT: .long 13 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 16 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long 800 # 0x320 +; CHECK: .long 19 # BTF_KIND_INT(id = 4) +; CHECK: .long 0 # BTF_KIND_PTR(id = 10) +; CHECK-NEXT: .long 33554432 # 0x2000000 +; CHECK-NEXT: .long 3 +; CHECK: .ascii ".text" # string offset=7 +; CHECK: .ascii "f1" # string offset=13 +; CHECK: .ascii "f2" # string offset=16 +; CHECK: .ascii "char" # string offset=19 +; CHECK: .byte 48 # string offset=48 -; Function Attrs: nounwind readnone -declare i32 @llvm.bpf.btf.type.id.p0i8.i32(i8*, i32, i64) #1 +; CHECK: .long 16 # FieldReloc +; CHECK-NEXT: .long 7 # Field reloc section string offset=7 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 48 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 48 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long .Ltmp{{[0-9]+}} +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 48 +; CHECK-NEXT: .long 7 -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } @@ -106,7 +103,7 @@ attributes #2 = { nounwind } !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "tmp__abc", scope: !2, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 95253d8f16b8085b4b85cb3a6106ccbfe8a6d9b2)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !16, splitDebugInlining: false, nameTableKind: None) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 12.0.0 
(https://github.com/llvm/llvm-project.git f39aae11dca3f8f8c2c755a871726ed2fa82fd57)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !16, splitDebugInlining: false, nameTableKind: None) !3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core") !4 = !{} !5 = !{!6, !11} @@ -131,7 +128,7 @@ attributes #2 = { nounwind } !24 = !{i32 7, !"Dwarf Version", i32 4} !25 = !{i32 2, !"Debug Info Version", i32 3} !26 = !{i32 1, !"wchar_size", i32 4} -!27 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 95253d8f16b8085b4b85cb3a6106ccbfe8a6d9b2)"} +!27 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git f39aae11dca3f8f8c2c755a871726ed2fa82fd57)"} !28 = distinct !DISubprogram(name: "prog1", scope: !3, file: !3, line: 6, type: !29, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4) !29 = !DISubroutineType(types: !30) !30 = !{null} From 9521704553e8a330cfdf5a0611885680073178b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Aug 2020 17:36:17 -0500 Subject: [PATCH 386/600] [RDF] Use hash-based containers, cache extra information This improves performance. --- llvm/include/llvm/CodeGen/RDFLiveness.h | 38 +++++++++++++++++++----- llvm/include/llvm/CodeGen/RDFRegisters.h | 33 +++++++++++++++++++- llvm/lib/CodeGen/RDFLiveness.cpp | 24 +++++++++++++-- 3 files changed, 84 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RDFLiveness.h b/llvm/include/llvm/CodeGen/RDFLiveness.h index ea48902717266..d39d3585e7bd5 100644 --- a/llvm/include/llvm/CodeGen/RDFLiveness.h +++ b/llvm/include/llvm/CodeGen/RDFLiveness.h @@ -18,6 +18,8 @@ #include "llvm/MC/LaneBitmask.h" #include #include +#include +#include #include namespace llvm { @@ -28,6 +30,30 @@ class MachineDominatorTree; class MachineRegisterInfo; class TargetRegisterInfo; +} // namespace llvm + +namespace llvm { +namespace rdf { +namespace detail { + +using NodeRef = std::pair; + +} // namespace detail +} // namespace rdf +} // namespace llvm + +namespace std { + +template <> struct hash { + std::size_t operator()(llvm::rdf::detail::NodeRef R) const { + return std::hash{}(R.first) ^ + std::hash{}(R.second.getAsInteger()); + } +}; + +} // namespace std + +namespace llvm { namespace rdf { struct Liveness { @@ -46,10 +72,9 @@ namespace rdf { std::map Map; }; - using NodeRef = std::pair; - using NodeRefSet = std::set; - // RegisterId in RefMap must be normalized. - using RefMap = std::map; + using NodeRef = detail::NodeRef; + using NodeRefSet = std::unordered_set; + using RefMap = std::unordered_map; Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), @@ -110,15 +135,14 @@ namespace rdf { // Cache of mapping from node ids (for RefNodes) to the containing // basic blocks. Not computing it each time for each node reduces // the liveness calculation time by a large fraction. - using NodeBlockMap = DenseMap; - NodeBlockMap NBMap; + DenseMap NBMap; // Phi information: // // RealUseMap // map: NodeId -> (map: RegisterId -> NodeRefSet) // phi id -> (map: register -> set of reached non-phi uses) - std::map RealUseMap; + DenseMap RealUseMap; // Inverse iterated dominance frontier. 
std::map> IIDF; diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 82388dc1e61fe..abeab62af3fa6 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -91,6 +91,11 @@ namespace rdf { bool operator< (const RegisterRef &RR) const { return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask); } + + size_t hash() const { + return std::hash{}(Reg) ^ + std::hash{}(Mask.getAsInteger()); + } }; @@ -171,10 +176,15 @@ namespace rdf { : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} RegisterAggr(const RegisterAggr &RG) = default; + unsigned count() const { return Units.count(); } bool empty() const { return Units.none(); } bool hasAliasOf(RegisterRef RR) const; bool hasCoverOf(RegisterRef RR) const; + bool operator==(const RegisterAggr &A) const { + return DenseMapInfo::isEqual(Units, A.Units); + } + static bool isCoverOf(RegisterRef RA, RegisterRef RB, const PhysicalRegisterInfo &PRI) { return RegisterAggr(PRI).insert(RA).hasCoverOf(RB); @@ -191,6 +201,10 @@ namespace rdf { RegisterRef clearIn(RegisterRef RR) const; RegisterRef makeRegRef() const; + size_t hash() const { + return DenseMapInfo::getHashValue(Units); + } + void print(raw_ostream &OS) const; struct rr_iterator { @@ -244,9 +258,26 @@ namespace rdf { LaneBitmask Mask; }; raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P); - } // end namespace rdf } // end namespace llvm +namespace std { + template <> struct hash { + size_t operator()(llvm::rdf::RegisterRef A) const { + return A.hash(); + } + }; + template <> struct hash { + size_t operator()(const llvm::rdf::RegisterAggr &A) const { + return A.hash(); + } + }; + template <> struct equal_to { + bool operator()(const llvm::rdf::RegisterAggr &A, + const llvm::rdf::RegisterAggr &B) const { + return A == B; + } + }; +} #endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index 95d647fbf9ac3..b2a29bf451a2a 100644 --- a/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -476,7 +477,7 @@ void Liveness::computePhiInfo() { // phi use -> (map: reaching phi -> set of registers defined in between) std::map> PhiUp; std::vector PhiUQ; // Work list of phis for upward propagation. - std::map PhiDRs; // Phi -> registers defined by it. + std::unordered_map PhiDRs; // Phi -> registers defined by it. // Go over all phis. for (NodeAddr PhiA : Phis) { @@ -652,6 +653,23 @@ void Liveness::computePhiInfo() { // is covered, or until reaching the final phi. Only assume that the // reference reaches the phi in the latter case. + // The operation "clearIn" can be expensive. For a given set of intervening + // defs, cache the result of subtracting these defs from a given register + // ref. + using SubMap = std::unordered_map; + std::unordered_map Subs; + auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) { + if (Mid.empty()) + return RR; + auto F = SM.find(RR); + if (F != SM.end()) + return F->second; + RegisterRef S = Mid.clearIn(RR); + SM.insert({RR, S}); + return S; + }; + + // Go over all phis. 
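+  // (Subs is keyed by the RegisterAggr of intervening defs, via the
+  // std::hash / std::equal_to specializations for RegisterAggr declared
+  // in RDFRegisters.h, so phi uses that see the same set of intervening
+  // defs share a single SubMap.)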
for (unsigned i = 0; i < PhiUQ.size(); ++i) { auto PA = DFG.addr(PhiUQ[i]); NodeList PUs = PA.Addr->members_if(DFG.IsRef, DFG); @@ -663,13 +681,13 @@ void Liveness::computePhiInfo() { for (const std::pair &P : PUM) { bool Changed = false; const RegisterAggr &MidDefs = P.second; - // Collect the set PropUp of uses that are reached by the current // phi PA, and are not covered by any intervening def between the // currently visited use UA and the upward phi P. if (MidDefs.hasCoverOf(UR)) continue; + SubMap &SM = Subs[MidDefs]; // General algorithm: // for each (R,U) : U is use node of R, U is reached by PA @@ -689,7 +707,7 @@ void Liveness::computePhiInfo() { LaneBitmask M = R.Mask & V.second; if (M.none()) continue; - if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) { + if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) { NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; Changed |= RS.insert({V.first,SS.Mask}).second; } From 06d425737bd2953cbea319b5f1d4419b54870e6c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 4 Aug 2020 17:36:17 -0500 Subject: [PATCH 387/600] [RDF] Add operator<<(raw_ostream&, RegisterAggr), NFC --- llvm/include/llvm/CodeGen/RDFRegisters.h | 2 ++ llvm/lib/CodeGen/RDFRegisters.cpp | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index abeab62af3fa6..98b92036e1ede 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -258,6 +258,8 @@ namespace rdf { LaneBitmask Mask; }; raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P); + + raw_ostream &operator<< (raw_ostream &OS, const RegisterAggr &A); } // end namespace rdf } // end namespace llvm diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index c76447d95444a..6c2c889ba64ad 100644 --- a/llvm/lib/CodeGen/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -374,3 +374,8 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG, Pos = End ? Masks.end() : Masks.begin(); Index = End ? Masks.size() : 0; } + +raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) { + A.print(OS); + return OS; +} From 7e9bab6ad51af1cab0c7457e4323166af3ac797c Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Fri, 24 Jul 2020 18:36:18 -0700 Subject: [PATCH 388/600] Fix debugserver's qProcessInfo reporting of maccatalyst binaries This patch is similar in spirit to https://reviews.llvm.org/D84480, but does the maccatalyst/macosx disambiguation. I also took the opportunity to factor out the gdb-remote packet log scanning used by several testcases into lldbutil functions. 
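With the new helpers, a test can check the qProcessInfo response with,
for example (sketch; see the new tests below for the real usage):

  process_info = lldbutil.packetlog_get_process_info(log)
  self.assertTrue('ostype' in process_info)
  self.assertEquals(process_info['ostype'], 'maccatalyst')
  dylib_info = lldbutil.packetlog_get_dylib_info(log)

instead of open-coding the qProcessInfo and
jGetLoadedDynamicLibrariesInfos packet-log parsing in each test.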
rdar://problem/66059257 Differential Revision: https://reviews.llvm.org/D84576 --- .../Python/lldbsuite/test/lldbutil.py | 37 ++++++ lldb/test/API/macosx/macCatalyst/Makefile | 10 ++ .../API/macosx/macCatalyst/TestMacCatalyst.py | 43 ++++++ lldb/test/API/macosx/macCatalyst/main.c | 4 + .../Makefile | 5 +- .../TestMacCatalystAppWithMacOSFramework.py | 51 ++++++++ .../foo.c | 0 .../foo.h | 0 .../main.c | 2 +- .../macosx/macabi/TestMacABImacOSFramework.py | 28 ---- .../macosx/simulator/TestSimulatorPlatform.py | 28 +--- lldb/tools/debugserver/source/DNB.cpp | 14 +- lldb/tools/debugserver/source/DNB.h | 10 +- .../debugserver/source/MacOSX/MachProcess.h | 13 +- .../debugserver/source/MacOSX/MachProcess.mm | 122 ++++++++++-------- lldb/tools/debugserver/source/RNBRemote.cpp | 7 +- 16 files changed, 252 insertions(+), 122 deletions(-) create mode 100644 lldb/test/API/macosx/macCatalyst/Makefile create mode 100644 lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py create mode 100644 lldb/test/API/macosx/macCatalyst/main.c rename lldb/test/API/macosx/{macabi => macCatalystAppMacOSFramework}/Makefile (70%) create mode 100644 lldb/test/API/macosx/macCatalystAppMacOSFramework/TestMacCatalystAppWithMacOSFramework.py rename lldb/test/API/macosx/{macabi => macCatalystAppMacOSFramework}/foo.c (100%) rename lldb/test/API/macosx/{macabi => macCatalystAppMacOSFramework}/foo.h (100%) rename lldb/test/API/macosx/{macabi => macCatalystAppMacOSFramework}/main.c (60%) delete mode 100644 lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py diff --git a/lldb/packages/Python/lldbsuite/test/lldbutil.py b/lldb/packages/Python/lldbsuite/test/lldbutil.py index 1ce6844d973c4..1b366f295540f 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbutil.py @@ -1458,3 +1458,40 @@ def wait_for_file_on_target(testcase, file_path, max_attempts=6): (file_path, max_attempts)) return read_file_on_target(testcase, file_path) + +def packetlog_get_process_info(log): + """parse a gdb-remote packet log file and extract the response to qProcessInfo""" + process_info = dict() + with open(log, "r") as logfile: + process_info_ostype = None + expect_process_info_response = False + for line in logfile: + if expect_process_info_response: + for pair in line.split(';'): + keyval = pair.split(':') + if len(keyval) == 2: + process_info[keyval[0]] = keyval[1] + break + if 'send packet: $qProcessInfo#' in line: + expect_process_info_response = True + return process_info + +def packetlog_get_dylib_info(log): + """parse a gdb-remote packet log file and extract the *last* response to jGetLoadedDynamicLibrariesInfos""" + import json + dylib_info = None + with open(log, "r") as logfile: + dylib_info = None + expect_dylib_info_response = False + for line in logfile: + if expect_dylib_info_response: + while line[0] != '$': + line = line[1:] + line = line[1:] + # Unescape '}'. + dylib_info = json.loads(line.replace('}]','}')[:-4]) + expect_dylib_info_response = False + if 'send packet: $jGetLoadedDynamicLibrariesInfos:{' in line: + expect_dylib_info_response = True + + return dylib_info diff --git a/lldb/test/API/macosx/macCatalyst/Makefile b/lldb/test/API/macosx/macCatalyst/Makefile new file mode 100644 index 0000000000000..9b9224571fdd6 --- /dev/null +++ b/lldb/test/API/macosx/macCatalyst/Makefile @@ -0,0 +1,10 @@ +C_SOURCES := main.c + +TRIPLE := $(ARCH)-apple-ios13.0-macabi +CFLAGS_EXTRAS := -target $(TRIPLE) + +# FIXME: rdar://problem/54986190 +# There is a Clang driver change missing on llvm.org. 
+override CC=xcrun clang + +include Makefile.rules diff --git a/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py b/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py new file mode 100644 index 0000000000000..555d5a13b5559 --- /dev/null +++ b/lldb/test/API/macosx/macCatalyst/TestMacCatalyst.py @@ -0,0 +1,43 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +import os +import unittest2 + + +class TestMacCatalyst(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIf(macos_version=["<", "10.15"]) + @skipUnlessDarwin + @skipIfDarwinEmbedded + @skipIfReproducer # This is hitting https://bugs.python.org/issue22393 + def test_macabi(self): + """Test the x86_64-apple-ios-macabi target linked against a macos dylib""" + self.build() + log = self.getBuildArtifact('packets.log') + self.expect("log enable gdb-remote packets -f "+log) + lldbutil.run_to_source_breakpoint(self, "break here", + lldb.SBFileSpec('main.c')) + self.expect("image list -t -b", + patterns=[self.getArchitecture() + + r'.*-apple-ios.*-macabi a\.out']) + self.expect("fr v s", "Hello macCatalyst") + self.expect("p s", "Hello macCatalyst") + self.check_debugserver(log) + + def check_debugserver(self, log): + """scan the debugserver packet log""" + process_info = lldbutil.packetlog_get_process_info(log) + self.assertTrue('ostype' in process_info) + self.assertEquals(process_info['ostype'], 'maccatalyst') + + aout_info = None + dylib_info = lldbutil.packetlog_get_dylib_info(log) + for image in dylib_info['images']: + if image['pathname'].endswith('a.out'): + aout_info = image + self.assertTrue(aout_info) + self.assertEquals(aout_info['min_version_os_name'], 'maccatalyst') diff --git a/lldb/test/API/macosx/macCatalyst/main.c b/lldb/test/API/macosx/macCatalyst/main.c new file mode 100644 index 0000000000000..ef224fb0987e0 --- /dev/null +++ b/lldb/test/API/macosx/macCatalyst/main.c @@ -0,0 +1,4 @@ +int main() { + const char *s = "Hello macCatalyst!"; + return 0; // break here +} diff --git a/lldb/test/API/macosx/macabi/Makefile b/lldb/test/API/macosx/macCatalystAppMacOSFramework/Makefile similarity index 70% rename from lldb/test/API/macosx/macabi/Makefile rename to lldb/test/API/macosx/macCatalystAppMacOSFramework/Makefile index 2123af1dd7010..96fbf4a9cf172 100644 --- a/lldb/test/API/macosx/macabi/Makefile +++ b/lldb/test/API/macosx/macCatalystAppMacOSFramework/Makefile @@ -1,9 +1,12 @@ C_SOURCES := main.c LD_EXTRAS := -L. -lfoo -TRIPLE := x86_64-apple-ios13.0-macabi +TRIPLE := $(ARCH)-apple-ios13.0-macabi CFLAGS_EXTRAS := -target $(TRIPLE) +# FIXME: rdar://problem/54986190 +override CC=xcrun clang + all: libfoo.dylib a.out libfoo.dylib: foo.c diff --git a/lldb/test/API/macosx/macCatalystAppMacOSFramework/TestMacCatalystAppWithMacOSFramework.py b/lldb/test/API/macosx/macCatalystAppMacOSFramework/TestMacCatalystAppWithMacOSFramework.py new file mode 100644 index 0000000000000..46c0efd9d526c --- /dev/null +++ b/lldb/test/API/macosx/macCatalystAppMacOSFramework/TestMacCatalystAppWithMacOSFramework.py @@ -0,0 +1,51 @@ +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +import os +import unittest2 + + +class TestMacCatalystAppWithMacOSFramework(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipIf(macos_version=["<", "10.15"]) + @skipUnlessDarwin + @skipIfDarwinEmbedded + # There is a Clang driver change missing on llvm.org. 
+ @expectedFailureAll(bugnumber="rdar://problem/54986190>") + @skipIfReproducer # This is hitting https://bugs.python.org/issue22393 + def test(self): + """Test the x86_64-apple-ios-macabi target linked against a macos dylib""" + self.build() + log = self.getBuildArtifact('packets.log') + self.expect("log enable gdb-remote packets -f "+log) + lldbutil.run_to_source_breakpoint(self, "break here", + lldb.SBFileSpec('main.c')) + arch = self.getArchitecture() + self.expect("image list -t -b", + patterns=[arch + r'.*-apple-ios.*-macabi a\.out', + arch + r'.*-apple-macosx.* libfoo.dylib[^(]']) + self.expect("fr v s", "Hello macCatalyst") + self.expect("p s", "Hello macCatalyst") + self.check_debugserver(log) + + def check_debugserver(self, log): + """scan the debugserver packet log""" + process_info = lldbutil.packetlog_get_process_info(log) + self.assertTrue('ostype' in process_info) + self.assertEquals(process_info['ostype'], 'maccatalyst') + + aout_info = None + libfoo_info = None + dylib_info = lldbutil.packetlog_get_dylib_info(log) + for image in dylib_info['images']: + if image['pathname'].endswith('a.out'): + aout_info = image + if image['pathname'].endswith('libfoo.dylib'): + libfoo_info = image + self.assertTrue(aout_info) + self.assertTrue(libfoo_info) + self.assertEquals(aout_info['min_version_os_name'], 'maccatalyst') + self.assertEquals(libfoo_info['min_version_os_name'], 'macosx') diff --git a/lldb/test/API/macosx/macabi/foo.c b/lldb/test/API/macosx/macCatalystAppMacOSFramework/foo.c similarity index 100% rename from lldb/test/API/macosx/macabi/foo.c rename to lldb/test/API/macosx/macCatalystAppMacOSFramework/foo.c diff --git a/lldb/test/API/macosx/macabi/foo.h b/lldb/test/API/macosx/macCatalystAppMacOSFramework/foo.h similarity index 100% rename from lldb/test/API/macosx/macabi/foo.h rename to lldb/test/API/macosx/macCatalystAppMacOSFramework/foo.h diff --git a/lldb/test/API/macosx/macabi/main.c b/lldb/test/API/macosx/macCatalystAppMacOSFramework/main.c similarity index 60% rename from lldb/test/API/macosx/macabi/main.c rename to lldb/test/API/macosx/macCatalystAppMacOSFramework/main.c index 92069d902fd0b..164fe25f308a4 100644 --- a/lldb/test/API/macosx/macabi/main.c +++ b/lldb/test/API/macosx/macCatalystAppMacOSFramework/main.c @@ -1,5 +1,5 @@ #include "foo.h" int main() { - const char *s = "Hello MacABI!"; + const char *s = "Hello macCatalyst!"; return foo(); // break here } diff --git a/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py b/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py deleted file mode 100644 index aff99e3e2804b..0000000000000 --- a/lldb/test/API/macosx/macabi/TestMacABImacOSFramework.py +++ /dev/null @@ -1,28 +0,0 @@ -import lldb -from lldbsuite.test.lldbtest import * -from lldbsuite.test.decorators import * -import lldbsuite.test.lldbutil as lldbutil -import os -import unittest2 - - -class TestMacABImacOSFramework(TestBase): - - mydir = TestBase.compute_mydir(__file__) - - @skipIf(macos_version=["<", "10.15"]) - @skipUnlessDarwin - @skipIfDarwinEmbedded - # There is a Clang driver change missing on llvm.org. 
- @expectedFailureAll(bugnumber="rdar://problem/54986190>") - @skipIfReproducer # This is hitting https://bugs.python.org/issue22393 - def test_macabi(self): - """Test the x86_64-apple-ios-macabi target linked against a macos dylib""" - self.build() - lldbutil.run_to_source_breakpoint(self, "break here", - lldb.SBFileSpec('main.c')) - self.expect("image list -t -b", - patterns=["x86_64.*-apple-ios.*-macabi a\.out", - "x86_64.*-apple-macosx.* libfoo.dylib[^(]"]) - self.expect("fr v s", "Hello MacABI") - self.expect("p s", "Hello MacABI") diff --git a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py index 824cb9eee295c..9b5aed1ed619c 100644 --- a/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py +++ b/lldb/test/API/macosx/simulator/TestSimulatorPlatform.py @@ -25,30 +25,10 @@ def check_load_commands(self, expected_load_command): def check_debugserver(self, log, expected_platform, expected_version): """scan the debugserver packet log""" - logfile = open(log, "r") - dylib_info = None - process_info_ostype = None - expect_dylib_info_response = False - expect_process_info_response = False - for line in logfile: - if expect_dylib_info_response: - while line[0] != '$': - line = line[1:] - line = line[1:] - # Unescape '}'. - dylib_info = json.loads(line.replace('}]','}')[:-4]) - expect_dylib_info_response = False - if 'send packet: $jGetLoadedDynamicLibrariesInfos:{' in line: - expect_dylib_info_response = True - if expect_process_info_response: - for pair in line.split(';'): - keyval = pair.split(':') - if len(keyval) == 2 and keyval[0] == 'ostype': - process_info_ostype = keyval[1] - if 'send packet: $qProcessInfo#' in line: - expect_process_info_response = True - - self.assertEquals(process_info_ostype, expected_platform) + process_info = lldbutil.packetlog_get_process_info(log) + self.assertTrue('ostype' in process_info) + self.assertEquals(process_info['ostype'], expected_platform) + dylib_info = lldbutil.packetlog_get_dylib_info(log) self.assertTrue(dylib_info) aout_info = None for image in dylib_info['images']: diff --git a/lldb/tools/debugserver/source/DNB.cpp b/lldb/tools/debugserver/source/DNB.cpp index 3c1cd85dc310f..afafe0d0474a5 100644 --- a/lldb/tools/debugserver/source/DNB.cpp +++ b/lldb/tools/debugserver/source/DNB.cpp @@ -1418,19 +1418,20 @@ nub_bool_t DNBProcessSharedLibrariesUpdated(nub_process_t pid) { return false; } -const char *DNBGetDeploymentInfo(nub_process_t pid, - const struct load_command& lc, +const char *DNBGetDeploymentInfo(nub_process_t pid, bool is_executable, + const struct load_command &lc, uint64_t load_command_address, - uint32_t& major_version, - uint32_t& minor_version, - uint32_t& patch_version) { + uint32_t &major_version, + uint32_t &minor_version, + uint32_t &patch_version) { MachProcessSP procSP; if (GetProcessSP(pid, procSP)) { // FIXME: This doesn't return the correct result when xctest (a // macOS binary) is loaded with the macCatalyst dyld platform // override. The image info corrects for this, but qProcessInfo // will return what is in the binary. - auto info = procSP->GetDeploymentInfo(lc, load_command_address); + auto info = + procSP->GetDeploymentInfo(lc, load_command_address, is_executable); major_version = info.major_version; minor_version = info.minor_version; patch_version = info.patch_version; @@ -1439,7 +1440,6 @@ const char *DNBGetDeploymentInfo(nub_process_t pid, return nullptr; } - // Get the current shared library information for a process. 
Only return // the shared libraries that have changed since the last shared library // state changed event if only_changed is non-zero. diff --git a/lldb/tools/debugserver/source/DNB.h b/lldb/tools/debugserver/source/DNB.h index e0e1cdd6d8b64..8364ec0c11624 100644 --- a/lldb/tools/debugserver/source/DNB.h +++ b/lldb/tools/debugserver/source/DNB.h @@ -128,12 +128,12 @@ nub_bool_t DNBProcessSharedLibrariesUpdated(nub_process_t pid) DNB_EXPORT; nub_size_t DNBProcessGetSharedLibraryInfo(nub_process_t pid, nub_bool_t only_changed, DNBExecutableImageInfo **image_infos) DNB_EXPORT; -const char *DNBGetDeploymentInfo(nub_process_t pid, - const struct load_command& lc, +const char *DNBGetDeploymentInfo(nub_process_t pid, bool is_executable, + const struct load_command &lc, uint64_t load_command_address, - uint32_t& major_version, - uint32_t& minor_version, - uint32_t& patch_version); + uint32_t &major_version, + uint32_t &minor_version, + uint32_t &patch_version); nub_bool_t DNBProcessSetNameToAddressCallback(nub_process_t pid, DNBCallbackNameToAddress callback, void *baton) DNB_EXPORT; diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.h b/lldb/tools/debugserver/source/MacOSX/MachProcess.h index 9d712390ac2ac..7eb663cc2d511 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.h +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.h @@ -241,7 +241,8 @@ class MachProcess { uint32_t patch_version = 0; }; DeploymentInfo GetDeploymentInfo(const struct load_command &, - uint64_t load_command_address); + uint64_t load_command_address, + bool is_executable); static const char *GetPlatformString(unsigned char platform); bool GetMachOInformationFromMemory(uint32_t platform, nub_addr_t mach_o_header_addr, @@ -249,7 +250,15 @@ class MachProcess { struct mach_o_information &inf); JSONGenerator::ObjectSP FormatDynamicLibrariesIntoJSON( const std::vector &image_infos); - uint32_t GetAllLoadedBinariesViaDYLDSPI( + /// Get the runtime platform from DYLD via SPI. + uint32_t GetProcessPlatformViaDYLDSPI(); + /// Use the dyld SPI present in macOS 10.12, iOS 10, tvOS 10, + /// watchOS 3 and newer to get the load address, uuid, and filenames + /// of all the libraries. This only fills in those three fields in + /// the 'struct binary_image_information' - call + /// GetMachOInformationFromMemory to fill in the mach-o header/load + /// command details. + void GetAllLoadedBinariesViaDYLDSPI( std::vector &image_infos); JSONGenerator::ObjectSP GetLoadedDynamicLibrariesInfos( nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count); diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm index 10eaf38ea4357..d3c2f52a972a7 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm @@ -93,6 +93,7 @@ static CFStringRef CopyBundleIDForPath(const char *app_bundle_path, typedef bool (*CallOpenApplicationFunction)(NSString *bundleIDNSStr, NSDictionary *options, DNBError &error, pid_t *return_pid); + // This function runs the BKSSystemService (or FBSSystemService) method // openApplication:options:clientPort:withResult, // messaging the app passed in bundleIDNSStr. 
@@ -483,6 +484,7 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, #define _POSIX_SPAWN_DISABLE_ASLR 0x0100 #endif + MachProcess::MachProcess() : m_pid(0), m_cpu_type(0), m_child_stdin(-1), m_child_stdout(-1), m_child_stderr(-1), m_path(), m_args(), m_task(this), @@ -603,9 +605,11 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, MachProcess::DeploymentInfo MachProcess::GetDeploymentInfo(const struct load_command &lc, - uint64_t load_command_address) { + uint64_t load_command_address, + bool is_executable) { DeploymentInfo info; uint32_t cmd = lc.cmd & ~LC_REQ_DYLD; + // Handle the older LC_VERSION load commands, which don't // distinguish between simulator and real hardware. auto handle_version_min = [&](char platform) { @@ -640,6 +644,7 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, // unambiguous LC_BUILD_VERSION load commands. #endif }; + switch (cmd) { case LC_VERSION_MIN_IPHONEOS: handle_version_min(PLATFORM_IOS); @@ -667,6 +672,27 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, } #endif } + + // The xctest binary is a pure macOS binary but is launched with + // DYLD_FORCE_PLATFORM=6. In that case, force the platform to + // macCatalyst and use the macCatalyst version of the host OS + // instead of the macOS deployment target. + if (is_executable && GetProcessPlatformViaDYLDSPI() == PLATFORM_MACCATALYST) { + info.platform = PLATFORM_MACCATALYST; + std::string catalyst_version = GetMacCatalystVersionString(); + const char *major = catalyst_version.c_str(); + char *minor = nullptr; + char *patch = nullptr; + info.major_version = std::strtoul(major, &minor, 10); + info.minor_version = 0; + info.patch_version = 0; + if (minor && *minor == '.') { + info.minor_version = std::strtoul(++minor, &patch, 10); + if (patch && *patch == '.') + info.patch_version = std::strtoul(++patch, nullptr, 10); + } + } + return info; } @@ -798,37 +824,21 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, sizeof(struct uuid_command)) uuid_copy(inf.uuid, uuidcmd.uuid); } - if (DeploymentInfo deployment_info = GetDeploymentInfo(lc, load_cmds_p)) { + if (DeploymentInfo deployment_info = GetDeploymentInfo( + lc, load_cmds_p, inf.mach_header.filetype == MH_EXECUTE)) { const char *lc_platform = GetPlatformString(deployment_info.platform); - // macCatalyst support. - // - // This handles two special cases: - // - // 1. Frameworks that have both a PLATFORM_MACOS and a - // PLATFORM_MACCATALYST load command. Make sure to select - // the requested one. - // - // 2. The xctest binary is a pure macOS binary but is launched - // with DYLD_FORCE_PLATFORM=6. - if (dyld_platform == PLATFORM_MACCATALYST && - inf.mach_header.filetype == MH_EXECUTE && - inf.min_version_os_name.empty() && - (strcmp("macosx", lc_platform) == 0)) { - // DYLD says this *is* a macCatalyst process. If we haven't - // parsed any load commands, transform a macOS load command - // into a generic macCatalyst load command. It will be - // overwritten by a more specific one if there is one. This - // is only done for the main executable. It is perfectly fine - // for a macCatalyst binary to link against a macOS-only framework. 
- inf.min_version_os_name = "maccatalyst"; - inf.min_version_os_version = GetMacCatalystVersionString(); - } else if (dyld_platform != PLATFORM_MACCATALYST && - inf.min_version_os_name == "macosx") { - // This is a binary with both PLATFORM_MACOS and - // PLATFORM_MACCATALYST load commands and the process is not - // running as PLATFORM_MACCATALYST. Stick with the - // "macosx" load command that we've already processed, - // ignore this one, which is presumed to be a + if (dyld_platform != PLATFORM_MACCATALYST && + inf.min_version_os_name == "macosx") { + // macCatalyst support. + // + // This is the special case of "zippered" frameworks that have both + // a PLATFORM_MACOS and a PLATFORM_MACCATALYST load command. + // + // When we are in this block, this is a binary with both + // PLATFORM_MACOS and PLATFORM_MACCATALYST load commands and + // the process is not running as PLATFORM_MACCATALYST. Stick + // with the "macosx" load command that we've already + // processed, ignore this one, which is presumed to be a // PLATFORM_MACCATALYST one. } else { inf.min_version_os_name = lc_platform; @@ -1056,25 +1066,36 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, return reply_sp; } -// From dyld SPI header dyld_process_info.h +/// From dyld SPI header dyld_process_info.h typedef void *dyld_process_info; struct dyld_process_cache_info { - uuid_t cacheUUID; // UUID of cache used by process - uint64_t cacheBaseAddress; // load address of dyld shared cache - bool noCache; // process is running without a dyld cache - bool privateCache; // process is using a private copy of its dyld cache + /// UUID of cache used by process. + uuid_t cacheUUID; + /// Load address of dyld shared cache. + uint64_t cacheBaseAddress; + /// Process is running without a dyld cache. + bool noCache; + /// Process is using a private copy of its dyld cache. + bool privateCache; }; -// Use the dyld SPI present in macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer -// to get -// the load address, uuid, and filenames of all the libraries. -// This only fills in those three fields in the 'struct -// binary_image_information' - call -// GetMachOInformationFromMemory to fill in the mach-o header/load command -// details.
-uint32_t MachProcess::GetAllLoadedBinariesViaDYLDSPI( - std::vector &image_infos) { +uint32_t MachProcess::GetProcessPlatformViaDYLDSPI() { + kern_return_t kern_ret; uint32_t platform = 0; + if (m_dyld_process_info_create) { + dyld_process_info info = + m_dyld_process_info_create(m_task.TaskPort(), 0, &kern_ret); + if (info) { + if (m_dyld_process_info_get_platform) + platform = m_dyld_process_info_get_platform(info); + m_dyld_process_info_release(info); + } + } + return platform; +} + +void MachProcess::GetAllLoadedBinariesViaDYLDSPI( + std::vector &image_infos) { kern_return_t kern_ret; if (m_dyld_process_info_create) { dyld_process_info info = @@ -1089,12 +1110,9 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, image.load_address = mach_header_addr; image_infos.push_back(image); }); - if (m_dyld_process_info_get_platform) - platform = m_dyld_process_info_get_platform(info); m_dyld_process_info_release(info); } } - return platform; } // Fetch information about all shared libraries using the dyld SPIs that exist @@ -1115,7 +1133,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, pointer_size = 8; std::vector image_infos; - uint32_t platform = GetAllLoadedBinariesViaDYLDSPI(image_infos); + GetAllLoadedBinariesViaDYLDSPI(image_infos); + uint32_t platform = GetProcessPlatformViaDYLDSPI(); const size_t image_count = image_infos.size(); for (size_t i = 0; i < image_count; i++) { GetMachOInformationFromMemory(platform, @@ -1145,7 +1164,8 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, pointer_size = 8; std::vector all_image_infos; - uint32_t platform = GetAllLoadedBinariesViaDYLDSPI(all_image_infos); + GetAllLoadedBinariesViaDYLDSPI(all_image_infos); + uint32_t platform = GetProcessPlatformViaDYLDSPI(); std::vector image_infos; const size_t macho_addresses_count = macho_addresses.size(); @@ -1173,7 +1193,7 @@ static bool FBSAddEventDataToOptions(NSMutableDictionary *options, JSONGenerator::ObjectSP MachProcess::GetSharedCacheInfo(nub_process_t pid) { JSONGenerator::DictionarySP reply_sp(new JSONGenerator::Dictionary()); - ; + kern_return_t kern_ret; if (m_dyld_process_info_create && m_dyld_process_info_get_cache) { dyld_process_info info = diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 8a3045564aaf1..5e2512731f39c 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -6356,10 +6356,11 @@ rnb_err_t RNBRemote::HandlePacket_qProcessInfo(const char *p) { DNBProcessMemoryRead(pid, load_command_addr, sizeof(lc), &lc); (void)bytes_read; + bool is_executable = true; uint32_t major_version, minor_version, patch_version; - auto *platform = DNBGetDeploymentInfo(pid, lc, load_command_addr, - major_version, minor_version, - patch_version); + auto *platform = + DNBGetDeploymentInfo(pid, is_executable, lc, load_command_addr, + major_version, minor_version, patch_version); if (platform) { os_handled = true; rep << "ostype:" << platform << ";"; From 3f3303324eb2dc8a7e6d0ff867159c08479609de Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 5 Aug 2020 01:52:57 +0300 Subject: [PATCH 389/600] [NFC][InstCombine] Negator: add tests for negation of left-shift by constant --- .../InstCombine/sub-of-negatible.ll | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index f913de3c653ff..caa6e25ccf69c 100644 --- 
a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -1040,3 +1040,65 @@ define <2 x i4> @negate_insertelement_nonnegatible_insert(<2 x i4> %src, i4 %a, %t3 = sub <2 x i4> %b, %t2 ret <2 x i4> %t3 } + +; left-shift by constant can always be negated +define i8 @negate_left_shift_by_constant_prefer_keeping_shl(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @negate_left_shift_by_constant_prefer_keeping_shl( +; CHECK-NEXT: [[T0:%.*]] = sub i8 0, [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1_NEG:%.*]] = shl i8 [[Z]], 4 +; CHECK-NEXT: [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]] +; CHECK-NEXT: ret i8 [[T2]] +; + %t0 = sub i8 0, %z + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 4 + %t2 = sub i8 %x, %t1 + ret i8 %t2 +} +define i8 @negate_left_shift_by_constant_prefer_keeping_shl_extrause(i8 %x, i8 %y, i8 %z) { +; CHECK-LABEL: @negate_left_shift_by_constant_prefer_keeping_shl_extrause( +; CHECK-NEXT: [[T0:%.*]] = sub i8 0, [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 4 +; CHECK-NEXT: call void @use8(i8 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]] +; CHECK-NEXT: ret i8 [[T2]] +; + %t0 = sub i8 0, %z + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 4 + call void @use8(i8 %t1) + %t2 = sub i8 %x, %t1 + ret i8 %t2 +} +define i8 @negate_left_shift_by_constant(i8 %x, i8 %y, i8 %z, i8 %k) { +; CHECK-LABEL: @negate_left_shift_by_constant( +; CHECK-NEXT: [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 4 +; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]] +; CHECK-NEXT: ret i8 [[T2]] +; + %t0 = sub i8 %k, %z + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 4 + %t2 = sub i8 %x, %t1 + ret i8 %t2 +} +define i8 @negate_left_shift_by_constant_extrause(i8 %x, i8 %y, i8 %z, i8 %k) { +; CHECK-LABEL: @negate_left_shift_by_constant_extrause( +; CHECK-NEXT: [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]] +; CHECK-NEXT: call void @use8(i8 [[T0]]) +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 4 +; CHECK-NEXT: call void @use8(i8 [[T1]]) +; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]] +; CHECK-NEXT: ret i8 [[T2]] +; + %t0 = sub i8 %k, %z + call void @use8(i8 %t0) + %t1 = shl i8 %t0, 4 + call void @use8(i8 %t1) + %t2 = sub i8 %x, %t1 + ret i8 %t2 +} From 8fd57b06a4fd04ada4f1d1c4124adc9998be1e5f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 5 Aug 2020 02:42:21 +0300 Subject: [PATCH 390/600] [NFC][InstCombine] Fix value names (s/%tmp/%i/) and autogenerate a few tests being affected by negator change --- llvm/test/Transforms/InstCombine/icmp.ll | 26 +++++++------- llvm/test/Transforms/InstCombine/sub-gep.ll | 6 ++-- llvm/test/Transforms/InstCombine/sub.ll | 40 ++++++++++----------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 1f848680a4eee..e3050aa1bac28 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -916,14 +916,14 @@ define i1 @PR32949(i32 %X, i32 %Y, i32 %Z) { } ; PR8469 -define <2 x i1> @test49(<2 x i32> %tmp3) { +define <2 x i1> @test49(<2 x i32> %i3) { ; CHECK-LABEL: @test49( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret <2 x i1> ; entry: - %tmp11 = and <2 x i32> %tmp3, - %cmp = icmp ult <2 x i32> %tmp11, + %i11 = and <2 x i32> %i3, + %cmp = icmp ult <2 x i32> %i11, ret <2 x i1> %cmp } @@ -959,9 +959,9 @@ define i1 @test52(i32 %x1) { ; %conv = and 
i32 %x1, 255 %cmp = icmp eq i32 %conv, 127 - %tmp2 = lshr i32 %x1, 16 - %tmp3 = trunc i32 %tmp2 to i8 - %cmp15 = icmp eq i8 %tmp3, 76 + %i2 = lshr i32 %x1, 16 + %i3 = trunc i32 %i2 to i8 + %cmp15 = icmp eq i8 %i3, 76 %A = and i1 %cmp, %cmp15 ret i1 %A @@ -975,9 +975,9 @@ define i1 @test52b(i128 %x1) { ; %conv = and i128 %x1, 255 %cmp = icmp eq i128 %conv, 127 - %tmp2 = lshr i128 %x1, 16 - %tmp3 = trunc i128 %tmp2 to i8 - %cmp15 = icmp eq i8 %tmp3, 76 + %i2 = lshr i128 %x1, 16 + %i3 = trunc i128 %i2 to i8 + %cmp15 = icmp eq i8 %i3, 76 %A = and i1 %cmp, %cmp15 ret i1 %A @@ -3006,10 +3006,10 @@ define i32 @f6(i32 %a, i32 %b) { define i32 @f7(i32 %a, i32 %b) { ; CHECK-LABEL: @f7( -; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP_MASK]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 0, i32 10000 +; CHECK-NEXT: [[CMP_NOT_UNSHIFTED:%.*]] = xor i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[CMP_NOT_MASK:%.*]] = and i32 [[CMP_NOT_UNSHIFTED]], 511 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[CMP_NOT_MASK]], 0 +; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP_NOT]], i32 0, i32 10000 ; CHECK-NEXT: ret i32 [[S]] ; %sext = shl i32 %a, 23 diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 1b9079738ba7f..51eb994bf3453 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -73,9 +73,9 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[IDX2:%.*]], [[IDX:%.*]] -; CHECK-NEXT: [[DIFF_NEG:%.*]] = shl i64 [[TMP1]], 2 -; CHECK-NEXT: ret i64 [[DIFF_NEG]] +; CHECK-NEXT: [[P1_IDX1_NEG:%.*]] = sub i64 [[IDX2:%.*]], [[IDX:%.*]] +; CHECK-NEXT: [[DOTNEG:%.*]] = shl i64 [[P1_IDX1_NEG]], 2 +; CHECK-NEXT: ret i64 [[DOTNEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 9463cea877b92..3bc8b67649fc0 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -376,42 +376,42 @@ define i64 @test18(i64 %Y) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: ret i64 0 ; - %tmp.4 = shl i64 %Y, 2 - %tmp.12 = shl i64 %Y, 2 - %tmp.8 = sub i64 %tmp.4, %tmp.12 - ret i64 %tmp.8 + %i.4 = shl i64 %Y, 2 + %i.12 = shl i64 %Y, 2 + %i.8 = sub i64 %i.4, %i.12 + ret i64 %i.8 } define i1 @test20(i32 %g, i32 %h) { ; CHECK-LABEL: @test20( -; CHECK-NEXT: [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0 -; CHECK-NEXT: ret i1 [[TMP_4]] +; CHECK-NEXT: [[I_4:%.*]] = icmp ne i32 [[H:%.*]], 0 +; CHECK-NEXT: ret i1 [[I_4]] ; - %tmp.2 = sub i32 %g, %h - %tmp.4 = icmp ne i32 %tmp.2, %g - ret i1 %tmp.4 + %i.2 = sub i32 %g, %h + %i.4 = icmp ne i32 %i.2, %g + ret i1 %i.4 } define i1 @test21(i32 %g, i32 %h) { ; CHECK-LABEL: @test21( -; CHECK-NEXT: [[TMP_4:%.*]] = icmp ne i32 [[H:%.*]], 0 -; CHECK-NEXT: ret i1 [[TMP_4]] +; CHECK-NEXT: [[I_4:%.*]] = icmp ne i32 [[H:%.*]], 0 +; CHECK-NEXT: ret i1 [[I_4]] ; - %tmp.2 = sub i32 %g, %h - %tmp.4 = icmp ne i32 %tmp.2, %g - ret i1 %tmp.4 + %i.2 = sub i32 %g, %h + %i.4 = icmp ne i32 %i.2, %g + ret i1 %i.4 } ; PR2298 
define zeroext i1 @test22(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: @test22( -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: ret i1 [[TMP5]] +; CHECK-NEXT: [[I5:%.*]] = icmp eq i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: ret i1 [[I5]] ; - %tmp2 = sub i32 0, %a - %tmp4 = sub i32 0, %b - %tmp5 = icmp eq i32 %tmp2, %tmp4 - ret i1 %tmp5 + %i2 = sub i32 0, %a + %i4 = sub i32 0, %b + %i5 = icmp eq i32 %i2, %i4 + ret i1 %i5 } ; rdar://7362831 From 8aeb2fe13a4100b4c2e78d6ef75119304100cb1f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 5 Aug 2020 02:08:58 +0300 Subject: [PATCH 391/600] [InstCombine] Negator: -(X << C) --> X * (-1 << C) This shows some regressions in tests, but they are all around GEPs, so I'm not really sure how important those are. https://rise4fun.com/Alive/1Gn --- .../InstCombine/InstCombineNegator.cpp | 12 ++++-- llvm/test/Transforms/InstCombine/icmp.ll | 3 +- llvm/test/Transforms/InstCombine/sub-gep.ll | 10 ++--- .../InstCombine/sub-of-negatible.ll | 4 +- llvm/test/Transforms/InstCombine/sub.ll | 43 ++++++++++--------- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index 1c7f00b0edee7..b684016b6a296 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -324,10 +324,16 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { } case Instruction::Shl: { // `shl` is negatible if the first operand is negatible. - Value *NegOp0 = negate(I->getOperand(0), Depth + 1); - if (!NegOp0) // Early return. + if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) + return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); + // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`. + auto *Op1C = dyn_cast<Constant>(I->getOperand(1)); + if (!Op1C) // Early return.
return nullptr; - return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); + return Builder.CreateMul( + I->getOperand(0), + ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), + I->getName() + ".neg"); } case Instruction::Or: if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index e3050aa1bac28..7addc85c4c8bb 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -512,7 +512,8 @@ define i1 @test24(i64 %i) { ; unsigned overflow does not happen during offset computation define i1 @test24_neg_offs(i32* %p, i64 %offs) { ; CHECK-LABEL: @test24_neg_offs( -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OFFS:%.*]], -2 +; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[OFFS:%.*]], -4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[P1_IDX_NEG]], 8 ; CHECK-NEXT: ret i1 [[CMP]] ; %p1 = getelementptr inbounds i32, i32* %p, i64 %offs diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 51eb994bf3453..cf9604223f6c1 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -58,9 +58,8 @@ define i32 @test_inbounds_nuw_trunc([0 x i32]* %base, i64 %idx) { define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw_swapped( -; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 -; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[P2_IDX]] -; CHECK-NEXT: ret i64 [[DIFF_NEG]] +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx @@ -73,8 +72,9 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ; The sub and shl here could be nuw, but this is harder to handle. 
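The Alive link above proves the underlying identity; it can also be sanity-checked exhaustively at one narrow bit width. A standalone C++ check (an illustration, not code from the patch) that -(x << c) == x * (-1 << c) holds for every 8-bit value, using unsigned arithmetic to model two's-complement wrapping without undefined behavior:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned c = 0; c < 8; ++c) {
    uint8_t neg_one_shl = (uint8_t)(0xFFu << c); // (-1 << c) mod 2^8
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t lhs = (uint8_t)(0u - (uint8_t)(x << c)); // -(x << c)
      uint8_t rhs = (uint8_t)(x * neg_one_shl);        // x * (-1 << c)
      assert(lhs == rhs);
    }
  }
  return 0;
}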
define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( -; CHECK-NEXT: [[P1_IDX1_NEG:%.*]] = sub i64 [[IDX2:%.*]], [[IDX:%.*]] -; CHECK-NEXT: [[DOTNEG:%.*]] = shl i64 [[P1_IDX1_NEG]], 2 +; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 +; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] ; CHECK-NEXT: ret i64 [[DOTNEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index caa6e25ccf69c..4a3c56337c228 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -1076,8 +1076,8 @@ define i8 @negate_left_shift_by_constant(i8 %x, i8 %y, i8 %z, i8 %k) { ; CHECK-LABEL: @negate_left_shift_by_constant( ; CHECK-NEXT: [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 4 -; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T1_NEG:%.*]] = mul i8 [[T0]], -16 +; CHECK-NEXT: [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[T2]] ; %t0 = sub i8 %k, %z diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 3bc8b67649fc0..4116a79d66d9b 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -506,8 +506,8 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 +; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 ; CHECK-NEXT: ret i64 [[DOTNEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A @@ -521,8 +521,8 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 +; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 ; CHECK-NEXT: ret i16 [[DOTNEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A @@ -557,9 +557,8 @@ define i64 @test_neg_shl_sub_extra_use1(i64 %a, i64 %b, i64* %p) { ; CHECK-LABEL: @test_neg_shl_sub_extra_use1( ; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: store i64 [[SUB]], i64* [[P:%.*]], align 8 -; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[SUB]], 2 -; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] -; CHECK-NEXT: ret i64 [[NEG]] +; CHECK-NEXT: [[MUL_NEG:%.*]] = mul i64 [[SUB]], -4 +; CHECK-NEXT: ret i64 [[MUL_NEG]] ; %sub = sub i64 %a, %b store i64 %sub, i64* %p @@ -840,9 +839,10 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEP1_IDX_NEG:%.*]] = mul i64 [[I:%.*]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX_NEG]], [[J:%.*]] +; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[TMP1]] 
+; CHECK-NEXT: ret i64 [[DIFF_NEG]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -855,9 +855,10 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[DOTNEG]] +; CHECK-NEXT: [[GEP1_IDX_NEG:%.*]] = mul i16 [[I:%.*]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[GEP1_IDX_NEG]], [[J:%.*]] +; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i16 0, [[TMP1]] +; CHECK-NEXT: ret i16 [[DIFF_NEG]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1310,8 +1311,8 @@ define i64 @test61([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { define i32 @test62(i32 %A) { ; CHECK-LABEL: @test62( -; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1 -; CHECK-NEXT: [[C:%.*]] = sub i32 2, [[B]] +; CHECK-NEXT: [[B_NEG:%.*]] = mul i32 [[A:%.*]], -2 +; CHECK-NEXT: [[C:%.*]] = add i32 [[B_NEG]], 2 ; CHECK-NEXT: ret i32 [[C]] ; %B = sub i32 1, %A @@ -1321,8 +1322,8 @@ define i32 @test62(i32 %A) { define <2 x i32> @test62vec(<2 x i32> %A) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> , [[B]] +; CHECK-NEXT: [[B_NEG:%.*]] = mul <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B_NEG]], ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = sub <2 x i32> , %A @@ -1332,8 +1333,8 @@ define <2 x i32> @test62vec(<2 x i32> %A) { define i32 @test63(i32 %A) { ; CHECK-LABEL: @test63( -; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1 -; CHECK-NEXT: ret i32 [[B]] +; CHECK-NEXT: [[B_NEG_NEG:%.*]] = shl i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i32 [[B_NEG_NEG]] ; %B = sub i32 1, %A %C = shl i32 %B, 1 @@ -1343,8 +1344,8 @@ define i32 @test63(i32 %A) { define <2 x i32> @test63vec(<2 x i32> %A) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: ret <2 x i32> [[B]] +; CHECK-NEXT: [[B_NEG_NEG:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i32> [[B_NEG_NEG]] ; %B = sub <2 x i32> , %A %C = shl <2 x i32> %B, From ac70b37a00dc02bd8923e0a4602d26be4581c570 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 5 Aug 2020 03:19:38 +0300 Subject: [PATCH 392/600] Revert "[InstCombine] Negator: -(X << C) --> X * (-1 << C)" Breaks codegen tests, will recommit later. This reverts commit 8aeb2fe13a4100b4c2e78d6ef75119304100cb1f. --- .../InstCombine/InstCombineNegator.cpp | 12 ++---- llvm/test/Transforms/InstCombine/icmp.ll | 3 +- llvm/test/Transforms/InstCombine/sub-gep.ll | 10 ++--- .../InstCombine/sub-of-negatible.ll | 4 +- llvm/test/Transforms/InstCombine/sub.ll | 43 +++++++++---------- 5 files changed, 32 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp index b684016b6a296..1c7f00b0edee7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp @@ -324,16 +324,10 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) { } case Instruction::Shl: { // `shl` is negatible if the first operand is negatible. 
- if (Value *NegOp0 = negate(I->getOperand(0), Depth + 1)) - return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); - // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<(I->getOperand(1)); - if (!Op1C) // Early return. + Value *NegOp0 = negate(I->getOperand(0), Depth + 1); + if (!NegOp0) // Early return. return nullptr; - return Builder.CreateMul( - I->getOperand(0), - ConstantExpr::getShl(Constant::getAllOnesValue(Op1C->getType()), Op1C), - I->getName() + ".neg"); + return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg"); } case Instruction::Or: if (!haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL, &AC, I, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 7addc85c4c8bb..e3050aa1bac28 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -512,8 +512,7 @@ define i1 @test24(i64 %i) { ; unsigned overflow does not happen during offset computation define i1 @test24_neg_offs(i32* %p, i64 %offs) { ; CHECK-LABEL: @test24_neg_offs( -; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[OFFS:%.*]], -4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[P1_IDX_NEG]], 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[OFFS:%.*]], -2 ; CHECK-NEXT: ret i1 [[CMP]] ; %p1 = getelementptr inbounds i32, i32* %p, i64 %offs diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index cf9604223f6c1..51eb994bf3453 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -58,8 +58,9 @@ define i32 @test_inbounds_nuw_trunc([0 x i32]* %base, i64 %idx) { define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw_swapped( -; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 +; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[P2_IDX]] +; CHECK-NEXT: ret i64 [[DIFF_NEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx @@ -72,9 +73,8 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ; The sub and shl here could be nuw, but this is harder to handle. 
define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( -; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] +; CHECK-NEXT: [[P1_IDX1_NEG:%.*]] = sub i64 [[IDX2:%.*]], [[IDX:%.*]] +; CHECK-NEXT: [[DOTNEG:%.*]] = shl i64 [[P1_IDX1_NEG]], 2 ; CHECK-NEXT: ret i64 [[DOTNEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index 4a3c56337c228..caa6e25ccf69c 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -1076,8 +1076,8 @@ define i8 @negate_left_shift_by_constant(i8 %x, i8 %y, i8 %z, i8 %k) { ; CHECK-LABEL: @negate_left_shift_by_constant( ; CHECK-NEXT: [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]] ; CHECK-NEXT: call void @use8(i8 [[T0]]) -; CHECK-NEXT: [[T1_NEG:%.*]] = mul i8 [[T0]], -16 -; CHECK-NEXT: [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]] +; CHECK-NEXT: [[T1:%.*]] = shl i8 [[T0]], 4 +; CHECK-NEXT: [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]] ; CHECK-NEXT: ret i8 [[T2]] ; %t0 = sub i8 %k, %z diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 4116a79d66d9b..3bc8b67649fc0 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -506,8 +506,8 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[B_IDX]], -84 ; CHECK-NEXT: ret i64 [[DOTNEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A @@ -521,8 +521,8 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[DOTNEG:%.*]] = add i16 [[B_IDX]], -84 ; CHECK-NEXT: ret i16 [[DOTNEG]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A @@ -557,8 +557,9 @@ define i64 @test_neg_shl_sub_extra_use1(i64 %a, i64 %b, i64* %p) { ; CHECK-LABEL: @test_neg_shl_sub_extra_use1( ; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: store i64 [[SUB]], i64* [[P:%.*]], align 8 -; CHECK-NEXT: [[MUL_NEG:%.*]] = mul i64 [[SUB]], -4 -; CHECK-NEXT: ret i64 [[MUL_NEG]] +; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[SUB]], 2 +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[MUL]] +; CHECK-NEXT: ret i64 [[NEG]] ; %sub = sub i64 %a, %b store i64 %sub, i64* %p @@ -839,10 +840,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX_NEG:%.*]] = mul i64 [[I:%.*]], -4 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX_NEG]], [[J:%.*]] -; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i64 0, [[TMP1]] -; CHECK-NEXT: ret i64 [[DIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[DOTNEG:%.*]] = sub i64 [[GEP1_IDX]], 
[[J:%.*]] +; CHECK-NEXT: ret i64 [[DOTNEG]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -855,10 +855,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX_NEG:%.*]] = mul i16 [[I:%.*]], -4 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[GEP1_IDX_NEG]], [[J:%.*]] -; CHECK-NEXT: [[DIFF_NEG:%.*]] = sub i16 0, [[TMP1]] -; CHECK-NEXT: ret i16 [[DIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[DOTNEG:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[DOTNEG]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1311,8 +1310,8 @@ define i64 @test61([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { define i32 @test62(i32 %A) { ; CHECK-LABEL: @test62( -; CHECK-NEXT: [[B_NEG:%.*]] = mul i32 [[A:%.*]], -2 -; CHECK-NEXT: [[C:%.*]] = add i32 [[B_NEG]], 2 +; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1 +; CHECK-NEXT: [[C:%.*]] = sub i32 2, [[B]] ; CHECK-NEXT: ret i32 [[C]] ; %B = sub i32 1, %A @@ -1322,8 +1321,8 @@ define i32 @test62(i32 %A) { define <2 x i32> @test62vec(<2 x i32> %A) { ; CHECK-LABEL: @test62vec( -; CHECK-NEXT: [[B_NEG:%.*]] = mul <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B_NEG]], +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[C:%.*]] = sub <2 x i32> , [[B]] ; CHECK-NEXT: ret <2 x i32> [[C]] ; %B = sub <2 x i32> , %A @@ -1333,8 +1332,8 @@ define <2 x i32> @test62vec(<2 x i32> %A) { define i32 @test63(i32 %A) { ; CHECK-LABEL: @test63( -; CHECK-NEXT: [[B_NEG_NEG:%.*]] = shl i32 [[A:%.*]], 1 -; CHECK-NEXT: ret i32 [[B_NEG_NEG]] +; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1 +; CHECK-NEXT: ret i32 [[B]] ; %B = sub i32 1, %A %C = shl i32 %B, 1 @@ -1344,8 +1343,8 @@ define i32 @test63(i32 %A) { define <2 x i32> @test63vec(<2 x i32> %A) { ; CHECK-LABEL: @test63vec( -; CHECK-NEXT: [[B_NEG_NEG:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: ret <2 x i32> [[B_NEG_NEG]] +; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: ret <2 x i32> [[B]] ; %B = sub <2 x i32> , %A %C = shl <2 x i32> %B, From 90b9c49ca6477a85e69018967c0a4d4d38ee6e72 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 4 Aug 2020 15:00:35 -0700 Subject: [PATCH 393/600] [llvm] Expose type and element count-related APIs on TensorSpec Added a mechanism to check the element type, get the total element count, and the size of an element. Differential Revision: https://reviews.llvm.org/D85250 --- llvm/include/llvm/Analysis/Utils/TFUtils.h | 13 +++++++++++-- llvm/lib/Analysis/TFUtils.cpp | 11 +++++++++++ llvm/unittests/Analysis/TFUtilsTest.cpp | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index d4450276a22ee..681560e453354 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -66,10 +66,18 @@ class TensorSpec final { bool operator!=(const TensorSpec &Other) const { return !(*this == Other); } + /// Get the number of elements in a tensor with this shape. + size_t getElementCount() const { return ElementCount; } + /// Get the size, in bytes, of one element. 
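ElementCount, as cached here, is just the product of the shape's dimensions. A standalone sketch of the accumulate-based computation (illustration only; it mirrors the expectations in the unit test below):

#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  // A {2, 4, 10} tensor shape holds 2 * 4 * 10 = 80 elements.
  std::vector<int64_t> shape = {2, 4, 10};
  int64_t count = std::accumulate(shape.begin(), shape.end(), int64_t(1),
                                  std::multiplies<int64_t>());
  assert(count == 80);
  return 0;
}

Note the initial value of 1: an empty (rank-0, scalar) shape then yields a count of 1 rather than 0.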
+ size_t getElementByteSize() const; + + template bool isElementType() const { + return getDataType() == TypeIndex; + } + private: TensorSpec(const std::string &Name, int Port, int TypeIndex, - const std::vector &Shape) - : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape) {} + const std::vector &Shape); template static int getDataType() { llvm_unreachable("Undefined tensor type"); @@ -79,6 +87,7 @@ class TensorSpec final { int Port = 0; int TypeIndex = 0; std::vector Shape; + size_t ElementCount = 0; }; Optional getTensorSpecFromJSON(LLVMContext &Ctx, diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp index 8fd4011e6cd42..b1be027dc940a 100644 --- a/llvm/lib/Analysis/TFUtils.cpp +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -24,6 +24,7 @@ #include "tensorflow/c/c_api_experimental.h" #include +#include using namespace llvm; @@ -84,6 +85,16 @@ class EvaluationResultImpl { std::vector Output; }; +size_t TensorSpec::getElementByteSize() const { + return TF_DataTypeSize(static_cast(TypeIndex)); +} + +TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex, + const std::vector &Shape) + : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape), + ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1, + std::multiplies())) {} + Optional getTensorSpecFromJSON(LLVMContext &Ctx, const json::Value &Value) { auto EmitError = [&](const llvm::Twine &Message) -> Optional { diff --git a/llvm/unittests/Analysis/TFUtilsTest.cpp b/llvm/unittests/Analysis/TFUtilsTest.cpp index abdf2b2b97844..9e4f2c7faf716 100644 --- a/llvm/unittests/Analysis/TFUtilsTest.cpp +++ b/llvm/unittests/Analysis/TFUtilsTest.cpp @@ -123,3 +123,18 @@ TEST(TFUtilsTest, JSONParsingInvalidTensorType) { auto Spec = getTensorSpecFromJSON(Ctx, *Value); EXPECT_FALSE(Spec.hasValue()); } + +TEST(TFUtilsTest, TensorSpecSizesAndTypes) { + auto Spec1D = TensorSpec::createSpec("Hi1", {1}); + auto Spec2D = TensorSpec::createSpec("Hi2", {1, 1}); + auto Spec1DLarge = TensorSpec::createSpec("Hi3", {10}); + auto Spec3DLarge = TensorSpec::createSpec("Hi3", {2, 4, 10}); + EXPECT_TRUE(Spec1D.isElementType()); + EXPECT_FALSE(Spec3DLarge.isElementType()); + EXPECT_EQ(Spec1D.getElementCount(), 1); + EXPECT_EQ(Spec2D.getElementCount(), 1); + EXPECT_EQ(Spec1DLarge.getElementCount(), 10); + EXPECT_EQ(Spec3DLarge.getElementCount(), 80); + EXPECT_EQ(Spec3DLarge.getElementByteSize(), sizeof(float)); + EXPECT_EQ(Spec1D.getElementByteSize(), sizeof(int16_t)); +} \ No newline at end of file From 03a822f7a676089fca99aac02a057eab8474acc2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 4 Aug 2020 17:39:46 -0700 Subject: [PATCH 394/600] [StackSafety,NFC] Add combined index test --- .../thinlto-function-summary-paramaccess.ll | 92 ++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/llvm/test/Bitcode/thinlto-function-summary-paramaccess.ll b/llvm/test/Bitcode/thinlto-function-summary-paramaccess.ll index 45fea64bbf753..e52dda7081619 100644 --- a/llvm/test/Bitcode/thinlto-function-summary-paramaccess.ll +++ b/llvm/test/Bitcode/thinlto-function-summary-paramaccess.ll @@ -6,6 +6,16 @@ ; RUN: opt -module-summary %s -o %t.bc ; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s -check-prefixes=BC +; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-paramaccess.ll -o %t2.bc + +; RUN: llvm-lto -thinlto -o %t %t.bc %t2.bc + +; RUN: llvm-dis -o - %t.thinlto.bc | FileCheck %s --check-prefix=DCO +; Round trip it through llvm-as +; RUN: llvm-dis -o - %t.thinlto.bc | llvm-as -o - | llvm-dis -o - | 
FileCheck %s --check-prefix=DCO + +; RUN: llvm-bcanalyzer -dump %t.thinlto.bc | FileCheck %s --check-prefix=COMBINED + ; RUN: llvm-dis -o - %t.bc | FileCheck %s --check-prefix=DIS ; Round trip it through llvm-as ; RUN: llvm-dis -o - %t.bc | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS @@ -18,6 +28,9 @@ ; RUN: llvm-dis -o - %t.bc | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS ; DIS: ^0 = module: (path: "{{.*}}", hash: ({{.*}})) +; DCO: ^0 = module: (path: "{{.*}}", hash: ({{.*}})) +; DCO: ^1 = module: (path: "{{.*}}", hash: ({{.*}})) + ; ModuleID = 'thinlto-function-summary-paramaccess.ll' target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux" @@ -29,13 +42,16 @@ attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: ; BC-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: \ No newline at end of file From 4c9ed3ed3d2fc7622acf5fc0d80ad20b44cf376a Mon Sep 17 00:00:00 2001 From: Fred Riss Date: Tue, 4 Aug 2020 17:45:36 -0700 Subject: [PATCH 395/600] [lldb/testsuite] Skip 'frame diagnose' tests based on architecture AFAICS, the feature only works on x86, skipping the tests has nothing to do with the target being iOS or remote. 
--- lldb/test/API/commands/frame/diagnose/array/TestArray.py | 2 +- .../commands/frame/diagnose/bad-reference/TestBadReference.py | 2 +- .../complicated-expression/TestComplicatedExpression.py | 2 +- .../dereference-argument/TestDiagnoseDereferenceArgument.py | 2 +- .../TestDiagnoseDereferenceFunctionReturn.py | 2 +- .../diagnose/dereference-this/TestDiagnoseDereferenceThis.py | 2 +- .../frame/diagnose/inheritance/TestDiagnoseInheritance.py | 2 +- .../commands/frame/diagnose/local-variable/TestLocalVariable.py | 2 +- .../TestDiagnoseDereferenceVirtualMethodCall.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lldb/test/API/commands/frame/diagnose/array/TestArray.py b/lldb/test/API/commands/frame/diagnose/array/TestArray.py index 9b049a2bf2a41..5788cacb9a2ee 100644 --- a/lldb/test/API/commands/frame/diagnose/array/TestArray.py +++ b/lldb/test/API/commands/frame/diagnose/array/TestArray.py @@ -13,7 +13,7 @@ class TestArray(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_array(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py b/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py index 8650484f12a6c..737b297ed76b6 100644 --- a/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py +++ b/lldb/test/API/commands/frame/diagnose/bad-reference/TestBadReference.py @@ -13,7 +13,7 @@ class TestBadReference(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_bad_reference(self): TestBase.setUp(self) self.build() diff --git a/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py b/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py index ccc0f88efe060..277fafd14b574 100644 --- a/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py +++ b/lldb/test/API/commands/frame/diagnose/complicated-expression/TestComplicatedExpression.py @@ -13,7 +13,7 @@ class TestDiagnoseDereferenceArgument(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_diagnose_dereference_argument(self): TestBase.setUp(self) self.build() diff --git a/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py b/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py index bdc89a6ed83df..5d5b3a0cf17fb 100644 --- a/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py +++ b/lldb/test/API/commands/frame/diagnose/dereference-argument/TestDiagnoseDereferenceArgument.py @@ -13,7 +13,7 @@ class TestDiagnoseDereferenceArgument(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_diagnose_dereference_argument(self): TestBase.setUp(self) 
self.build() diff --git a/lldb/test/API/commands/frame/diagnose/dereference-function-return/TestDiagnoseDereferenceFunctionReturn.py b/lldb/test/API/commands/frame/diagnose/dereference-function-return/TestDiagnoseDereferenceFunctionReturn.py index c49c80791af8a..25d7519e5330e 100644 --- a/lldb/test/API/commands/frame/diagnose/dereference-function-return/TestDiagnoseDereferenceFunctionReturn.py +++ b/lldb/test/API/commands/frame/diagnose/dereference-function-return/TestDiagnoseDereferenceFunctionReturn.py @@ -13,7 +13,7 @@ class TestDiagnoseDereferenceFunctionReturn(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 @expectedFailureAll(oslist=['macosx'], archs=['i386'], bugnumber="rdar://28656408") def test_diagnose_dereference_function_return(self): TestBase.setUp(self) diff --git a/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py b/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py index 85de511e56d14..b1f6b2c87943b 100644 --- a/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py +++ b/lldb/test/API/commands/frame/diagnose/dereference-this/TestDiagnoseDereferenceThis.py @@ -13,7 +13,7 @@ class TestDiagnoseDereferenceThis(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_diagnose_dereference_this(self): TestBase.setUp(self) self.build() diff --git a/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py b/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py index 54d44f0cb3c5f..2e5a5f19b940f 100644 --- a/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py +++ b/lldb/test/API/commands/frame/diagnose/inheritance/TestDiagnoseInheritance.py @@ -13,7 +13,7 @@ class TestDiagnoseInheritance(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_diagnose_inheritance(self): TestBase.setUp(self) self.build() diff --git a/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py b/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py index 8d49d30b5e7bb..7e60467bf4258 100644 --- a/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py +++ b/lldb/test/API/commands/frame/diagnose/local-variable/TestLocalVariable.py @@ -13,7 +13,7 @@ class TestLocalVariable(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_local_variable(self): TestBase.setUp(self) self.build() diff --git a/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py b/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py index 7ea42dea49c14..802bf1bd29d6e 100644 --- a/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py +++ 
b/lldb/test/API/commands/frame/diagnose/virtual-method-call/TestDiagnoseDereferenceVirtualMethodCall.py @@ -13,7 +13,7 @@ class TestDiagnoseVirtualMethodCall(TestBase): mydir = TestBase.compute_mydir(__file__) @skipUnlessDarwin - @skipIfDarwinEmbedded # frame diagnose doesn't work for armv7 or arm64 + @skipIf(archs=no_match(['x86_64'])) # frame diagnose doesn't work for armv7 or arm64 def test_diagnose_virtual_method_call(self): TestBase.setUp(self) self.build() From 0c7af8c83bd1acb0ca78f35ddde29b6fde4363a0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 4 Aug 2020 17:50:06 -0700 Subject: [PATCH 396/600] [X86] Optimize getImpliedDisabledFeatures & getImpliedEnabledFeatures after D83273 Previously the time complexity was O(|number of paths from the root to an implied feature| * CPU_FEATURE_MAX) where CPU_FEATURE_MAX is 92. The number of paths can be large (theoretically exponential). For an inline asm statement, there is a code path `clang::Parser::ParseAsmStatement -> clang::Sema::ActOnGCCAsmStmt -> ASTContext::getFunctionFeatureMap` leading to potentially many calls of getImpliedEnabledFeatures (41 for my -march=native case). We should improve the performance a bit in case the number of inline asm statements is large (Linux kernel builds). Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D85257 --- llvm/lib/Support/X86TargetParser.cpp | 39 +++++++++++++++++++++------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 572d1203aaf21..c629f872df121 100--- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -37,6 +37,10 @@ class FeatureBitset { set(I); } + bool any() const { + return llvm::any_of(Bits, [](uint64_t V) { return V != 0; }); + } + constexpr FeatureBitset &set(unsigned I) { // GCC <6.2 crashes if this is written in a single statement. uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32)); @@ -89,6 +93,13 @@ class FeatureBitset { Result.Bits[I] = ~Bits[I]; return Result; } + + constexpr bool operator!=(const FeatureBitset &RHS) const { + for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) + if (Bits[I] != RHS.Bits[I]) + return true; + return false; + } }; struct ProcInfo { @@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU, // For each feature that is (transitively) implied by this feature, set it. static void getImpliedEnabledFeatures(FeatureBitset &Bits, const FeatureBitset &Implies) { + // Fast path: Implies is often empty. + if (!Implies.any()) + return; + FeatureBitset Prev; Bits |= Implies; - for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) { - if (Implies[i]) - getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures); - } + do { + Prev = Bits; + for (unsigned i = CPU_FEATURE_MAX; i;) + if (Bits[--i]) + Bits |= FeatureInfos[i].ImpliedFeatures; + } while (Prev != Bits); } /// Create bit vector of features that are implied disabled if the feature /// passed in Value is disabled. static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) { // Check all features looking for any dependent on this feature. If we find // one, mark it and recursively find any feature that depend on it.
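Both rewrites in this patch replace per-path recursion with the same fixed-point shape: repeatedly OR in everything the current set implies until an iteration adds nothing new. A standalone toy version of the pattern (an illustration; the four-feature implication table is invented):

#include <bitset>
#include <cassert>

int main() {
  // Toy table: feature 1 implies feature 0, feature 2 implies feature 1.
  const std::bitset<4> implied[4] = {
      std::bitset<4>("0000"), std::bitset<4>("0001"),
      std::bitset<4>("0010"), std::bitset<4>("0000")};
  std::bitset<4> bits;
  bits.set(2); // enable only feature 2
  std::bitset<4> prev;
  do { // iterate until the set stops growing
    prev = bits;
    for (unsigned i = 0; i < 4; ++i)
      if (bits[i])
        bits |= implied[i];
  } while (prev != bits);
  assert(bits == std::bitset<4>("0111")); // 2 pulled in 1, which pulled in 0
  return 0;
}

Each outer iteration either adds at least one new feature or terminates, so the loop runs at most CPU_FEATURE_MAX + 1 times no matter how many implication paths exist.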
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) { - if (FeatureInfos[i].ImpliedFeatures[Value]) { - Bits.set(i); - getImpliedDisabledFeatures(Bits, i); - } - } + FeatureBitset Prev; + Bits.set(Value); + do { + Prev = Bits; + for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) + if ((FeatureInfos[i].ImpliedFeatures & Bits).any()) + Bits.set(i); + } while (Prev != Bits); } void llvm::X86::getImpliedFeatures( From dd37b5a35b6923970fff1f9252a9f704981035dc Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Tue, 4 Aug 2020 17:49:56 -0700 Subject: [PATCH 397/600] PR46997: don't run clang-format on clang's testcases. The formatting of the testcases matters and shouldn't be overwritten by a tool. --- .arclint | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.arclint b/.arclint index 246ff9f39d475..27d838eb153f2 100644 --- a/.arclint +++ b/.arclint @@ -6,6 +6,9 @@ "script-and-regex.regex": "/^(?P<severity>[[:alpha:]]+)\n(?P<message>[^\n]+)\n(====|(?P<line>\\d),(?P<char>\\d)\n(?P<original>.*)>>>>\n(?P<replacement>.*)<<<<\n)$/s", "include": [ "(\\.(cc|cpp|h)$)" + ], + "exclude": [ + "(^clang/test/)" ] } } From fe74f731e7024ce8b66869777dca6d6ed5a22e47 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 4 Aug 2020 18:31:10 -0700 Subject: [PATCH 398/600] [StackSafety,NFC] Add combined index test Missing file for the previous patch --- .../thinlto-function-summary-paramaccess.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 llvm/test/Bitcode/Inputs/thinlto-function-summary-paramaccess.ll diff --git a/llvm/test/Bitcode/Inputs/thinlto-function-summary-paramaccess.ll b/llvm/test/Bitcode/Inputs/thinlto-function-summary-paramaccess.ll new file mode 100644 index 0000000000000..6048efa75a396 --- /dev/null +++ b/llvm/test/Bitcode/Inputs/thinlto-function-summary-paramaccess.ll @@ -0,0 +1,15 @@ +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux" + +attributes #0 = { noinline sanitize_memtag "target-features"="+mte,+neon" } + +define void @Callee(i8* %p) #0 { +entry: + ret void +} + +define void @Callee2(i32 %x, i8* %p) #0 { +entry: + ret void +} + From e3df9471750935876bd2bf7da93ccf0eacca8592 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Thu, 30 Jul 2020 23:36:31 -0700 Subject: [PATCH 399/600] [llvm-cov] reset execution count to 0 after wrapped segment Fix the bug: https://bugs.llvm.org/show_bug.cgi?id=36979. It also fixes this bug: https://bugs.llvm.org/show_bug.cgi?id=35404, which I think is caused by the same problem.
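Concretely: LineCoverageStats seeded a line's count from the wrapped segment (a region spilling over from an earlier line) and then only took the max against regions starting on that line, so a hot wrapped region could mask a colder line (the 20-versus-8 change in instrprof-comdat.h below). A standalone model of the corrected logic (an illustration, not llvm-cov code):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  uint64_t wrapped_count = 20;               // count carried in from a prior line
  std::vector<uint64_t> region_starts = {8}; // regions that begin on this line
  uint64_t execution_count = wrapped_count;
  if (!region_starts.empty()) {
    execution_count = 0; // the fix: discard the wrapped count before maxing
    for (uint64_t c : region_starts)
      execution_count = std::max(execution_count, c);
  }
  assert(execution_count == 8); // the unfixed code reported 20 here
  return 0;
}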
Differential Revision: https://reviews.llvm.org/D85036 --- llvm/lib/ProfileData/Coverage/CoverageMapping.cpp | 1 + llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h | 2 +- llvm/test/tools/llvm-cov/ignore-filename-regex.test | 4 ++-- llvm/unittests/ProfileData/CoverageMappingTest.cpp | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 70f00d333db17..3197f5d1a3d9e 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -770,6 +770,7 @@ LineCoverageStats::LineCoverageStats( ExecutionCount = WrappedSegment->Count; if (!MinRegionCount) return; + ExecutionCount = 0; for (const auto *LS : LineSegments) if (isStartOfRegion(LS)) ExecutionCount = std::max(ExecutionCount, LS->Count); diff --git a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h index 07941f9bb497a..d224fd0d00ea0 100644 --- a/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h +++ b/llvm/test/tools/llvm-cov/Inputs/instrprof-comdat.h @@ -12,7 +12,7 @@ template T FOO::DoIt(T ti) { // HEADER: [[@LINE]]| 2|template for (T I = 0; I < ti; I++) { // HEADER: [[@LINE]]| 22| for (T t += I; // HEADER: [[@LINE]]| 20| t += I; if (I > ti / 2) // HEADER: [[@LINE]]| 20| if (I > ti - t -= 1; // HEADER: [[@LINE]]| 20| t -= 1; + t -= 1; // HEADER: [[@LINE]]| 8| t -= 1; } // HEADER: [[@LINE]]| 20| } // HEADER: [[@LINE]]| 2| return t; // HEADER: [[@LINE]]| 2| return t; diff --git a/llvm/test/tools/llvm-cov/ignore-filename-regex.test b/llvm/test/tools/llvm-cov/ignore-filename-regex.test index b8c15da281c10..0824645b50822 100644 --- a/llvm/test/tools/llvm-cov/ignore-filename-regex.test +++ b/llvm/test/tools/llvm-cov/ignore-filename-regex.test @@ -22,7 +22,7 @@ REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}} REPORT_IGNORE_DIR-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}} REPORT_IGNORE_DIR: {{.*}}abs.h{{.*}} REPORT_IGNORE_DIR: {{.*}}main.cc{{.*}} -REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}100.00%{{$}} +REPORT_IGNORE_DIR: {{^}}TOTAL 5{{.*}}90.00%{{$}} # Ignore all files from "extra" directory even when SOURCES specified. RUN: llvm-cov report -instr-profile %S/Inputs/sources_specified/main.profdata \ @@ -35,7 +35,7 @@ REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}dec.h{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}extra{{[/\\]}}inc.h{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES-NOT: {{.*}}main.cc{{.*}} REPORT_IGNORE_DIR_WITH_SOURCES: {{.*}}abs.h{{.*}} -REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}100.00%{{$}} +REPORT_IGNORE_DIR_WITH_SOURCES: {{^}}TOTAL 4{{.*}}80.00%{{$}} ######################## # Test "show" command. 
diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index 4854b7f1454c3..43386d23883e9 100644 --- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -675,7 +675,7 @@ TEST_P(CoverageMappingTest, test_line_coverage_iterator) { CoverageData Data = LoadedCoverage->getCoverageForFile("file1"); unsigned Line = 0; - unsigned LineCounts[] = {20, 20, 20, 20, 30, 10, 10, 10, 10, 0, 0}; + unsigned LineCounts[] = {20, 20, 20, 20, 10, 10, 10, 10, 10, 0, 0}; for (const auto &LCS : getLineCoverageStats(Data)) { ASSERT_EQ(Line + 1, LCS.getLine()); errs() << "Line: " << Line + 1 << ", count = " << LCS.getExecutionCount() << "\n"; From 54615ec48f3ef6e9ea004bb2b84caadac2dead5b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 Jul 2020 10:09:00 -0400 Subject: [PATCH 400/600] GlobalISel: Move load/store lowering to separate functions --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 276 +++++++++--------- 2 files changed, 145 insertions(+), 133 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index dfd27bd5f7c5f..9ca7bf67e6d51 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -316,6 +316,8 @@ class LegalizerHelper { LLT CastTy); LegalizeResult lowerBitcast(MachineInstr &MI); + LegalizeResult lowerLoad(MachineInstr &MI); + LegalizeResult lowerStore(MachineInstr &MI); LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 592f79aa7b712..f914b0b26e4a6 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2478,6 +2478,145 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerLoad(MachineInstr &MI) { + // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + auto &MMO = **MI.memoperands_begin(); + + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 zextload (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. 
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = MIRBuilder.buildLoadInstr( + TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + + MIRBuilder.buildLoad(DstReg, PtrReg, MMO); + MI.eraseFromParent(); + return Legalized; + } + + if (DstTy.isScalar()) { + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case TargetOpcode::G_LOAD: + MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); + break; + case TargetOpcode::G_SEXTLOAD: + MIRBuilder.buildSExt(DstReg, TmpReg); + break; + case TargetOpcode::G_ZEXTLOAD: + MIRBuilder.buildZExt(DstReg, TmpReg); + break; + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStore(MachineInstr &MI) { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. 
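+  // The anyext keeps the low LargeSplitSize bits of the source in ExtVal; the
+  // logical shift right below moves the remaining SmallSplitSize high bits
+  // down so they can be stored LargeSplitSize / 8 bytes past the base pointer.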
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the PtrAdd and truncating stores. + LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = MIRBuilder.buildConstant( + LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { switch (MI.getOpcode()) { @@ -2658,139 +2797,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: { - // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - Register DstReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - auto &MMO = **MI.memoperands_begin(); - - if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { - if (MI.getOpcode() == TargetOpcode::G_LOAD) { - // This load needs splitting into power of 2 sized loads. - if (DstTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(DstTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Our strategy here is to generate anyextending loads for the smaller - // types up to next power-2 result type, and then combine the two larger - // result values together, before truncating back down to the non-pow-2 - // type. - // E.g. v1 = i24 load => - // v2 = i32 zextload (2 byte) - // v3 = i32 load (1 byte) - // v4 = i32 shl v3, 16 - // v5 = i32 or v4, v2 - // v1 = i24 trunc v5 - // By doing this we generate the correct truncate which should get - // combined away as an artifact with a matching extend. 
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); - uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = MF.getMachineMemOperand( - &MMO, LargeSplitSize / 8, SmallSplitSize / 8); - - LLT PtrTy = MRI.getType(PtrReg); - unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); - LLT AnyExtTy = LLT::scalar(AnyExtSize); - Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); - auto LargeLoad = MIRBuilder.buildLoadInstr( - TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO); - - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), - *SmallMMO); - - auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); - auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); - auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); - MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); - MI.eraseFromParent(); - return Legalized; - } - MIRBuilder.buildLoad(DstReg, PtrReg, MMO); - MI.eraseFromParent(); - return Legalized; - } - - if (DstTy.isScalar()) { - Register TmpReg = - MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); - MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); - switch (MI.getOpcode()) { - default: - llvm_unreachable("Unexpected opcode"); - case TargetOpcode::G_LOAD: - MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); - break; - case TargetOpcode::G_SEXTLOAD: - MIRBuilder.buildSExt(DstReg, TmpReg); - break; - case TargetOpcode::G_ZEXTLOAD: - MIRBuilder.buildZExt(DstReg, TmpReg); - break; - } - MI.eraseFromParent(); - return Legalized; - } - - return UnableToLegalize; - } - case TargetOpcode::G_STORE: { - // Lower a non-power of 2 store into multiple pow-2 stores. - // E.g. split an i24 store into an i16 store + i8 store. - // We do this by first extending the stored value to the next largest power - // of 2 type, and then using truncating stores to store the components. - // By doing this, likewise with G_LOAD, generate an extend that can be - // artifact-combined away instead of leaving behind extracts. - Register SrcReg = MI.getOperand(0).getReg(); - Register PtrReg = MI.getOperand(1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - MachineMemOperand &MMO = **MI.memoperands_begin(); - if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) - return UnableToLegalize; - if (SrcTy.isVector()) - return UnableToLegalize; - if (isPowerOf2_32(SrcTy.getSizeInBits())) - return UnableToLegalize; // Don't know what we're being asked to do. - - // Extend to the next pow-2. - const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); - auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); - - // Obtain the smaller value by shifting away the larger value. - uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); - uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; - auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); - auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - - // Generate the PtrAdd and truncating stores. 
- LLT PtrTy = MRI.getType(PtrReg); - auto OffsetCst = MIRBuilder.buildConstant( - LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8); - Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = - MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); - - MachineFunction &MF = MIRBuilder.getMF(); - MachineMemOperand *LargeMMO = - MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); - MachineMemOperand *SmallMMO = - MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); - MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); - MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); - MI.eraseFromParent(); - return Legalized; - } + case TargetOpcode::G_ZEXTLOAD: + return lowerLoad(MI); + case TargetOpcode::G_STORE: + return lowerStore(MI); case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: From 8f65c933c42879ff807e3518d9f84892babd30a5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 Jul 2020 10:14:22 -0400 Subject: [PATCH 401/600] GlobalISel: Fix redundant variable and shadowing --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f914b0b26e4a6..e7f105f1dfeac 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -921,7 +921,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_INSERT: return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - const auto &MMO = **MI.memoperands_begin(); + auto &MMO = **MI.memoperands_begin(); Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -929,7 +929,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, if (8 * MMO.getSize() != DstTy.getSizeInBits()) { Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - auto &MMO = **MI.memoperands_begin(); MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO); MIRBuilder.buildAnyExt(DstReg, TmpReg); MI.eraseFromParent(); From 1ea182ce79eedae740e20bbb33bdb3c4c4eb53cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 Jul 2020 10:19:02 -0400 Subject: [PATCH 402/600] GlobalISel: Simplify code This cannot be a vector of pointers, so using getScalarSizeInBits just added a bit extra noise. 
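The point is easy to demonstrate: for scalar and pointer `LLT`s the two accessors agree, and only vector types differ, so calling `getScalarSizeInBits()` on a known pointer type hinted at vector handling that cannot occur. An illustrative snippet (assuming the GlobalISel `LLT` API of this period; the function below is ours, not from the patch):

```cpp
#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

void demo() {
  LLT Ptr = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/64);
  LLT Vec = LLT::vector(/*NumElements=*/4, /*ScalarSizeInBits=*/32);
  // For non-vector types the "scalar size" is simply the size:
  bool SameForPtr = Ptr.getSizeInBits() == Ptr.getScalarSizeInBits(); // true
  // Only vectors distinguish the two (128 bits total vs. 32 per element):
  bool SameForVec = Vec.getSizeInBits() == Vec.getScalarSizeInBits(); // false
  (void)SameForPtr;
  (void)SameForVec;
}
```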
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e7f105f1dfeac..d4ee08eca119a 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3515,7 +3515,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, if (NumParts == -1) return UnableToLegalize; - const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + LLT PtrTy = MRI.getType(AddrReg); + const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits()); unsigned TotalSize = ValTy.getSizeInBits(); From 93cebb190ad24ea367c1302ee989e7cafcf238ac Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 31 Jul 2020 10:11:00 -0400 Subject: [PATCH 403/600] GlobalISel: Use buildAnyExtOrTrunc --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d4ee08eca119a..a7d11d9c6c8f4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2551,7 +2551,7 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) { default: llvm_unreachable("Unexpected opcode"); case TargetOpcode::G_LOAD: - MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); + MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg); break; case TargetOpcode::G_SEXTLOAD: MIRBuilder.buildSExt(DstReg, TmpReg); From c35585e209efe69e2233bdc5ecd23bed7b735ba3 Mon Sep 17 00:00:00 2001 From: Yevgeny Rouban Date: Wed, 5 Aug 2020 11:06:54 +0700 Subject: [PATCH 404/600] DomTree: Make PostDomTree immune to block successors swap This is another fix for the bug 46098 where PostDominatorTree is unexpectedly changed by InstCombine's branch swapping transformation. This patch fixes PostDomTree builder. While looking for the furthest away node in a reverse unreachable subgraph this patch runs DFS with successors in their function order. This order is indifferent to the order of successors, so is the furthest away node. Reviewers: kuhar, nikic, lebedev.ri Differential Revision: https://reviews.llvm.org/D84763 --- .../llvm/Support/GenericDomTreeConstruction.h | 52 +++- .../InstCombine/infinite-loop-postdom.ll | 222 ++++++++++++++++++ 2 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index 6a9d38bceb388..3c85cafd6ece9 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -151,6 +151,8 @@ struct SemiNCAInfo { } }; + using NodeOrderMap = DenseMap; + // Custom DFS implementation which can skip nodes based on a provided // predicate. It also collects ReverseChildren so that we don't have to spend // time getting predecessors in SemiNCA. @@ -158,9 +160,13 @@ struct SemiNCAInfo { // If IsReverse is set to true, the DFS walk will be performed backwards // relative to IsPostDom -- using reverse edges for dominators and forward // edges for postdominators. + // + // If SuccOrder is specified then in this order the DFS traverses the children + // otherwise the order is implied by the results of getChildren(). 
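+  // Sorting children by SuccOrder makes the visitation order, and therefore
+  // the furthest-away nodes later chosen as non-trivial roots, independent of
+  // the order in which a terminator happens to list its successors.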
template unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, - unsigned AttachToNum) { + unsigned AttachToNum, + const NodeOrderMap *SuccOrder = nullptr) { assert(V); SmallVector WorkList = {V}; if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; @@ -176,7 +182,14 @@ struct SemiNCAInfo { NumToNode.push_back(BB); constexpr bool Direction = IsReverse != IsPostDom; // XOR. - for (const NodePtr Succ : getChildren(BB, BatchUpdates)) { + auto Successors = getChildren(BB, BatchUpdates); + if (SuccOrder && Successors.size() > 1) + llvm::sort( + Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) { + return SuccOrder->find(A)->second < SuccOrder->find(B)->second; + }); + + for (const NodePtr Succ : Successors) { const auto SIT = NodeToInfo.find(Succ); // Don't visit nodes more than once but remember to collect // ReverseChildren. @@ -372,6 +385,34 @@ struct SemiNCAInfo { // nodes. if (Total + 1 != Num) { HasNonTrivialRoots = true; + + // SuccOrder is the order of blocks in the function. It is needed to make + // the calculation of the FurthestAway node and the whole PostDomTree + // immune to swap successors transformation (e.g. canonicalizing branch + // predicates). SuccOrder is initialized lazily only for successors of + // reverse unreachable nodes. + Optional SuccOrder; + auto InitSuccOrderOnce = [&]() { + SuccOrder = NodeOrderMap(); + for (const auto Node : nodes(DT.Parent)) + if (SNCA.NodeToInfo.count(Node) == 0) + for (const auto Succ : getChildren(Node, SNCA.BatchUpdates)) + SuccOrder->try_emplace(Succ, 0); + + // Add mapping for all entries of SuccOrder. + unsigned NodeNum = 0; + for (const auto Node : nodes(DT.Parent)) { + ++NodeNum; + auto Order = SuccOrder->find(Node); + if (Order != SuccOrder->end()) { + assert(Order->second == 0); + Order->second = NodeNum; + LLVM_DEBUG(dbgs() << "\t\t\tSuccOrder " << NodeNum << ": " + << Node->getName() << "\n"); + } + } + }; + // Make another DFS pass over all other nodes to find the // reverse-unreachable blocks, and find the furthest paths we'll be able // to make. @@ -396,7 +437,12 @@ struct SemiNCAInfo { // expensive and does not always lead to a minimal set of roots. LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n"); - const unsigned NewNum = SNCA.runDFS(I, Num, AlwaysDescend, Num); + if (!SuccOrder) + InitSuccOrderOnce(); + assert(SuccOrder); + + const unsigned NewNum = + SNCA.runDFS(I, Num, AlwaysDescend, Num, &*SuccOrder); const NodePtr FurthestAway = SNCA.NumToNode[NewNum]; LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " diff --git a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll new file mode 100644 index 0000000000000..a6ce1fadde00f --- /dev/null +++ b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll @@ -0,0 +1,222 @@ +; RUN: opt %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info +; RUN: opt %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s +; RUN: opt %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s + +; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree +; if the basic block is post-dom unreachable. 
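+; The first RUN line exercises InstCombine between BPI/BFI computations with
+; -verify-dom-info enabled; the other RUN lines print the PostDomTree, and the
+; shared CHECK-POSTDOM prefixes require each @testX / @testX-canonicalized
+; pair below to produce identical output.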
+ +define void @test1(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp uge i24 %a, %b + br i1 %f, label %B1, label %B2 + +B1: + %x = add i24 %a, %b + br label %B2 + +B2: + br label %LOOP +} + +; The same as @test1 except the LOOP condition canonicalized (as by instcombine). +define void @test1-canonicalized(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f.not = icmp ult i24 %a, %b + br i1 %f.not, label %B2, label %B1 + +B1: + %x = add i24 %a, %b + br label %B2 + +B2: + br label %LOOP +} + +; The same as @test1 but different order of B1 and B2 in the function. +; The different order makes PostDomTree different in presense of postdom +; unreachable blocks. +define void @test2(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp uge i24 %a, %b + br i1 %f, label %B1, label %B2 + +B2: + br label %LOOP + +B1: + %x = add i24 %a, %b + br label %B2 +} + +; The same as @test2 except the LOOP condition canonicalized (as by instcombine). +define void @test2-canonicalized(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f.not = icmp ult i24 %a, %b + br i1 %f.not, label %B2, label %B1 + +B2: + br label %LOOP + +B1: + %x = add i24 %a, %b + br label %B2 +} + +; Two reverse unreachable subgraphs with RU1* and RU2* basic blocks respectively. +define void @test3(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f = icmp uge i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f, label %RU1_B1, label %RU1_B2 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2 = icmp uge i24 %a, %b + br i1 %f2, label %RU2_B1, label %RU2_B2 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; The same as @test3 except the icmp conditions are canonicalized (as by instcombine). +define void @test3-canonicalized(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f.not = icmp ult i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f.not, label %RU1_B2, label %RU1_B1 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2.not = icmp ult i24 %a, %b + br i1 %f2.not, label %RU2_B2, label %RU2_B1 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; PostDomTrees of @test1(), @test2() and @test3() are different. +; PostDomTrees of @testX() and @testX-canonicalize() are the same. + +; CHECK-POSTDOM-LABEL: test1 +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B1 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [4] %B2 +; CHECK-POSTDOM-NEXT: Roots: %B1 + +; CHECK-POSTDOM-LABEL: test1-canonicalized +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. 
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %B1
+; CHECK-POSTDOM-NEXT: [3] %LOOP
+; CHECK-POSTDOM-NEXT: [4] %entry
+; CHECK-POSTDOM-NEXT: [4] %B2
+; CHECK-POSTDOM-NEXT: Roots: %B1
+
+; CHECK-POSTDOM-LABEL: test2
+; CHECK-POSTDOM-NEXT: =============================--------------------------------
+; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %B2
+; CHECK-POSTDOM-NEXT: [3] %LOOP
+; CHECK-POSTDOM-NEXT: [4] %entry
+; CHECK-POSTDOM-NEXT: [3] %B1
+; CHECK-POSTDOM-NEXT: Roots: %B2
+
+; CHECK-POSTDOM-LABEL: test2-canonicalized
+; CHECK-POSTDOM-NEXT: =============================--------------------------------
+; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %B2
+; CHECK-POSTDOM-NEXT: [3] %LOOP
+; CHECK-POSTDOM-NEXT: [4] %entry
+; CHECK-POSTDOM-NEXT: [3] %B1
+; CHECK-POSTDOM-NEXT: Roots: %B2
+
+; CHECK-POSTDOM-LABEL: test3
+; CHECK-POSTDOM-NEXT:=============================--------------------------------
+; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %EXIT
+; CHECK-POSTDOM-NEXT: [2] %entry
+; CHECK-POSTDOM-NEXT: [2] %RU1_B1
+; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP
+; CHECK-POSTDOM-NEXT: [4] %RU1
+; CHECK-POSTDOM-NEXT: [4] %RU1_B2
+; CHECK-POSTDOM-NEXT: [2] %RU2_B1
+; CHECK-POSTDOM-NEXT: [3] %RU2
+; CHECK-POSTDOM-NEXT: [3] %RU2_B2
+; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1
+
+; CHECK-POSTDOM-LABEL: test3-canonicalized
+; CHECK-POSTDOM-NEXT:=============================--------------------------------
+; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %EXIT
+; CHECK-POSTDOM-NEXT: [2] %entry
+; CHECK-POSTDOM-NEXT: [2] %RU1_B1
+; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP
+; CHECK-POSTDOM-NEXT: [4] %RU1
+; CHECK-POSTDOM-NEXT: [4] %RU1_B2
+; CHECK-POSTDOM-NEXT: [2] %RU2_B1
+; CHECK-POSTDOM-NEXT: [3] %RU2
+; CHECK-POSTDOM-NEXT: [3] %RU2_B2
+; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1

From 02a629daad0a1b7c8f70b11b312e94725c386dee Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov
Date: Wed, 29 Jul 2020 19:19:00 +0700
Subject: [PATCH 405/600] [BPI][NFC] Unify handling of normal and SCC based loops

This is one more NFC part extracted from D79485. Normal and SCC-based
loops have very different representations and have to be handled
separately each time we deal with loops. D79485 is going to introduce
much more extensive use of loops, which would be problematic without
this change.

Reviewed By: davidxl

Differential Revision: https://reviews.llvm.org/D84838
---
 .../llvm/Analysis/BranchProbabilityInfo.h     |  48 ++++++++
 llvm/lib/Analysis/BranchProbabilityInfo.cpp   | 105 +++++++++++++-----
 2 files changed, 126 insertions(+), 27 deletions(-)

diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 7feb5b6259380..447f14501cb65 100644
--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -32,6 +32,7 @@ namespace llvm {
 
 class Function;
+class Loop;
 class LoopInfo;
 class raw_ostream;
 class PostDominatorTree;
@@ -230,6 +231,32 @@ class BranchProbabilityInfo {
         : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {}
   };
 
+  /// Pair of Loop and SCC ID number. Used to unify handling of normal and
+  /// SCC based loop representations.
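+  /// A block inside a natural loop carries its Loop pointer and keeps -1 as
+  /// its SCC number; a block outside any natural loop falls back to the number
+  /// of the SCC containing it, if any.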
+ using LoopData = std::pair; + /// Helper class to keep basic block along with its loop data information. + class LoopBlock { + public: + explicit LoopBlock(const BasicBlock *BB, const LoopInfo &LI, + const SccInfo &SccI); + + const BasicBlock *getBlock() const { return BB; } + Loop *getLoop() const { return LD.first; } + int getSccNum() const { return LD.second; } + + bool belongsToLoop() const { return getLoop() || getSccNum() != -1; } + bool belongsToSameLoop(const LoopBlock &LB) const { + return (LB.getLoop() && getLoop() == LB.getLoop()) || + (LB.getSccNum() != -1 && getSccNum() == LB.getSccNum()); + } + + private: + const BasicBlock *const BB = nullptr; + LoopData LD = {nullptr, -1}; + }; + // Pair of LoopBlocks representing an edge from first to second block. + using LoopEdge = std::pair; + DenseSet> Handles; // Since we allow duplicate edges from one basic block to another, we use @@ -258,6 +285,27 @@ class BranchProbabilityInfo { /// Track the set of blocks that always lead to a cold call. SmallPtrSet PostDominatedByColdCall; + /// Returns true if destination block belongs to some loop and source block is + /// either doesn't belong to any loop or belongs to a loop which is not inner + /// relative to the destination block. + bool isLoopEnteringEdge(const LoopEdge &Edge) const; + /// Returns true if source block belongs to some loop and destination block is + /// either doesn't belong to any loop or belongs to a loop which is not inner + /// relative to the source block. + bool isLoopExitingEdge(const LoopEdge &Edge) const; + /// Returns true if \p Edge is either enters to or exits from some loop, false + /// in all other cases. + bool isLoopEnteringExitingEdge(const LoopEdge &Edge) const; + /// Returns true if source and destination blocks belongs to the same loop and + /// destination block is loop header. + bool isLoopBackEdge(const LoopEdge &Edge) const; + // Fills in \p Enters vector with all "enter" blocks to a loop \LB belongs to. + void getLoopEnterBlocks(const LoopBlock &LB, + SmallVectorImpl &Enters) const; + // Fills in \p Exits vector with all "exit" blocks from a loop \LB belongs to. + void getLoopExitBlocks(const LoopBlock &LB, + SmallVectorImpl &Exits) const; + void computePostDominatedByUnreachable(const Function &F, PostDominatorTree *PDT); void computePostDominatedByColdCall(const Function &F, diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 7b24fe9d56c25..0a14c8c2bc44f 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -247,6 +247,66 @@ void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, } } +BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, + const LoopInfo &LI, + const SccInfo &SccI) + : BB(BB) { + LD.first = LI.getLoopFor(BB); + if (!LD.first) { + LD.second = SccI.getSCCNum(BB); + } +} + +bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { + const auto &SrcBlock = Edge.first; + const auto &DstBlock = Edge.second; + return (DstBlock.getLoop() && + !DstBlock.getLoop()->contains(SrcBlock.getLoop())) || + // Assume that SCCs can't be nested. 
+ (DstBlock.getSccNum() != -1 && + SrcBlock.getSccNum() != DstBlock.getSccNum()); +} + +bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { + return isLoopEnteringEdge({Edge.second, Edge.first}); +} + +bool BranchProbabilityInfo::isLoopEnteringExitingEdge( + const LoopEdge &Edge) const { + return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); +} + +bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { + const auto &SrcBlock = Edge.first; + const auto &DstBlock = Edge.second; + return SrcBlock.belongsToSameLoop(DstBlock) && + ((DstBlock.getLoop() && + DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || + (DstBlock.getSccNum() != -1 && + SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum()))); +} + +void BranchProbabilityInfo::getLoopEnterBlocks( + const LoopBlock &LB, SmallVectorImpl &Enters) const { + if (LB.getLoop()) { + auto *Header = LB.getLoop()->getHeader(); + Enters.append(pred_begin(Header), pred_end(Header)); + } else { + assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); + SccI->getSccEnterBlocks(LB.getSccNum(), Enters); + } +} + +void BranchProbabilityInfo::getLoopExitBlocks( + const LoopBlock &LB, SmallVectorImpl &Exits) const { + if (LB.getLoop()) { + LB.getLoop()->getExitBlocks(Exits); + } else { + assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); + SccI->getSccExitBlocks(LB.getSccNum(), Exits); + } +} + static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, SmallVectorImpl &WorkList, SmallPtrSetImpl &TargetSet) { @@ -720,17 +780,13 @@ computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, // as taken, exiting edges as not-taken. bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI) { - int SccNum; - Loop *L = LI.getLoopFor(BB); - if (!L) { - SccNum = SccI->getSCCNum(BB); - if (SccNum < 0) - return false; - } + LoopBlock LB(BB, LI, *SccI.get()); + if (!LB.belongsToLoop()) + return false; SmallPtrSet UnlikelyBlocks; - if (L) - computeUnlikelySuccessors(BB, L, UnlikelyBlocks); + if (LB.getLoop()) + computeUnlikelySuccessors(BB, LB.getLoop(), UnlikelyBlocks); SmallVector BackEdges; SmallVector ExitingEdges; @@ -738,24 +794,19 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, SmallVector UnlikelyEdges; for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch - // irreducible loops. 
- if (L) { - if (UnlikelyBlocks.count(*I) != 0) - UnlikelyEdges.push_back(I.getSuccessorIndex()); - else if (!L->contains(*I)) - ExitingEdges.push_back(I.getSuccessorIndex()); - else if (L->getHeader() == *I) - BackEdges.push_back(I.getSuccessorIndex()); - else - InEdges.push_back(I.getSuccessorIndex()); - } else { - if (SccI->getSCCNum(*I) != SccNum) - ExitingEdges.push_back(I.getSuccessorIndex()); - else if (SccI->isSCCHeader(*I, SccNum)) - BackEdges.push_back(I.getSuccessorIndex()); - else - InEdges.push_back(I.getSuccessorIndex()); + LoopBlock SuccLB(*I, LI, *SccI.get()); + LoopEdge Edge(LB, SuccLB); + bool IsUnlikelyEdge = + LB.getLoop() && (UnlikelyBlocks.find(*I) != UnlikelyBlocks.end()); + + if (IsUnlikelyEdge) + UnlikelyEdges.push_back(I.getSuccessorIndex()); + else if (isLoopExitingEdge(Edge)) + ExitingEdges.push_back(I.getSuccessorIndex()); + else if (isLoopBackEdge(Edge)) + BackEdges.push_back(I.getSuccessorIndex()); + else { + InEdges.push_back(I.getSuccessorIndex()); } } From 1366d66a22a5f0d25fcc6e922118bb51ab22f8c1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 5 Aug 2020 04:31:30 +0000 Subject: [PATCH 406/600] Revert "DomTree: Make PostDomTree immune to block successors swap" This reverts commit c35585e209efe69e2233bdc5ecd23bed7b735ba3. The MLIR is broken with this patch, reproduce by adding -DLLVM_ENABLE_PROJECTS=mlir to the cmake configuration and build `ninja tools/mlir/lib/IR/CMakeFiles/obj.MLIRIR.dir/Dominance.cpp.o` --- .../llvm/Support/GenericDomTreeConstruction.h | 52 +--- .../InstCombine/infinite-loop-postdom.ll | 222 ------------------ 2 files changed, 3 insertions(+), 271 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index 3c85cafd6ece9..6a9d38bceb388 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -151,8 +151,6 @@ struct SemiNCAInfo { } }; - using NodeOrderMap = DenseMap; - // Custom DFS implementation which can skip nodes based on a provided // predicate. It also collects ReverseChildren so that we don't have to spend // time getting predecessors in SemiNCA. @@ -160,13 +158,9 @@ struct SemiNCAInfo { // If IsReverse is set to true, the DFS walk will be performed backwards // relative to IsPostDom -- using reverse edges for dominators and forward // edges for postdominators. - // - // If SuccOrder is specified then in this order the DFS traverses the children - // otherwise the order is implied by the results of getChildren(). template unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, - unsigned AttachToNum, - const NodeOrderMap *SuccOrder = nullptr) { + unsigned AttachToNum) { assert(V); SmallVector WorkList = {V}; if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; @@ -182,14 +176,7 @@ struct SemiNCAInfo { NumToNode.push_back(BB); constexpr bool Direction = IsReverse != IsPostDom; // XOR. - auto Successors = getChildren(BB, BatchUpdates); - if (SuccOrder && Successors.size() > 1) - llvm::sort( - Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) { - return SuccOrder->find(A)->second < SuccOrder->find(B)->second; - }); - - for (const NodePtr Succ : Successors) { + for (const NodePtr Succ : getChildren(BB, BatchUpdates)) { const auto SIT = NodeToInfo.find(Succ); // Don't visit nodes more than once but remember to collect // ReverseChildren. 
@@ -385,34 +372,6 @@ struct SemiNCAInfo { // nodes. if (Total + 1 != Num) { HasNonTrivialRoots = true; - - // SuccOrder is the order of blocks in the function. It is needed to make - // the calculation of the FurthestAway node and the whole PostDomTree - // immune to swap successors transformation (e.g. canonicalizing branch - // predicates). SuccOrder is initialized lazily only for successors of - // reverse unreachable nodes. - Optional SuccOrder; - auto InitSuccOrderOnce = [&]() { - SuccOrder = NodeOrderMap(); - for (const auto Node : nodes(DT.Parent)) - if (SNCA.NodeToInfo.count(Node) == 0) - for (const auto Succ : getChildren(Node, SNCA.BatchUpdates)) - SuccOrder->try_emplace(Succ, 0); - - // Add mapping for all entries of SuccOrder. - unsigned NodeNum = 0; - for (const auto Node : nodes(DT.Parent)) { - ++NodeNum; - auto Order = SuccOrder->find(Node); - if (Order != SuccOrder->end()) { - assert(Order->second == 0); - Order->second = NodeNum; - LLVM_DEBUG(dbgs() << "\t\t\tSuccOrder " << NodeNum << ": " - << Node->getName() << "\n"); - } - } - }; - // Make another DFS pass over all other nodes to find the // reverse-unreachable blocks, and find the furthest paths we'll be able // to make. @@ -437,12 +396,7 @@ struct SemiNCAInfo { // expensive and does not always lead to a minimal set of roots. LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n"); - if (!SuccOrder) - InitSuccOrderOnce(); - assert(SuccOrder); - - const unsigned NewNum = - SNCA.runDFS(I, Num, AlwaysDescend, Num, &*SuccOrder); + const unsigned NewNum = SNCA.runDFS(I, Num, AlwaysDescend, Num); const NodePtr FurthestAway = SNCA.NumToNode[NewNum]; LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " diff --git a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll deleted file mode 100644 index a6ce1fadde00f..0000000000000 --- a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll +++ /dev/null @@ -1,222 +0,0 @@ -; RUN: opt %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info -; RUN: opt %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s -; RUN: opt %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s - -; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree -; if the basic block is post-dom unreachable. - -define void @test1(i24 %a, i24 %b) { -entry: - br label %LOOP - -LOOP: - %f = icmp uge i24 %a, %b - br i1 %f, label %B1, label %B2 - -B1: - %x = add i24 %a, %b - br label %B2 - -B2: - br label %LOOP -} - -; The same as @test1 except the LOOP condition canonicalized (as by instcombine). -define void @test1-canonicalized(i24 %a, i24 %b) { -entry: - br label %LOOP - -LOOP: - %f.not = icmp ult i24 %a, %b - br i1 %f.not, label %B2, label %B1 - -B1: - %x = add i24 %a, %b - br label %B2 - -B2: - br label %LOOP -} - -; The same as @test1 but different order of B1 and B2 in the function. -; The different order makes PostDomTree different in presense of postdom -; unreachable blocks. -define void @test2(i24 %a, i24 %b) { -entry: - br label %LOOP - -LOOP: - %f = icmp uge i24 %a, %b - br i1 %f, label %B1, label %B2 - -B2: - br label %LOOP - -B1: - %x = add i24 %a, %b - br label %B2 -} - -; The same as @test2 except the LOOP condition canonicalized (as by instcombine). 
-define void @test2-canonicalized(i24 %a, i24 %b) { -entry: - br label %LOOP - -LOOP: - %f.not = icmp ult i24 %a, %b - br i1 %f.not, label %B2, label %B1 - -B2: - br label %LOOP - -B1: - %x = add i24 %a, %b - br label %B2 -} - -; Two reverse unreachable subgraphs with RU1* and RU2* basic blocks respectively. -define void @test3(i24 %a, i24 %b, i32 %flag) { -entry: - switch i32 %flag, label %EXIT [ - i32 1, label %RU1 - i32 2, label %RU2 - i32 3, label %RU2_B1 - ] - -RU1: - %f = icmp uge i24 %a, %b - br label %RU1_LOOP - -RU1_LOOP: - br i1 %f, label %RU1_B1, label %RU1_B2 - -RU1_B1: - %x = add i24 %a, %b - br label %RU1_B2 - -RU1_B2: - br label %RU1_LOOP - -RU2: - %f2 = icmp uge i24 %a, %b - br i1 %f2, label %RU2_B1, label %RU2_B2 - -RU2_B1: - br label %RU2_B2 - -RU2_B2: - br label %RU2_B1 - -EXIT: - ret void -} - -; The same as @test3 except the icmp conditions are canonicalized (as by instcombine). -define void @test3-canonicalized(i24 %a, i24 %b, i32 %flag) { -entry: - switch i32 %flag, label %EXIT [ - i32 1, label %RU1 - i32 2, label %RU2 - i32 3, label %RU2_B1 - ] - -RU1: - %f.not = icmp ult i24 %a, %b - br label %RU1_LOOP - -RU1_LOOP: - br i1 %f.not, label %RU1_B2, label %RU1_B1 - -RU1_B1: - %x = add i24 %a, %b - br label %RU1_B2 - -RU1_B2: - br label %RU1_LOOP - -RU2: - %f2.not = icmp ult i24 %a, %b - br i1 %f2.not, label %RU2_B2, label %RU2_B1 - -RU2_B1: - br label %RU2_B2 - -RU2_B2: - br label %RU2_B1 - -EXIT: - ret void -} - -; PostDomTrees of @test1(), @test2() and @test3() are different. -; PostDomTrees of @testX() and @testX-canonicalize() are the same. - -; CHECK-POSTDOM-LABEL: test1 -; CHECK-POSTDOM-NEXT: =============================-------------------------------- -; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. -; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %B1 -; CHECK-POSTDOM-NEXT: [3] %LOOP -; CHECK-POSTDOM-NEXT: [4] %entry -; CHECK-POSTDOM-NEXT: [4] %B2 -; CHECK-POSTDOM-NEXT: Roots: %B1 - -; CHECK-POSTDOM-LABEL: test1-canonicalized -; CHECK-POSTDOM-NEXT: =============================-------------------------------- -; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. -; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %B1 -; CHECK-POSTDOM-NEXT: [3] %LOOP -; CHECK-POSTDOM-NEXT: [4] %entry -; CHECK-POSTDOM-NEXT: [4] %B2 -; CHECK-POSTDOM-NEXT: Roots: %B1 - -; CHECK-POSTDOM-LABEL: test2 -; CHECK-POSTDOM-NEXT: =============================-------------------------------- -; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. -; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %B2 -; CHECK-POSTDOM-NEXT: [3] %LOOP -; CHECK-POSTDOM-NEXT: [4] %entry -; CHECK-POSTDOM-NEXT: [3] %B1 -; CHECK-POSTDOM-NEXT: Roots: %B2 - -; CHECK-POSTDOM-LABEL: test2-canonicalized -; CHECK-POSTDOM-NEXT: =============================-------------------------------- -; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. -; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %B2 -; CHECK-POSTDOM-NEXT: [3] %LOOP -; CHECK-POSTDOM-NEXT: [4] %entry -; CHECK-POSTDOM-NEXT: [3] %B1 -; CHECK-POSTDOM-NEXT: Roots: %B2 - -; CHECK-POSTDOM-LABEL: test3 -; CHECK-POSTDOM-NEXT:=============================-------------------------------- -; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. 
-; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %EXIT -; CHECK-POSTDOM-NEXT: [2] %entry -; CHECK-POSTDOM-NEXT: [2] %RU1_B1 -; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP -; CHECK-POSTDOM-NEXT: [4] %RU1 -; CHECK-POSTDOM-NEXT: [4] %RU1_B2 -; CHECK-POSTDOM-NEXT: [2] %RU2_B1 -; CHECK-POSTDOM-NEXT: [3] %RU2 -; CHECK-POSTDOM-NEXT: [3] %RU2_B2 -; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1 - -; CHECK-POSTDOM-LABEL: test3-canonicalized -; CHECK-POSTDOM-NEXT:=============================-------------------------------- -; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. -; CHECK-POSTDOM-NEXT: [1] <> -; CHECK-POSTDOM-NEXT: [2] %EXIT -; CHECK-POSTDOM-NEXT: [2] %entry -; CHECK-POSTDOM-NEXT: [2] %RU1_B1 -; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP -; CHECK-POSTDOM-NEXT: [4] %RU1 -; CHECK-POSTDOM-NEXT: [4] %RU1_B2 -; CHECK-POSTDOM-NEXT: [2] %RU2_B1 -; CHECK-POSTDOM-NEXT: [3] %RU2 -; CHECK-POSTDOM-NEXT: [3] %RU2_B2 -; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1 From 3401f9706be14f9c103542c8b6034a1126b9859e Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 17:21:32 +0900 Subject: [PATCH 407/600] [JumpThreading] Add a test for D85023; NFC --- .../JumpThreading/thread-two-bbs-threshold.ll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll new file mode 100644 index 0000000000000..5650d8b5a83b7 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -jump-threading -jump-threading-threshold=3 -S -verify | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 + +define void @foo(i32 %cond1, i32 %cond2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[COND1:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB_COND2:%.*]], label [[BB_F1:%.*]] +; CHECK: bb.f1: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[BB_COND2]] +; CHECK: bb.cond2: +; CHECK-NEXT: [[PTR:%.*]] = phi i32* [ null, [[BB_F1]] ], [ @a, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i32 [[COND2:%.*]] +; CHECK-NEXT: [[X:%.*]] = add i32 [[COND2_FR]], 1 +; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[X]], 0 +; CHECK-NEXT: br i1 [[TOBOOL1]], label [[BB_FILE:%.*]], label [[BB_F2:%.*]] +; CHECK: bb.f2: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: bb.file: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR]], null +; CHECK-NEXT: br i1 [[CMP]], label [[BB_F4:%.*]], label [[BB_F3:%.*]] +; CHECK: bb.f3: +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: bb.f4: +; CHECK-NEXT: call void @f4() +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %tobool = icmp eq i32 %cond1, 0 + br i1 %tobool, label %bb.cond2, label %bb.f1 + +bb.f1: + call void @f1() + br label %bb.cond2 + +bb.cond2: + %ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ] + %cond2.fr = freeze i32 %cond2 + %x = add i32 %cond2.fr, 1 + %tobool1 = icmp eq i32 %x, 0 + br i1 %tobool1, label %bb.file, label %bb.f2 + +bb.f2: + call void @f2() + br label %exit + +bb.file: + %cmp = icmp eq i32* %ptr, null + br i1 %cmp, label 
%bb.f4, label %bb.f3 + +bb.f3: + call void @f3() + br label %exit + +bb.f4: + call void @f4() + br label %exit + +exit: + ret void +} + +declare void @f1() + +declare void @f2() + +declare void @f3() + +declare void @f4() From e0d99e9aaf51dac0555655cbf17909377ed37a27 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 4 Aug 2020 17:22:36 +0900 Subject: [PATCH 408/600] [JumpThreading] Consider freeze as a zero-cost instruction This is a simple patch that makes freeze as a zero-cost instruction, as bitcast already is. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D85023 --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 4 ++++ .../JumpThreading/thread-two-bbs-threshold.ll | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index e6d261fa9aff5..57ebeeaa7e177 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -535,6 +535,10 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, if (isa(I) && I->getType()->isPointerTy()) continue; + // Freeze instruction is free, too. + if (isa(I)) + continue; + // Bail out if this instruction gives back a token type, it is not possible // to duplicate it if it is used outside this BB. if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll index 5650d8b5a83b7..9b8273db18834 100644 --- a/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll +++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs-threshold.ll @@ -6,30 +6,31 @@ target triple = "x86_64-unknown-linux-gnu" @a = global i32 0, align 4 +; Show that freeze is not counted when comparing the cost with the threshold define void @foo(i32 %cond1, i32 %cond2) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[COND1:%.*]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB_COND2:%.*]], label [[BB_F1:%.*]] -; CHECK: bb.f1: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[BB_COND2]] +; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB_COND2_THREAD:%.*]], label [[BB_COND2:%.*]] ; CHECK: bb.cond2: -; CHECK-NEXT: [[PTR:%.*]] = phi i32* [ null, [[BB_F1]] ], [ @a, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @f1() ; CHECK-NEXT: [[COND2_FR:%.*]] = freeze i32 [[COND2:%.*]] ; CHECK-NEXT: [[X:%.*]] = add i32 [[COND2_FR]], 1 ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: br i1 [[TOBOOL1]], label [[BB_FILE:%.*]], label [[BB_F2:%.*]] +; CHECK-NEXT: br i1 [[TOBOOL1]], label [[BB_F4:%.*]], label [[BB_F2:%.*]] +; CHECK: bb.cond2.thread: +; CHECK-NEXT: [[COND2_FR2:%.*]] = freeze i32 [[COND2]] +; CHECK-NEXT: [[X3:%.*]] = add i32 [[COND2_FR2]], 1 +; CHECK-NEXT: [[TOBOOL14:%.*]] = icmp eq i32 [[X3]], 0 +; CHECK-NEXT: br i1 [[TOBOOL14]], label [[BB_F3:%.*]], label [[BB_F2]] ; CHECK: bb.f2: ; CHECK-NEXT: call void @f2() ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: bb.file: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[PTR]], null -; CHECK-NEXT: br i1 [[CMP]], label [[BB_F4:%.*]], label [[BB_F3:%.*]] ; CHECK: bb.f3: ; CHECK-NEXT: call void @f3() ; CHECK-NEXT: br label [[EXIT]] ; CHECK: bb.f4: +; CHECK-NEXT: [[PTR5:%.*]] = phi i32* [ null, [[BB_COND2]] ] ; CHECK-NEXT: call void @f4() ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: From b989fcbae6f179ad887d19ceef83ace1c00b87cc Mon Sep 17 00:00:00 2001 From: 
Martin Storsjö
Date: Tue, 4 Aug 2020 10:24:32 +0300
Subject: [PATCH 409/600] [llvm-rc] Allow string table values split into
 multiple string literals

In practice, this can easily be a product of combining strings with
macros in resource files.

This fixes https://github.com/mstorsjo/llvm-mingw/issues/140.

String literals within llvm-rc are handled as StringRefs, each
referencing an uninterpreted slice of the input file, and the actual
interpretation of a string (codepage handling, unescaping etc.) happens
only right before writing it out to disk. Without rearchitecting a
large part of llvm-rc, it is therefore hard to concatenate literals
other than by bundling them up in a vector.

This matches how the same feature is already supported in
VersionInfoValue, with a std::vector of values.

MS rc.exe only supports concatenated string literals in version info
values (already supported), string tables (implemented in this patch)
and user data resources (easily implemented in a separate patch, but
not requested by any end user yet), while GNU windres supports string
immediates split into multiple strings anywhere (e.g. 100 ICON "myicon"
".ico"). It is unclear, though, whether concatenation in other
statements is actually used in the wild in resource files normally
built by GNU windres.

Differential Revision: https://reviews.llvm.org/D85183
---
 .../llvm-rc/Inputs/tag-stringtable-basic.rc  |  4 ++--
 llvm/tools/llvm-rc/ResourceFileWriter.cpp    | 17 ++++++++++-------
 llvm/tools/llvm-rc/ResourceFileWriter.h      |  5 +++--
 llvm/tools/llvm-rc/ResourceScriptParser.cpp  |  8 +++++++-
 llvm/tools/llvm-rc/ResourceScriptStmt.cpp    |  8 ++++++--
 llvm/tools/llvm-rc/ResourceScriptStmt.h      |  6 +++---
 6 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/llvm/test/tools/llvm-rc/Inputs/tag-stringtable-basic.rc b/llvm/test/tools/llvm-rc/Inputs/tag-stringtable-basic.rc
index afda2f3af63d2..7c929bb4a3266 100644
--- a/llvm/test/tools/llvm-rc/Inputs/tag-stringtable-basic.rc
+++ b/llvm/test/tools/llvm-rc/Inputs/tag-stringtable-basic.rc
@@ -13,8 +13,8 @@ STRINGTABLE {
 
 STRINGTABLE VERSION 100 LANGUAGE 4, 7 {
-  16 "hello"
-  17 "world"
+  16 "hel" "lo"
+  17 "wor" L"ld"
 }
 
 STRINGTABLE
diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.cpp b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
index d8d4014124860..09b078c94cd29 100644
--- a/llvm/tools/llvm-rc/ResourceFileWriter.cpp
+++ b/llvm/tools/llvm-rc/ResourceFileWriter.cpp
@@ -1246,7 +1246,8 @@ Error ResourceFileWriter::visitStringTableBundle(const RCResource *Res) {
 }
 
 Error ResourceFileWriter::insertStringIntoBundle(
-    StringTableInfo::Bundle &Bundle, uint16_t StringID, StringRef String) {
+    StringTableInfo::Bundle &Bundle, uint16_t StringID,
+    const std::vector<StringRef> &String) {
   uint16_t StringLoc = StringID & 15;
   if (Bundle.Data[StringLoc])
     return createError("Multiple STRINGTABLE strings located under ID " +
@@ -1261,13 +1262,15 @@ Error ResourceFileWriter::writeStringTableBundleBody(const RCResource *Base) {
     // The string format is a tiny bit different here. We
     // first output the size of the string, and then the string itself
     // (which is not null-terminated).
- bool IsLongString; SmallVector Data; - RETURN_IF_ERROR(processString(Res->Bundle.Data[ID].getValueOr(StringRef()), - NullHandlingMethod::CutAtDoubleNull, - IsLongString, Data, Params.CodePage)); - if (AppendNull && Res->Bundle.Data[ID]) - Data.push_back('\0'); + if (Res->Bundle.Data[ID]) { + bool IsLongString; + for (StringRef S : *Res->Bundle.Data[ID]) + RETURN_IF_ERROR(processString(S, NullHandlingMethod::CutAtDoubleNull, + IsLongString, Data, Params.CodePage)); + if (AppendNull) + Data.push_back('\0'); + } RETURN_IF_ERROR( checkNumberFits(Data.size(), "STRINGTABLE string size")); writeInt(Data.size()); diff --git a/llvm/tools/llvm-rc/ResourceFileWriter.h b/llvm/tools/llvm-rc/ResourceFileWriter.h index 673830601e86b..d545a7a9cab15 100644 --- a/llvm/tools/llvm-rc/ResourceFileWriter.h +++ b/llvm/tools/llvm-rc/ResourceFileWriter.h @@ -103,7 +103,7 @@ class ResourceFileWriter : public Visitor { using BundleKey = std::pair; // Each bundle is in fact an array of 16 strings. struct Bundle { - std::array, 16> Data; + std::array>, 16> Data; ObjectInfo DeclTimeInfo; uint16_t MemoryFlags; Bundle(const ObjectInfo &Info, uint16_t Flags) @@ -157,7 +157,8 @@ class ResourceFileWriter : public Visitor { Error visitStringTableBundle(const RCResource *); Error writeStringTableBundleBody(const RCResource *); Error insertStringIntoBundle(StringTableInfo::Bundle &Bundle, - uint16_t StringID, StringRef String); + uint16_t StringID, + const std::vector &String); // User defined resource Error writeUserDefinedBody(const RCResource *); diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.cpp b/llvm/tools/llvm-rc/ResourceScriptParser.cpp index 36b305645fb81..2155985c61b8b 100644 --- a/llvm/tools/llvm-rc/ResourceScriptParser.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptParser.cpp @@ -698,8 +698,14 @@ RCParser::ParseType RCParser::parseStringTableResource() { // between, however we strictly adhere to the single statement definition. 
ASSIGN_OR_RETURN(IDResult, readInt()); consumeOptionalType(Kind::Comma); + + std::vector Strings; ASSIGN_OR_RETURN(StrResult, readString()); - Table->addString(*IDResult, *StrResult); + Strings.push_back(*StrResult); + while (isNextTokenKind(Kind::String)) + Strings.push_back(read().value()); + + Table->addStrings(*IDResult, std::move(Strings)); } return std::move(Table); diff --git a/llvm/tools/llvm-rc/ResourceScriptStmt.cpp b/llvm/tools/llvm-rc/ResourceScriptStmt.cpp index a0d4adbe64189..ef8c34541881a 100644 --- a/llvm/tools/llvm-rc/ResourceScriptStmt.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptStmt.cpp @@ -118,8 +118,12 @@ raw_ostream &MenuResource::log(raw_ostream &OS) const { raw_ostream &StringTableResource::log(raw_ostream &OS) const { OS << "StringTable:\n"; OptStatements->log(OS); - for (const auto &String : Table) - OS << " " << String.first << " => " << String.second << "\n"; + for (const auto &String : Table) { + OS << " " << String.first << " =>"; + for (const auto &S : String.second) + OS << " " << S; + OS << "\n"; + } return OS; } diff --git a/llvm/tools/llvm-rc/ResourceScriptStmt.h b/llvm/tools/llvm-rc/ResourceScriptStmt.h index b772732e78e69..27fbea3ae8cb4 100644 --- a/llvm/tools/llvm-rc/ResourceScriptStmt.h +++ b/llvm/tools/llvm-rc/ResourceScriptStmt.h @@ -583,12 +583,12 @@ class MenuResource : public OptStatementsRCResource { // Ref: msdn.microsoft.com/en-us/library/windows/desktop/aa381050(v=vs.85).aspx class StringTableResource : public OptStatementsRCResource { public: - std::vector> Table; + std::vector>> Table; StringTableResource(OptionalStmtList &&List, uint16_t Flags) : OptStatementsRCResource(std::move(List), Flags) {} - void addString(uint32_t ID, StringRef String) { - Table.emplace_back(ID, String); + void addStrings(uint32_t ID, std::vector &&Strings) { + Table.emplace_back(ID, Strings); } raw_ostream &log(raw_ostream &) const override; Twine getResourceTypeName() const override { return "STRINGTABLE"; } From 521c0b2659074c512d292dc30da78c862782d34c Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Wed, 5 Aug 2020 09:27:03 +0300 Subject: [PATCH 410/600] [MLIR][SPIRVToLLVM] Updated documentation for SPIR-V to LLVM conversion Updated the documentation for SPIR-V to LLVM conversion, particularly: - Added a section on control flow - Added a section on memory ops - Added a section on GLSL ops Also, moved `spv.FunctionCall` to control flow section. Added a new section that will be used to describe the modelling of runtime-related ops. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D84734 --- mlir/docs/SPIRVToLLVMDialectConversion.md | 245 +++++++++++++++++++--- 1 file changed, 216 insertions(+), 29 deletions(-) diff --git a/mlir/docs/SPIRVToLLVMDialectConversion.md b/mlir/docs/SPIRVToLLVMDialectConversion.md index 1f76741bb2ed8..dcc872f59542d 100644 --- a/mlir/docs/SPIRVToLLVMDialectConversion.md +++ b/mlir/docs/SPIRVToLLVMDialectConversion.md @@ -88,8 +88,8 @@ at the moment. Hence, we adhere to the following mapping: Examples of SPIR-V struct conversion are: ```mlir -!spv.struct => !llvm<"<{ i8, i32> }>"> -!spv.struct => !llvm<"{ i8, i32> }"> +!spv.struct => !llvm<"<{ i8, i32 }>"> +!spv.struct => !llvm<"{ i8, i32 }"> // error !spv.struct @@ -373,6 +373,116 @@ modelled with `xor` operation with a mask with all bits set. %0 = spv.LogicalNot %op : i1 => %0 = llvm.xor %op, %mask : !llvm.i1 ``` +### Memory ops + +This section describes the conversion patterns for SPIR-V dialect operations +that concern memory. 
+
+#### `spv.Load` and `spv.Store`
+
+These ops are converted to their LLVM counterparts: `llvm.load` and
+`llvm.store`. If the op has a memory access attribute, there are the
+following cases, based on the value of the attribute:
+
+* **Aligned**: the alignment is passed on to the LLVM op builder, for example:
+  ```mlir
+  // llvm.store %ptr, %val {alignment = 4 : i64} : !llvm<"float*">
+  spv.Store "Function" %ptr, %val ["Aligned", 4] : f32
+  ```
+* **None**: same case as if there is no memory access attribute.
+
+* **Nontemporal**: set the `nontemporal` flag, for example:
+  ```mlir
+  // %res = llvm.load %ptr {nontemporal} : !llvm<"float*">
+  %res = spv.Load "Function" %ptr ["Nontemporal"] : f32
+  ```
+* **Volatile**: mark the op as `volatile`, for example:
+  ```mlir
+  // %res = llvm.load volatile %ptr : !llvm<"float*">
+  %res = spv.Load "Function" %ptr ["Volatile"] : f32
+  ```
+Otherwise the conversion fails, as the other cases (`MakePointerAvailable`,
+`MakePointerVisible`, `NonPrivatePointer`) are not supported yet.
+
+#### `spv.globalVariable` and `spv._address_of`
+
+`spv.globalVariable` is modelled with the `llvm.mlir.global` op. However, there
+is a difference that has to be pointed out.
+
+In the SPIR-V dialect, the global variable returns a pointer, whereas in the
+LLVM dialect the global holds an actual value. This difference is handled by
+the `spv._address_of` and `llvm.mlir.addressof` ops, which both return a
+pointer and are used to reference the global.
+
+```mlir
+// Original SPIR-V module
+spv.module Logical GLSL450 {
+  spv.globalVariable @struct : !spv.ptr<!spv.struct<f32, !spv.array<10 x f32>>, Private>
+  spv.func @func() -> () "None" {
+    %0 = spv._address_of @struct : !spv.ptr<!spv.struct<f32, !spv.array<10 x f32>>, Private>
+    spv.Return
+  }
+}
+
+// Converted result
+module {
+  llvm.mlir.global private @struct() : !llvm<"<{ float, [10 x float] }>">
+  llvm.func @func() {
+    %0 = llvm.mlir.addressof @struct : !llvm<"<{ float, [10 x float] }>*">
+    llvm.return
+  }
+}
+```
+
+At the moment, only the current invocation is in the conversion's scope. This
+means that global variables with pointers of `Input`, `Output` and `Private`
+storage classes are supported. Moreover, the `bind` attribute (which specifies
+the descriptor set and binding number) and the `built_in` attribute (which
+specifies a SPIR-V `BuiltIn` decoration) have no conversion yet.
+
+Currently, `llvm.mlir.global`s are created with `private` linkage for the
+`Private` storage class and `External` for the `Input`/`Output` storage
+classes, based on the SPIR-V spec:
+
+> By default, functions and global variables are private to a module and cannot
+be accessed by other modules. However, a module may be written to export or
+import functions and global (module scope) variables.
+
+If the global variable's pointer has the `Input` storage class, then a
+`constant` flag is added to the LLVM op:
+
+```mlir
+spv.globalVariable @var : !spv.ptr<f32, Input> => llvm.mlir.global external constant @var() : !llvm.float
+```
+
+#### `spv.Variable`
+
+Per the SPIR-V dialect spec, `spv.Variable` allocates an object in memory,
+resulting in a pointer to it, which can be used with `spv.Load` and
+`spv.Store`. It is also a function-level variable.
+
+`spv.Variable` is modelled as an `llvm.alloca` op. If the variable is
+initialized, an additional store instruction is used. Note that there is no
+initialization for arrays and structs since constants of these types are not
+supported in the LLVM dialect (TODO).
+Also, at the moment initialization is only possible via `spv.constant`.
+
+```mlir
+// Conversion of VariableOp without initialization
+                                                         %size = llvm.mlir.constant(1 : i32) : !llvm.i32
+%res = spv.Variable : !spv.ptr<vector<3xf32>, Function>  =>  %res = llvm.alloca %size x !llvm<"<3 x float>"> : (!llvm.i32) -> !llvm<"<3 x float>*">
+
+// Conversion of VariableOp with initialization
+                                                         %c = llvm.mlir.constant(0 : i64) : !llvm.i64
+%c = spv.constant 0 : i64                                %size = llvm.mlir.constant(1 : i32) : !llvm.i32
+%res = spv.Variable init(%c) : !spv.ptr<i64, Function>   =>  %res = llvm.alloca %size x !llvm.i64 : (!llvm.i32) -> !llvm<"i64*">
+                                                         llvm.store %c, %res : !llvm<"i64*">
+```
+
+Note that a simple conversion to `alloca` may not be sufficient if the code has
+some scoping. For example, if ops executed in a loop are converted into
+`alloca`s, a stack overflow may occur. For such cases, a
+`stacksave`/`stackrestore` pair can be used (TODO).
+
 ### Miscellaneous ops with direct conversions

 There are multiple SPIR-V ops that do not fit in a particular group but can be
@@ -445,12 +555,11 @@ There is no support of the following ops:

 * All Atomic ops
 * All matrix ops
-* All GLSL ops
 * All GroupNonUniform ops
+
+As well as:
+
 * spv.AccessChain
-* spv._address_of
-* spv.Branch
-* spv.BranchConditional
 * spv.CompositeConstruct
 * spv.CompositeExtract
 * spv.CompositeInsert
@@ -459,23 +568,87 @@ There is no support of the following ops:
 * spv.EntryPoint
 * spv.ExecutionMode
 * spv.FMod
-* spv.globalVariable
-* spv.Load
-* spv.loop
+* spv.GLSL.SAbs
+* spv.GLSL.SSign
+* spv.GLSL.FSign
 * spv.MemoryBarrier
-* spv._merge
 * spv._reference_of
-* spv.selection
 * spv.SMod
 * spv.specConstant
-* spv.Store
 * spv.SubgroupBallotKHR
-* spv.Variable
 * spv.Unreachable

 ## Control flow conversion

-**Note: these conversions have not been implemented yet**
+### Branch ops
+
+`spv.Branch` and `spv.BranchConditional` are mapped to `llvm.br` and
+`llvm.cond_br`. Branch weights for `spv.BranchConditional` are mapped to the
+corresponding `branch_weights` attribute of `llvm.cond_br`. When translated to
+proper LLVM, `branch_weights` are converted into LLVM metadata associated with
+the conditional branch.
+
+### `spv.FunctionCall`
+
+`spv.FunctionCall` maps to `llvm.call`. For example:
+
+```mlir
+%0 = spv.FunctionCall @foo() : () -> i32  =>  %0 = llvm.call @foo() : () -> !llvm.i32
+spv.FunctionCall @bar(%0) : (i32) -> ()   =>  llvm.call @bar(%0) : (!llvm.i32) -> ()
+```
+
+### `spv.selection` and `spv.loop`
+
+Control flow within `spv.selection` and `spv.loop` is lowered directly to LLVM
+via branch ops. The conversion can only be applied to a selection or loop whose
+blocks are all reachable. Moreover, selection and loop control attributes (such
+as `Flatten` or `Unroll`) are not supported at the moment.
+
+```mlir
+// Conversion of selection
+%cond = spv.constant true                       %cond = llvm.mlir.constant(true) : !llvm.i1
+spv.selection {
+  spv.BranchConditional %cond, ^true, ^false    llvm.cond_br %cond, ^true, ^false
+
+^true:                                          ^true:
+  // True block code                              // True block code
+  spv.Branch ^merge                         =>    llvm.br ^merge
+
+^false:                                         ^false:
+  // False block code                             // False block code
+  spv.Branch ^merge                               llvm.br ^merge
+
+^merge:                                         ^merge:
+  spv._merge                                      llvm.br ^continue
+}
+// Remaining code                               ^continue:
+                                                  // Remaining code
+```
+
+```mlir
+// Conversion of loop
+%cond = spv.constant true                       %cond = llvm.mlir.constant(true) : !llvm.i1
+spv.loop {
+  spv.Branch ^header                            llvm.br ^header
+
+^header:                                        ^header:
+  // Header code                                  // Header code
+  spv.BranchConditional %cond, ^body, ^merge =>   llvm.cond_br %cond, ^body, ^merge
+
+^body:                                          ^body:
+  // Body code                                    // Body code
+  spv.Branch ^continue                            llvm.br ^continue
+
+^continue:                                      ^continue:
+  // Continue code                                // Continue code
+  spv.Branch ^header                              llvm.br ^header
+
+^merge:                                         ^merge:
+  spv._merge                                      llvm.br ^remaining
+}
+// Remaining code                               ^remaining:
+                                                  // Remaining code
+```

 ## Decorations conversion
@@ -483,8 +656,6 @@ There is no support of the following ops:

 ## GLSL extended instruction set

-**Note: these conversions have not been implemented yet**
-
 This section describes how SPIR-V ops from GLSL extended instructions set are
 mapped to LLVM Dialect.
@@ -502,16 +673,34 @@ SPIR-V Dialect op | LLVM Dialect op
 `spv.GLSL.Log` | `llvm.intr.log`
 `spv.GLSL.Sin` | `llvm.intr.sin`
 `spv.GLSL.Sqrt` | `llvm.intr.sqrt`
+`spv.GLSL.SMax` | `llvm.intr.smax`
+`spv.GLSL.SMin` | `llvm.intr.smin`

 ### Special cases

-TODO: add more patterns for special cases.
+`spv.InverseSqrt` is mapped to:
+
+```mlir
+                                       %one  = llvm.mlir.constant(1.0 : f32) : !llvm.float
+%res = spv.InverseSqrt %arg : f32  =>  %sqrt = "llvm.intr.sqrt"(%arg) : (!llvm.float) -> !llvm.float
+                                       %res  = llvm.fdiv %one, %sqrt : !llvm.float
+```

 `spv.Tan` is mapped to:

 ```mlir
-   %sin = "llvm.intr.sin"(%arg) : (!llvm.float) -> !llvm.float
-   %cos = "llvm.intr.cos"(%arg) : (!llvm.float) -> !llvm.float
-%res = spv.Tan %arg : f32 => %res = fdiv %sin, %cos : !llvm.float
+                               %sin = "llvm.intr.sin"(%arg) : (!llvm.float) -> !llvm.float
+%res = spv.Tan %arg : f32  =>  %cos = "llvm.intr.cos"(%arg) : (!llvm.float) -> !llvm.float
+                               %res = llvm.fdiv %sin, %cos : !llvm.float
+```
+
+`spv.Tanh` is modelled using the identity `tanh(x) = (exp(2x) - 1) / (exp(2x) + 1)`:
+
+```mlir
+                                %two   = llvm.mlir.constant(2.0 : f32) : !llvm.float
+                                %2xArg = llvm.fmul %two, %arg : !llvm.float
+                                %exp   = "llvm.intr.exp"(%2xArg) : (!llvm.float) -> !llvm.float
+%res = spv.Tanh %arg : f32  =>  %one   = llvm.mlir.constant(1.0 : f32) : !llvm.float
+                                %num   = llvm.fsub %exp, %one : !llvm.float
+                                %den   = llvm.fadd %exp, %one : !llvm.float
+                                %res   = llvm.fdiv %num, %den : !llvm.float
 ```

 ## Function conversion and related ops

@@ -535,15 +724,6 @@ DontInline | `noinline`
 Pure | `readonly`
 Const | `readnone`

-### `spv.FunctionCall`
-
-`spv.FunctionCall` maps to `llvm.call`. For example:
-
-```mlir
-%0 = spv.FunctionCall @foo() : () -> i32 => %0 = llvm.call @foo() : () -> !llvm.float
-spv.FunctionCall @bar(%0) : (i32) -> () => llvm.call @bar(%0) : (!llvm.float) -> ()
-```
-
 ### `spv.Return` and `spv.ReturnValue`

 In LLVM IR, functions may return either 1 or 0 value. Hence, we map both ops to
@@ -563,5 +743,12 @@ to LLVM ops. At the moment, SPIR-V module attributes are ignored.

 `spv._module_end` is mapped to an equivalent terminator `ModuleTerminatorOp`.
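+
+To see where these pieces come together, here is a sketch of driving the
+conversion from C++ (illustrative only: `populateSPIRVToLLVMConversionPatterns`
+is the real entry point, but the exact signatures and pass plumbing here are
+assumptions):
+
+```c++
+// Collect all SPIR-V-to-LLVM patterns and run a full conversion on a module.
+void runSPIRVToLLVMConversionSketch(ModuleOp module) {
+  MLIRContext *context = module.getContext();
+  LLVMTypeConverter typeConverter(context);
+
+  OwningRewritePatternList patterns;
+  populateSPIRVToLLVMConversionPatterns(context, typeConverter, patterns);
+
+  // Everything must be lowered out of the SPIR-V dialect.
+  ConversionTarget target(*context);
+  target.addIllegalDialect<spirv::SPIRVDialect>();
+  target.addLegalDialect<LLVM::LLVMDialect>();
+
+  if (failed(applyFullConversion(module, target, patterns)))
+    module.emitError("SPIR-V to LLVM conversion failed");
+}
+```
+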
+## SPIR-V special ops
+
+**Note: this section is due to be implemented in August**
+
+This section describes how SPIR-V specific ops, *e.g.* `spv.specConstant`, are
+modelled in LLVM. It also provides information on the `mlir-spirv-runner`.
+
 [LLVMFunctionAttributes]: https://llvm.org/docs/LangRef.html#function-attributes
 [SPIRVFunctionAttributes]: https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_function_control_a_function_control

From bc10888dcdda1fe80a983029bc50eec4b248cc77 Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban
Date: Wed, 5 Aug 2020 11:06:54 +0700
Subject: [PATCH 411/600] DomTree: Make PostDomTree indifferent to block
 successors swap

Fixed the commit c35585e209efe69e2233bdc5ecd23bed7b735ba3.

This is a fix for bug 46098, where the PostDominatorTree is unexpectedly
changed by InstCombine's branch-swapping transformation.

This patch fixes the PostDomTree builder: while looking for the furthest-away
node in a reverse-unreachable subgraph, it runs the DFS with successors
visited in their function order. That order is unaffected by swapping
successors, and therefore so is the chosen furthest-away node.

Reviewers: kuhar, nikic, lebedev.ri

Differential Revision: https://reviews.llvm.org/D84763
---
 .../llvm/Support/GenericDomTreeConstruction.h |  50 +++-
 .../InstCombine/infinite-loop-postdom.ll      | 222 ++++++++++++++++++
 2 files changed, 269 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll

diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 6a9d38bceb388..984ddfbf4f4c1 100644
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -151,6 +151,8 @@ struct SemiNCAInfo {
     }
   };

+  using NodeOrderMap = DenseMap<NodePtr, unsigned>;
+
   // Custom DFS implementation which can skip nodes based on a provided
   // predicate. It also collects ReverseChildren so that we don't have to spend
   // time getting predecessors in SemiNCA.
@@ -158,9 +160,13 @@ struct SemiNCAInfo {
   // If IsReverse is set to true, the DFS walk will be performed backwards
   // relative to IsPostDom -- using reverse edges for dominators and forward
   // edges for postdominators.
+  //
+  // If SuccOrder is specified, then the DFS traverses the children in that
+  // order; otherwise the order is implied by the results of getChildren().
   template <bool IsReverse = false, typename DescendCondition>
   unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition,
-                  unsigned AttachToNum) {
+                  unsigned AttachToNum,
+                  const NodeOrderMap *SuccOrder = nullptr) {
     assert(V);
     SmallVector<NodePtr, 64> WorkList = {V};
     if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum;
@@ -176,7 +182,14 @@ struct SemiNCAInfo {
       NumToNode.push_back(BB);

       constexpr bool Direction = IsReverse != IsPostDom; // XOR.
-      for (const NodePtr Succ : getChildren<Direction>(BB, BatchUpdates)) {
+      auto Successors = getChildren<Direction>(BB, BatchUpdates);
+      if (SuccOrder && Successors.size() > 1)
+        llvm::sort(
+            Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) {
+              return SuccOrder->find(A)->second < SuccOrder->find(B)->second;
+            });
+
+      for (const NodePtr Succ : Successors) {
         const auto SIT = NodeToInfo.find(Succ);
         // Don't visit nodes more than once but remember to collect
         // ReverseChildren.
@@ -372,6 +385,32 @@ struct SemiNCAInfo {
       // nodes.
       if (Total + 1 != Num) {
         HasNonTrivialRoots = true;
+
+        // SuccOrder is the order of blocks in the function.
+        // It is needed to make the calculation of the FurthestAway node and
+        // the whole PostDomTree immune to the swap-successors transformation
+        // (e.g. canonicalizing branch predicates). SuccOrder is initialized
+        // lazily only for successors of reverse unreachable nodes.
+        Optional<NodeOrderMap> SuccOrder;
+        auto InitSuccOrderOnce = [&]() {
+          SuccOrder = NodeOrderMap();
+          for (const auto Node : nodes(DT.Parent))
+            if (SNCA.NodeToInfo.count(Node) == 0)
+              for (const auto Succ : getChildren<false>(Node, SNCA.BatchUpdates))
+                SuccOrder->try_emplace(Succ, 0);
+
+          // Add mapping for all entries of SuccOrder.
+          unsigned NodeNum = 0;
+          for (const auto Node : nodes(DT.Parent)) {
+            ++NodeNum;
+            auto Order = SuccOrder->find(Node);
+            if (Order != SuccOrder->end()) {
+              assert(Order->second == 0);
+              Order->second = NodeNum;
+            }
+          }
+        };
+
         // Make another DFS pass over all other nodes to find the
         // reverse-unreachable blocks, and find the furthest paths we'll be
         // able to make.
@@ -396,7 +435,12 @@ struct SemiNCAInfo {
         // expensive and does not always lead to a minimal set of roots.
         LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n");

-        const unsigned NewNum = SNCA.runDFS(I, Num, AlwaysDescend, Num);
+        if (!SuccOrder)
+          InitSuccOrderOnce();
+        assert(SuccOrder);
+
+        const unsigned NewNum =
+            SNCA.runDFS(I, Num, AlwaysDescend, Num, &*SuccOrder);
         const NodePtr FurthestAway = SNCA.NumToNode[NewNum];
         LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node "
                           << "(non-trivial root): "
diff --git a/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll
new file mode 100644
index 0000000000000..f7ae19657297f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll
@@ -0,0 +1,222 @@
+; RUN: opt %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info
+; RUN: opt %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s
+; RUN: opt %s -passes='print<postdomtree>' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s
+
+; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree
+; if the basic block is post-dom unreachable.
+
+define void @test1(i24 %a, i24 %b) {
+entry:
+  br label %LOOP
+
+LOOP:
+  %f = icmp uge i24 %a, %b
+  br i1 %f, label %B1, label %B2
+
+B1:
+  %x = add i24 %a, %b
+  br label %B2
+
+B2:
+  br label %LOOP
+}
+
+; The same as @test1 except the LOOP condition is canonicalized (as by instcombine).
+define void @test1-canonicalized(i24 %a, i24 %b) {
+entry:
+  br label %LOOP
+
+LOOP:
+  %f.not = icmp ult i24 %a, %b
+  br i1 %f.not, label %B2, label %B1
+
+B1:
+  %x = add i24 %a, %b
+  br label %B2
+
+B2:
+  br label %LOOP
+}
+
+; The same as @test1 but with a different order of B1 and B2 in the function.
+; The different order makes the PostDomTree different in the presence of
+; postdom-unreachable blocks.
+define void @test2(i24 %a, i24 %b) {
+entry:
+  br label %LOOP
+
+LOOP:
+  %f = icmp uge i24 %a, %b
+  br i1 %f, label %B1, label %B2
+
+B2:
+  br label %LOOP
+
+B1:
+  %x = add i24 %a, %b
+  br label %B2
+}
+
+; The same as @test2 except the LOOP condition is canonicalized (as by instcombine).
+define void @test2-canonicalized(i24 %a, i24 %b) {
+entry:
+  br label %LOOP
+
+LOOP:
+  %f.not = icmp ult i24 %a, %b
+  br i1 %f.not, label %B2, label %B1
+
+B2:
+  br label %LOOP
+
+B1:
+  %x = add i24 %a, %b
+  br label %B2
+}
+
+; Two reverse unreachable subgraphs with RU1* and RU2* basic blocks respectively.
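+; Both subgraphs are reverse-unreachable (their loops never reach EXIT), so the
+; PostDomTree builder must pick virtual roots for them. With the fix above it
+; picks the same furthest-away roots for @test3 and @test3-canonicalized, even
+; though InstCombine swaps the branch successors (see the CHECK-POSTDOM lines
+; below).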
+define void @test3(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f = icmp uge i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f, label %RU1_B1, label %RU1_B2 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2 = icmp uge i24 %a, %b + br i1 %f2, label %RU2_B1, label %RU2_B2 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; The same as @test3 except the icmp conditions are canonicalized (as by instcombine). +define void @test3-canonicalized(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f.not = icmp ult i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f.not, label %RU1_B2, label %RU1_B1 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2.not = icmp ult i24 %a, %b + br i1 %f2.not, label %RU2_B2, label %RU2_B1 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; PostDomTrees of @test1(), @test2() and @test3() are different. +; PostDomTrees of @testX() and @testX-canonicalize() are the same. + +; CHECK-POSTDOM-LABEL: test1 +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B1 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [4] %B2 +; CHECK-POSTDOM-NEXT: Roots: %B1 + +; CHECK-POSTDOM-LABEL: test1-canonicalized +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B1 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [4] %B2 +; CHECK-POSTDOM-NEXT: Roots: %B1 + +; CHECK-POSTDOM-LABEL: test2 +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B2 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [3] %B1 +; CHECK-POSTDOM-NEXT: Roots: %B2 + +; CHECK-POSTDOM-LABEL: test2-canonicalized +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B2 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [3] %B1 +; CHECK-POSTDOM-NEXT: Roots: %B2 + +; CHECK-POSTDOM-LABEL: test3 +; CHECK-POSTDOM-NEXT:=============================-------------------------------- +; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. 
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %EXIT
+; CHECK-POSTDOM-NEXT: [2] %entry
+; CHECK-POSTDOM-NEXT: [2] %RU1_B1
+; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP
+; CHECK-POSTDOM-NEXT: [4] %RU1
+; CHECK-POSTDOM-NEXT: [4] %RU1_B2
+; CHECK-POSTDOM-NEXT: [2] %RU2_B1
+; CHECK-POSTDOM-NEXT: [3] %RU2
+; CHECK-POSTDOM-NEXT: [3] %RU2_B2
+; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1

+; CHECK-POSTDOM-LABEL: test3-canonicalized
+; CHECK-POSTDOM-NEXT:=============================--------------------------------
+; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
+; CHECK-POSTDOM-NEXT: [1] <>
+; CHECK-POSTDOM-NEXT: [2] %EXIT
+; CHECK-POSTDOM-NEXT: [2] %entry
+; CHECK-POSTDOM-NEXT: [2] %RU1_B1
+; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP
+; CHECK-POSTDOM-NEXT: [4] %RU1
+; CHECK-POSTDOM-NEXT: [4] %RU1_B2
+; CHECK-POSTDOM-NEXT: [2] %RU2_B1
+; CHECK-POSTDOM-NEXT: [3] %RU2
+; CHECK-POSTDOM-NEXT: [3] %RU2_B2
+; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1

From e739648cfae21d2b564751ef0511fec9559305fa Mon Sep 17 00:00:00 2001
From: George Mitenkov
Date: Wed, 5 Aug 2020 10:09:21 +0300
Subject: [PATCH 412/600] [MLIR][SPIRVToLLVM] Conversion pattern for loop op

This patch introduces a conversion of `spv.loop` to the LLVM dialect.
Similarly to `spv.selection`, the op's control attributes are not mapped
to LLVM yet, and therefore the conversion fails if the loop control is
not `None`. Also, all blocks within the loop should be reachable in
order for the conversion to succeed.

Reviewed By: mravishankar

Differential Revision: https://reviews.llvm.org/D84245
---
 .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp        | 94 ++++++++++++++++++-
 .../SPIRVToLLVM/control-flow-ops-to-llvm.mlir | 39 ++++++++
 2 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
index 45b9f41121514..0c326287e69ea 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
@@ -718,6 +718,98 @@ class ReturnValuePattern : public SPIRVToLLVMConversion<spirv::ReturnValueOp> {
   }
 };

+/// Converts `spv.loop` to the LLVM dialect. All blocks within the loop should
+/// be reachable for the conversion to succeed.
+/// The structure of the loop in the LLVM dialect will be the following:
+///
+///      +------------------------------------+
+///      |                                    |
+///      | llvm.br ^header                    |
+///      +------------------------------------+
+///                       |
+///   +----------------+  |
+///   |                |  |
+///   |                V  V
+///   |  +------------------------------------+
+///   |  | ^header:                           |
+///   |  |                                    |
+///   |  | llvm.cond_br %cond, ^body, ^exit   |
+///   |  +------------------------------------+
+///   |                 |
+///   |                 |----------------------+
+///   |                 |                      |
+///   |                 V                      |
+///   |  +------------------------------------+ |
+///   |  | ^body:                             | |
+///   |  |                                    | |
+///   |  | llvm.br ^continue                  | |
+///   |  +------------------------------------+ |
+///   |                 |                      |
+///   |                 V                      |
+///   |  +------------------------------------+ |
+///   |  | ^continue:                         | |
+///   |  |                                    | |
+///   |  | llvm.br ^header                    | |
+///   |  +------------------------------------+ |
+///   |                 |                      |
+///   +-----------------+   +------------------+
+///                         |
+///                         V
+///      +------------------------------------+
+///      | ^exit:                             |
+///      | llvm.br ^remaining                 |
+///      +------------------------------------+
+///                         |
+///                         V
+///      +------------------------------------+
+///      | ^remaining:                        |
+///      |                                    |
+///      +------------------------------------+
+///
+class LoopPattern : public SPIRVToLLVMConversion<spirv::LoopOp> {
+public:
+  using SPIRVToLLVMConversion<spirv::LoopOp>::SPIRVToLLVMConversion;
+
+  LogicalResult
+  matchAndRewrite(spirv::LoopOp loopOp, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    // There is no support of loop control at the moment.
+    if (loopOp.loop_control() != spirv::LoopControl::None)
+      return failure();
+
+    Location loc = loopOp.getLoc();
+
+    // Split the current block after `spv.loop`. The remaining ops will be
+    // used in `endBlock`.
+    Block *currentBlock = rewriter.getBlock();
+    auto position = Block::iterator(loopOp);
+    Block *endBlock = rewriter.splitBlock(currentBlock, position);
+
+    // Remove the entry block and create a branch in the current block going
+    // to the header block.
+    Block *entryBlock = loopOp.getEntryBlock();
+    assert(entryBlock->getOperations().size() == 1);
+    auto brOp = dyn_cast<spirv::BranchOp>(entryBlock->getOperations().front());
+    if (!brOp)
+      return failure();
+    Block *headerBlock = loopOp.getHeaderBlock();
+    rewriter.setInsertionPointToEnd(currentBlock);
+    rewriter.create<LLVM::BrOp>(loc, brOp.getBlockArguments(), headerBlock);
+    rewriter.eraseBlock(entryBlock);
+
+    // Branch from the merge block to the end block.
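+    // (Any operands of the merge block's terminator are forwarded as operands
+    // of the new branch, and the results of `spv.loop` are then replaced with
+    // the end block's arguments.)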
+ Block *mergeBlock = loopOp.getMergeBlock(); + Operation *terminator = mergeBlock->getTerminator(); + ValueRange terminatorOperands = terminator->getOperands(); + rewriter.setInsertionPointToEnd(mergeBlock); + rewriter.create(loc, terminatorOperands, endBlock); + + rewriter.inlineRegionBefore(loopOp.body(), endBlock); + rewriter.replaceOp(loopOp, endBlock->getArguments()); + return success(); + } +}; + class MergePattern : public SPIRVToLLVMConversion { public: using SPIRVToLLVMConversion::SPIRVToLLVMConversion; @@ -1109,7 +1201,7 @@ void mlir::populateSPIRVToLLVMConversionPatterns( ConstantScalarAndVectorPattern, // Control Flow ops - BranchConversionPattern, BranchConditionalConversionPattern, + BranchConversionPattern, BranchConditionalConversionPattern, LoopPattern, SelectionPattern, MergePattern, // Function Call op diff --git a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir index 3c92040a17ed7..6427ce47dc165 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir @@ -81,6 +81,45 @@ spv.module Logical GLSL450 { // ----- +//===----------------------------------------------------------------------===// +// spv.loop +//===----------------------------------------------------------------------===// + +spv.module Logical GLSL450 { + // CHECK-LABEL: @infinite_loop + spv.func @infinite_loop(%count : i32) -> () "None" { + // CHECK: llvm.br ^[[BB1:.*]] + // CHECK: ^[[BB1]]: + // CHECK: %[[COND:.*]] = llvm.mlir.constant(true) : !llvm.i1 + // CHECK: llvm.cond_br %[[COND]], ^[[BB2:.*]], ^[[BB4:.*]] + // CHECK: ^[[BB2]]: + // CHECK: llvm.br ^[[BB3:.*]] + // CHECK: ^[[BB3]]: + // CHECK: llvm.br ^[[BB1:.*]] + // CHECK: ^[[BB4]]: + // CHECK: llvm.br ^[[BB5:.*]] + // CHECK: ^[[BB5]]: + // CHECK: llvm.return + spv.loop { + spv.Branch ^header + ^header: + %cond = spv.constant true + spv.BranchConditional %cond, ^body, ^merge + ^body: + // Do nothing + spv.Branch ^continue + ^continue: + // Do nothing + spv.Branch ^header + ^merge: + spv._merge + } + spv.Return + } +} + +// ----- + //===----------------------------------------------------------------------===// // spv.selection //===----------------------------------------------------------------------===// From c5cdc3e801ad1b0aceaf220d78a3ff3fab1e0fdb Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Tue, 4 Aug 2020 08:30:24 +0000 Subject: [PATCH 413/600] [SyntaxTree] Add test coverage for `->*` operator This was the last binary operator that we supported but didn't have any test coverage. The recent fix in a crash in member pointers allowed us to add this test. Differential Revision: https://reviews.llvm.org/D85185 --- clang/unittests/Tooling/Syntax/TreeTest.cpp | 43 +++++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index 3ccfabb95da90..e696be3dae7c3 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -2329,16 +2329,17 @@ struct X { friend bool operator<(const X&, const X&); friend X operator<<(X&, const X&); X operator,(X&); - // TODO: Fix crash on member function pointer and add a test for `->*` - // TODO: Unbox operators in syntax tree. + X operator->*(int); + // TODO: Unbox operators in syntax tree. 
+  // Represent operators by `+` instead of `IdExpression-UnqualifiedId-+`
 };
-void test(X x, X y) {
+void test(X x, X y, X* xp, int X::* pmi) {
   x = y;
   x + y;
   x < y;
   x << y;
   x, y;
+  xp->*pmi;
 }
 )cpp",
       R"txt(
@@ -2437,6 +2438,17 @@ void test(X x, X y) {
 | | | | `-&
 | | | `-)
 | | `-;
+| |-SimpleDeclaration
+| | |-X
+| | |-SimpleDeclarator
+| | | |-operator
+| | | |-->*
+| | | `-ParametersAndQualifiers
+| | | |-(
+| | | |-SimpleDeclaration
+| | | | `-int
+| | | `-)
+| | `-;
 | |-}
 | `-;
 `-SimpleDeclaration
@@ -2454,6 +2466,21 @@ void test(X x, X y) {
 | | |-X
 | | `-SimpleDeclarator
 | | `-y
+| |-,
+| |-SimpleDeclaration
+| | |-X
+| | `-SimpleDeclarator
+| | |-*
+| | `-xp
+| |-,
+| |-SimpleDeclaration
+| | |-int
+| | `-SimpleDeclarator
+| | |-MemberPointer
+| | | |-X
+| | | |-::
+| | | `-*
+| | `-pmi
 | `-)
 `-CompoundStatement
   |-{
@@ -2518,6 +2545,16 @@ void test(X x, X y) {
 | | `-UnqualifiedId
 | | `-y
 | `-;
+|-ExpressionStatement
+| |-BinaryOperatorExpression
+| | |-IdExpression
+| | | `-UnqualifiedId
+| | | `-xp
+| | |-->*
+| | `-IdExpression
+| | `-UnqualifiedId
+| | `-pmi
+| `-;
 `-}
 )txt"));
 }

From c952ec15d38843b69e22dfd7b0665304a0459f9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?=
Date: Tue, 4 Aug 2020 18:37:34 +0200
Subject: [PATCH 414/600] [lldb] fix building with panel.h being in /usr/include/ncurses/

My openSUSE 15.2 has /usr/include/curses.h as a symlink to
/usr/include/ncurses/curses.h, but there's no such symlink for panel.h.
Prefer using /usr/include/ncurses for the includes if they are found
there by the CMake check.

Differential Revision: https://reviews.llvm.org/D85219
---
 lldb/include/lldb/Host/Config.h.cmake   | 2 ++
 lldb/source/Core/IOHandlerCursesGUI.cpp | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/lldb/include/lldb/Host/Config.h.cmake b/lldb/include/lldb/Host/Config.h.cmake
index 42f4ca1a26c61..7467f429b6287 100644
--- a/lldb/include/lldb/Host/Config.h.cmake
+++ b/lldb/include/lldb/Host/Config.h.cmake
@@ -38,6 +38,8 @@

 #cmakedefine01 LLDB_ENABLE_CURSES

+#cmakedefine01 CURSES_HAVE_NCURSES_CURSES_H
+
 #cmakedefine01 LLDB_ENABLE_LIBEDIT

 #cmakedefine01 LLDB_ENABLE_LIBXML2
diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp
index 144b2112183c2..55c80e7780605 100644
--- a/lldb/source/Core/IOHandlerCursesGUI.cpp
+++ b/lldb/source/Core/IOHandlerCursesGUI.cpp
@@ -10,9 +10,14 @@
 #include "lldb/Host/Config.h"

 #if LLDB_ENABLE_CURSES
+#if CURSES_HAVE_NCURSES_CURSES_H
+#include <ncurses/curses.h>
+#include <ncurses/panel.h>
+#else
 #include <curses.h>
 #include <panel.h>
 #endif
+#endif

 #if defined(__APPLE__)
 #include <deque>

From 2f1b24b70c6c3ecf3cf5ccd35209d9d7e426be63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?=
Date: Sun, 2 Aug 2020 12:58:22 +0200
Subject: [PATCH 415/600] [lldb][gui] implement TerminalSizeChanged()

Differential Revision: https://reviews.llvm.org/D85088
---
 lldb/include/lldb/Core/IOHandlerCursesGUI.h |  2 +
 lldb/source/Core/IOHandlerCursesGUI.cpp     | 73 ++++++++++++++++++---
 2 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/lldb/include/lldb/Core/IOHandlerCursesGUI.h b/lldb/include/lldb/Core/IOHandlerCursesGUI.h
index fe62eaea643e6..22ca735063ba1 100644
--- a/lldb/include/lldb/Core/IOHandlerCursesGUI.h
+++ b/lldb/include/lldb/Core/IOHandlerCursesGUI.h
@@ -31,6 +31,8 @@ class IOHandlerCursesGUI : public IOHandler {

   void Deactivate() override;

+  void TerminalSizeChanged() override;
+
 protected:
   curses::ApplicationAP m_app_ap;
 };

diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp
b/lldb/source/Core/IOHandlerCursesGUI.cpp index 55c80e7780605..ea4dfd12ada4c 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1197,13 +1197,13 @@ class Application { ConstString broadcaster_class_process(Process::GetStaticBroadcasterClass()); debugger.EnableForwardEvents(listener_sp); - bool update = true; + m_update_screen = true; #if defined(__APPLE__) std::deque escape_chars; #endif while (!done) { - if (update) { + if (m_update_screen) { m_window_sp->Draw(false); // All windows should be calling Window::DeferredRefresh() instead of // Window::Refresh() so we can do a single update and avoid any screen @@ -1215,7 +1215,7 @@ class Application { m_window_sp->MoveCursor(0, 0); doupdate(); - update = false; + m_update_screen = false; } #if defined(__APPLE__) @@ -1277,7 +1277,7 @@ class Application { if (broadcaster_class == broadcaster_class_process) { debugger.GetCommandInterpreter().UpdateExecutionContext( nullptr); - update = true; + m_update_screen = true; continue; // Don't get any key, just update our view } } @@ -1289,12 +1289,12 @@ class Application { switch (key_result) { case eKeyHandled: debugger.GetCommandInterpreter().UpdateExecutionContext(nullptr); - update = true; + m_update_screen = true; break; case eKeyNotHandled: if (ch == 12) { // Ctrl+L, force full redraw redrawwin(m_window_sp->get()); - update = true; + m_update_screen = true; } break; case eQuitApplication: @@ -1313,12 +1313,65 @@ class Application { return m_window_sp; } + void TerminalSizeChanged() { + ::endwin(); + ::refresh(); + Rect content_bounds = m_window_sp->GetFrame(); + m_window_sp->SetBounds(content_bounds); + if (WindowSP menubar_window_sp = m_window_sp->FindSubWindow("Menubar")) + menubar_window_sp->SetBounds(content_bounds.MakeMenuBar()); + if (WindowSP status_window_sp = m_window_sp->FindSubWindow("Status")) + status_window_sp->SetBounds(content_bounds.MakeStatusBar()); + + WindowSP source_window_sp = m_window_sp->FindSubWindow("Source"); + WindowSP variables_window_sp = m_window_sp->FindSubWindow("Variables"); + WindowSP registers_window_sp = m_window_sp->FindSubWindow("Registers"); + WindowSP threads_window_sp = m_window_sp->FindSubWindow("Threads"); + + Rect threads_bounds; + Rect source_variables_bounds; + content_bounds.VerticalSplitPercentage(0.80, source_variables_bounds, + threads_bounds); + if (threads_window_sp) + threads_window_sp->SetBounds(threads_bounds); + else + source_variables_bounds = content_bounds; + + Rect source_bounds; + Rect variables_registers_bounds; + source_variables_bounds.HorizontalSplitPercentage( + 0.70, source_bounds, variables_registers_bounds); + if (variables_window_sp || registers_window_sp) { + if (variables_window_sp && registers_window_sp) { + Rect variables_bounds; + Rect registers_bounds; + variables_registers_bounds.VerticalSplitPercentage( + 0.50, variables_bounds, registers_bounds); + variables_window_sp->SetBounds(variables_bounds); + registers_window_sp->SetBounds(registers_bounds); + } else if (variables_window_sp) { + variables_window_sp->SetBounds(variables_registers_bounds); + } else { + registers_window_sp->SetBounds(variables_registers_bounds); + } + } else { + source_bounds = source_variables_bounds; + } + + source_window_sp->SetBounds(source_bounds); + + touchwin(stdscr); + redrawwin(m_window_sp->get()); + m_update_screen = true; + } + protected: WindowSP m_window_sp; WindowDelegates m_window_delegates; SCREEN *m_screen; FILE *m_in; FILE *m_out; + bool m_update_screen = false; }; } // 
namespace curses @@ -3082,7 +3135,7 @@ class ApplicationDelegate : public WindowDelegate, public MenuDelegate { new_registers_rect); registers_window_sp->SetBounds(new_registers_rect); } else { - // No variables window, grab the bottom part of the source window + // No registers window, grab the bottom part of the source window Rect new_source_rect; source_bounds.HorizontalSplitPercentage(0.70, new_source_rect, new_variables_rect); @@ -3133,7 +3186,7 @@ class ApplicationDelegate : public WindowDelegate, public MenuDelegate { new_regs_rect); variables_window_sp->SetBounds(new_vars_rect); } else { - // No registers window, grab the bottom part of the source window + // No variables window, grab the bottom part of the source window Rect new_source_rect; source_bounds.HorizontalSplitPercentage(0.70, new_source_rect, new_regs_rect); @@ -4088,4 +4141,8 @@ bool IOHandlerCursesGUI::Interrupt() { return false; } void IOHandlerCursesGUI::GotEOF() {} +void IOHandlerCursesGUI::TerminalSizeChanged() { + m_app_ap->TerminalSizeChanged(); +} + #endif // LLDB_ENABLE_CURSES From 7a63dc534eb8ebdf1b0aa2c0f6d943d0c7adda1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Sun, 2 Aug 2020 13:18:41 +0200 Subject: [PATCH 416/600] [lldb][gui] implement shift+tab for going back in views Also simplify the code for going forward. Differential Revision: https://reviews.llvm.org/D85089 --- lldb/source/Core/IOHandlerCursesGUI.cpp | 71 ++++++++++++++----------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index ea4dfd12ada4c..2e991dc37be8b 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -692,42 +692,44 @@ class Window { void SelectNextWindowAsActive() { // Move active focus to next window - const size_t num_subwindows = m_subwindows.size(); - if (m_curr_active_window_idx == UINT32_MAX) { - uint32_t idx = 0; - for (auto subwindow_sp : m_subwindows) { - if (subwindow_sp->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } - ++idx; - } - } else if (m_curr_active_window_idx + 1 < num_subwindows) { - bool handled = false; + const int num_subwindows = m_subwindows.size(); + int start_idx = 0; + if (m_curr_active_window_idx != UINT32_MAX) { m_prev_active_window_idx = m_curr_active_window_idx; - for (size_t idx = m_curr_active_window_idx + 1; idx < num_subwindows; - ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - handled = true; - break; - } + start_idx = m_curr_active_window_idx + 1; + } + for (int idx = start_idx; idx < num_subwindows; ++idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + return; } - if (!handled) { - for (size_t idx = 0; idx <= m_prev_active_window_idx; ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } - } + } + for (int idx = 0; idx < start_idx; ++idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + break; } - } else { + } + } + + void SelectPreviousWindowAsActive() { + // Move active focus to previous window + const int num_subwindows = m_subwindows.size(); + int start_idx = num_subwindows - 1; + if (m_curr_active_window_idx != UINT32_MAX) { m_prev_active_window_idx = m_curr_active_window_idx; - for (size_t idx = 0; idx < num_subwindows; ++idx) { - if (m_subwindows[idx]->GetCanBeActive()) { - m_curr_active_window_idx = idx; - break; - } + start_idx = 
m_curr_active_window_idx - 1; + } + for (int idx = start_idx; idx >= 0; --idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + return; + } + } + for (int idx = num_subwindows - 1; idx > start_idx; --idx) { + if (m_subwindows[idx]->GetCanBeActive()) { + m_curr_active_window_idx = idx; + break; } } } @@ -2928,6 +2930,10 @@ class ApplicationDelegate : public WindowDelegate, public MenuDelegate { window.SelectNextWindowAsActive(); return eKeyHandled; + case KEY_BTAB: + window.SelectPreviousWindowAsActive(); + return eKeyHandled; + case 'h': window.CreateHelpSubwindow(); return eKeyHandled; @@ -2952,6 +2958,7 @@ class ApplicationDelegate : public WindowDelegate, public MenuDelegate { KeyHelp *WindowDelegateGetKeyHelp() override { static curses::KeyHelp g_source_view_key_help[] = { {'\t', "Select next view"}, + {KEY_BTAB, "Select previous view"}, {'h', "Show help dialog with view specific key bindings"}, {',', "Page up"}, {'.', "Page down"}, From d6868d9ca1dbdeceaaa1660b6e7b4af0c207fcae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Sun, 2 Aug 2020 22:03:21 +0200 Subject: [PATCH 417/600] [lldb][gui] implement breakpoint removal on breakpoint toggling It says it toggles breakpoints, so if one already exists on the selected location, remove it instead of adding. Differential Revision: https://reviews.llvm.org/D85098 --- lldb/source/Core/IOHandlerCursesGUI.cpp | 111 +++++++++++++++++------- 1 file changed, 80 insertions(+), 31 deletions(-) diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 2e991dc37be8b..37e24d4f7533f 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -3837,37 +3837,7 @@ class SourceFileWindowDelegate : public WindowDelegate { return eKeyHandled; case 'b': // 'b' == toggle breakpoint on currently selected line - if (m_selected_line < GetNumSourceLines()) { - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasTargetScope()) { - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - nullptr, // Don't limit the breakpoint to certain modules - m_file_sp->GetFileSpec(), // Source file - m_selected_line + - 1, // Source line number (m_selected_line is zero based) - 0, // No column specified. - 0, // No offset - eLazyBoolCalculate, // Check inlines using global setting - eLazyBoolCalculate, // Skip prologue using global setting, - false, // internal - false, // request_hardware - eLazyBoolCalculate); // move_to_nearest_code - } - } else if (m_selected_line < GetNumDisassemblyLines()) { - const Instruction *inst = m_disassembly_sp->GetInstructionList() - .GetInstructionAtIndex(m_selected_line) - .get(); - ExecutionContext exe_ctx = - m_debugger.GetCommandInterpreter().GetExecutionContext(); - if (exe_ctx.HasTargetScope()) { - Address addr = inst->GetAddress(); - BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( - addr, // lldb_private::Address - false, // internal - false); // request_hardware - } - } + ToggleBreakpointOnSelectedLine(); return eKeyHandled; case 'D': // 'D' == detach and keep stopped @@ -3957,6 +3927,85 @@ class SourceFileWindowDelegate : public WindowDelegate { return eKeyNotHandled; } + void ToggleBreakpointOnSelectedLine() { + ExecutionContext exe_ctx = + m_debugger.GetCommandInterpreter().GetExecutionContext(); + if (!exe_ctx.HasTargetScope()) + return; + if (GetNumSourceLines() > 0) { + // Source file breakpoint. 
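+      // Walk every location of every breakpoint in the target; if one already
+      // matches this file and line, delete that breakpoint, otherwise fall
+      // through and create a new one below.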
+      BreakpointList &bp_list = exe_ctx.GetTargetRef().GetBreakpointList();
+      const size_t num_bps = bp_list.GetSize();
+      for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) {
+        BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx);
+        const size_t num_bps_locs = bp_sp->GetNumLocations();
+        for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; ++bp_loc_idx) {
+          BreakpointLocationSP bp_loc_sp =
+              bp_sp->GetLocationAtIndex(bp_loc_idx);
+          LineEntry bp_loc_line_entry;
+          if (bp_loc_sp->GetAddress().CalculateSymbolContextLineEntry(
+                  bp_loc_line_entry)) {
+            if (m_file_sp->GetFileSpec() == bp_loc_line_entry.file &&
+                m_selected_line + 1 == bp_loc_line_entry.line) {
+              bool removed =
+                  exe_ctx.GetTargetRef().RemoveBreakpointByID(bp_sp->GetID());
+              assert(removed);
+              UNUSED_IF_ASSERT_DISABLED(removed);
+              return; // Existing breakpoint removed.
+            }
+          }
+        }
+      }
+      // No breakpoint found on the location, add it.
+      BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint(
+          nullptr, // Don't limit the breakpoint to certain modules
+          m_file_sp->GetFileSpec(), // Source file
+          m_selected_line +
+              1, // Source line number (m_selected_line is zero based)
+          0, // No column specified.
+          0, // No offset
+          eLazyBoolCalculate, // Check inlines using global setting
+          eLazyBoolCalculate, // Skip prologue using global setting,
+          false, // internal
+          false, // request_hardware
+          eLazyBoolCalculate); // move_to_nearest_code
+    } else {
+      // Disassembly breakpoint.
+      assert(GetNumDisassemblyLines() > 0);
+      assert(m_selected_line < GetNumDisassemblyLines());
+      const Instruction *inst = m_disassembly_sp->GetInstructionList()
+                                    .GetInstructionAtIndex(m_selected_line)
+                                    .get();
+      Address addr = inst->GetAddress();
+      // Try to find it.
+      BreakpointList &bp_list = exe_ctx.GetTargetRef().GetBreakpointList();
+      const size_t num_bps = bp_list.GetSize();
+      for (size_t bp_idx = 0; bp_idx < num_bps; ++bp_idx) {
+        BreakpointSP bp_sp = bp_list.GetBreakpointAtIndex(bp_idx);
+        const size_t num_bps_locs = bp_sp->GetNumLocations();
+        for (size_t bp_loc_idx = 0; bp_loc_idx < num_bps_locs; ++bp_loc_idx) {
+          BreakpointLocationSP bp_loc_sp =
+              bp_sp->GetLocationAtIndex(bp_loc_idx);
+          LineEntry bp_loc_line_entry;
+          const lldb::addr_t file_addr =
+              bp_loc_sp->GetAddress().GetFileAddress();
+          if (file_addr == addr.GetFileAddress()) {
+            bool removed =
+                exe_ctx.GetTargetRef().RemoveBreakpointByID(bp_sp->GetID());
+            assert(removed);
+            UNUSED_IF_ASSERT_DISABLED(removed);
+            return; // Existing breakpoint removed.
+          }
+        }
+      }
+      // No breakpoint found on the address, add it.
+      BreakpointSP bp_sp =
+          exe_ctx.GetTargetRef().CreateBreakpoint(addr, // lldb_private::Address
+                                                  false, // internal
+                                                  false); // request_hardware
+    }
+  }
+
 protected:
   typedef std::set<uint32_t> BreakpointLines;
   typedef std::set<lldb::addr_t> BreakpointAddrs;

From c7be982c836cdaf7b1ef303d903e6a3de2eb4a34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?=
Date: Mon, 3 Aug 2020 07:09:03 +0200
Subject: [PATCH 418/600] [lldb][gui] move TestGuiBasicDebug.py to lldb/test
 and update it

Between the time it was created and the time it was pushed upstream,
99451b4453688a94c6014cac233d371ab4cc342d moved the existing gui tests
to lldb/test, so move this one too. And update it to include the changes
made to TestGuiBasic.py since this test was derived from it.
Differential Revision: https://reviews.llvm.org/D85106 --- .../test => test/API}/commands/gui/basicdebug/Makefile | 0 .../API}/commands/gui/basicdebug/TestGuiBasicDebug.py | 8 ++++++++ .../test => test/API}/commands/gui/basicdebug/func.c | 0 .../test => test/API}/commands/gui/basicdebug/main.c | 0 4 files changed, 8 insertions(+) rename lldb/{packages/Python/lldbsuite/test => test/API}/commands/gui/basicdebug/Makefile (100%) rename lldb/{packages/Python/lldbsuite/test => test/API}/commands/gui/basicdebug/TestGuiBasicDebug.py (83%) rename lldb/{packages/Python/lldbsuite/test => test/API}/commands/gui/basicdebug/func.c (100%) rename lldb/{packages/Python/lldbsuite/test => test/API}/commands/gui/basicdebug/main.c (100%) diff --git a/lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/Makefile b/lldb/test/API/commands/gui/basicdebug/Makefile similarity index 100% rename from lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/Makefile rename to lldb/test/API/commands/gui/basicdebug/Makefile diff --git a/lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/TestGuiBasicDebug.py b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py similarity index 83% rename from lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/TestGuiBasicDebug.py rename to lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py index 54c763fd2afc2..76d9d3bdc4638 100644 --- a/lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/TestGuiBasicDebug.py +++ b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py @@ -11,7 +11,11 @@ class TestGuiBasicDebugCommandTest(PExpectTest): mydir = TestBase.compute_mydir(__file__) + # PExpect uses many timeouts internally and doesn't play well + # under ASAN on a loaded machine.. + @skipIfAsan @skipIfCursesSupportMissing + @skipIfRemote # "run" command will not work correctly for remote debug def test_gui(self): self.build() @@ -39,4 +43,8 @@ def test_gui(self): self.child.send("n") # step over self.child.expect("return 0;[^\r\n]+<<< Thread 1: step over") + # Press escape to quit the gui + self.child.send(escape_key) + + self.expect_prompt() self.quit() diff --git a/lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/func.c b/lldb/test/API/commands/gui/basicdebug/func.c similarity index 100% rename from lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/func.c rename to lldb/test/API/commands/gui/basicdebug/func.c diff --git a/lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/main.c b/lldb/test/API/commands/gui/basicdebug/main.c similarity index 100% rename from lldb/packages/Python/lldbsuite/test/commands/gui/basicdebug/main.c rename to lldb/test/API/commands/gui/basicdebug/main.c From db828aba55aca0ce977f086dcd449f8fe667f30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Mon, 3 Aug 2020 08:32:16 +0200 Subject: [PATCH 419/600] [lldb][gui] add a test for 'b' (toggle breakpoint) Differential Revision: https://reviews.llvm.org/D85107 --- .../API/commands/gui/breakpoints/Makefile | 2 + .../gui/breakpoints/TestGuiBreakpoints.py | 75 +++++++++++++++++++ lldb/test/API/commands/gui/breakpoints/main.c | 6 ++ 3 files changed, 83 insertions(+) create mode 100644 lldb/test/API/commands/gui/breakpoints/Makefile create mode 100644 lldb/test/API/commands/gui/breakpoints/TestGuiBreakpoints.py create mode 100644 lldb/test/API/commands/gui/breakpoints/main.c diff --git a/lldb/test/API/commands/gui/breakpoints/Makefile b/lldb/test/API/commands/gui/breakpoints/Makefile new file mode 100644 index 
0000000000000..c9319d6e6888a --- /dev/null +++ b/lldb/test/API/commands/gui/breakpoints/Makefile @@ -0,0 +1,2 @@ +C_SOURCES := main.c +include Makefile.rules diff --git a/lldb/test/API/commands/gui/breakpoints/TestGuiBreakpoints.py b/lldb/test/API/commands/gui/breakpoints/TestGuiBreakpoints.py new file mode 100644 index 0000000000000..13f5e1380a743 --- /dev/null +++ b/lldb/test/API/commands/gui/breakpoints/TestGuiBreakpoints.py @@ -0,0 +1,75 @@ +""" +Test the 'gui' shortcut 'b' (toggle breakpoint). +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test.lldbpexpect import PExpectTest + +class TestGuiBasicDebugCommandTest(PExpectTest): + + mydir = TestBase.compute_mydir(__file__) + + # PExpect uses many timeouts internally and doesn't play well + # under ASAN on a loaded machine.. + @skipIfAsan + @skipIfCursesSupportMissing + @skipIfRemote # "run" command will not work correctly for remote debug + def test_gui(self): + self.build() + + self.launch(executable=self.getBuildArtifact("a.out"), dimensions=(100,500)) + self.expect('br set -o true -f main.c -p "// First break here"', substrs=["Breakpoint 1", "address ="]) + self.expect("run", substrs=["stop reason ="]) + + self.child.sendline("breakpoint list") + self.child.expect_exact("No breakpoints currently set.") + + escape_key = chr(27).encode() + down_key = chr(27)+'OB' # for vt100 terminal (lldbexpect sets TERM=vt100) + + # Start the GUI and close the welcome window. + self.child.sendline("gui") + self.child.send(escape_key) + self.child.expect_exact("Sources") # wait for gui + + # Go to next line, set a breakpoint. + self.child.send(down_key) + self.child.send('b') + self.child.send(escape_key) + self.expect_prompt() + self.child.sendline("breakpoint list") + self.child.expect("2: file = '[^']*main.c', line = 3,.*") + self.child.sendline("gui") + self.child.expect_exact("Sources") + + # Go two lines down ("gui" resets position), set a breakpoint. + self.child.send(down_key) + self.child.send(down_key) + self.child.send('b') + self.child.send(escape_key) + self.expect_prompt() + self.child.sendline("breakpoint list") + self.child.expect("2: file = '[^']*main.c', line = 3,") + self.child.expect("3: file = '[^']*main.c', line = 4,") + self.child.sendline("gui") + self.child.expect_exact("Sources") + + # Toggle both the breakpoints (remove them). + self.child.send(down_key) + self.child.send('b') + self.child.send(down_key) + self.child.send('b') + self.child.send(escape_key) + self.expect_prompt() + self.child.sendline("breakpoint list") + self.child.expect_exact("No breakpoints currently set.") + self.child.sendline("gui") + self.child.expect_exact("Sources") + + # Press escape to quit the gui + self.child.send(escape_key) + + self.expect_prompt() + self.quit() diff --git a/lldb/test/API/commands/gui/breakpoints/main.c b/lldb/test/API/commands/gui/breakpoints/main.c new file mode 100644 index 0000000000000..61a0843482a4a --- /dev/null +++ b/lldb/test/API/commands/gui/breakpoints/main.c @@ -0,0 +1,6 @@ +int main(int argc, char **argv) { + int var1 = 1; // First break here + int var2 = 2; + int var3 = 3; + return var1 + var2 + var3; +} From 621681e3e59241c2ba9a4ac59047c46cdcc3c947 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Sat, 1 Aug 2020 20:53:27 +0100 Subject: [PATCH 420/600] [Flang] Fix multi-config generator builds Based on https://reviews.llvm.org/D84022 with additional changes to maintain out-of-tree builds. 
Original commit message: Currently the binaries are output directly into the bin subdirectory of the build directory. This doesn't work correctly with multi-config generators which should output the binaries into /bin instead. The original patch was implemented by David Truby and the additional changes added here were also proposed by David Truby. Differential Revision: https://reviews.llvm.org/D85078/ Co-authored-by: David Truby --- flang/CMakeLists.txt | 13 ++++++++++--- flang/test/CMakeLists.txt | 4 ++++ flang/test/lit.cfg.py | 21 +++++++++++++-------- flang/test/lit.site.cfg.py.in | 1 + flang/tools/f18/CMakeLists.txt | 20 ++++++++++++++++++-- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index f1aaa5c6473fe..0e3228bff3d84 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -180,7 +180,12 @@ else() ${LLVM_INCLUDE_TESTS}) set(FLANG_GTEST_AVAIL 1) - set(FLANG_BINARY_DIR ${CMAKE_BINARY_DIR}/tools/flang) + if(FLANG_STANDALONE_BUILD) + set(FLANG_BINARY_DIR ${CMAKE_BINARY_DIR}/tools/flang) + else() + set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + endif() + set(BACKEND_PACKAGE_STRING "${PACKAGE_STRING}") if (LINK_WITH_FIR) set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root @@ -194,8 +199,10 @@ endif() if(LINK_WITH_FIR) # tco tool and FIR lib output directories - set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin) - set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib) + if(FLANG_STANDALONE_BUILD) + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin) + set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/lib) + endif() # Always build tco tool set(LLVM_BUILD_TOOLS ON) message(STATUS "Linking driver with FIR and LLVM") diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 7da1d94d84c4e..a1532dc7141ff 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -1,6 +1,10 @@ # Test runner infrastructure for Flang. This configures the Flang test trees # for use by Lit, and delegates to LLVM's lit test handlers. +llvm_canonicalize_cmake_booleans( + FLANG_STANDALONE_BUILD +) + set(FLANG_INTRINSIC_MODULES_DIR ${FLANG_BINARY_DIR}/include/flang) set(FLANG_TOOLS_DIR ${FLANG_BINARY_DIR}/bin) diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 8ad5a9b6357f9..25c63890832fe 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -25,9 +25,9 @@ config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. 
-config.suffixes = ['.f', '.F', '.ff','.FOR', '.for', '.f77', '.f90', '.F90', +config.suffixes = ['.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', - '.CUF', '.f18', '.F18', '.fir' ] + '.CUF', '.f18', '.F18', '.fir'] config.substitutions.append(('%PATH%', config.environment['PATH'])) @@ -48,11 +48,12 @@ llvm_config.with_environment('PATH', config.flang_tools_dir, append_path=True) llvm_config.with_environment('PATH', config.llvm_tools_dir, append_path=True) -# For builds with FIR, set path for tco and enable related tests -if config.flang_llvm_tools_dir != "" : - config.available_features.add('fir') - if config.llvm_tools_dir != config.flang_llvm_tools_dir : - llvm_config.with_environment('PATH', config.flang_llvm_tools_dir, append_path=True) +if config.flang_standalone_build: + # For builds with FIR, set path for tco and enable related tests + if config.flang_llvm_tools_dir != "": + config.available_features.add('fir') + if config.llvm_tools_dir != config.flang_llvm_tools_dir: + llvm_config.with_environment('PATH', config.flang_llvm_tools_dir, append_path=True) # For each occurrence of a flang tool name, replace it with the full path to # the build directory holding that tool. @@ -61,7 +62,11 @@ extra_args=["-intrinsic-module-directory "+config.flang_intrinsic_modules_dir], unresolved='fatal') ] -llvm_config.add_tool_substitutions(tools, [config.flang_llvm_tools_dir]) + +if config.flang_standalone_build: + llvm_config.add_tool_substitutions(tools, [config.flang_llvm_tools_dir]) +else: + llvm_config.add_tool_substitutions(tools, config.llvm_tools_dir) # Enable libpgmath testing result = lit_config.params.get("LIBPGMATH") diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index e8e2945a2cbf0..10ec132081544 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -9,6 +9,7 @@ config.flang_tools_dir = "@FLANG_TOOLS_DIR@" config.flang_intrinsic_modules_dir = "@FLANG_INTRINSIC_MODULES_DIR@" config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin" config.python_executable = "@PYTHON_EXECUTABLE@" +config.flang_standalone_build = @FLANG_STANDALONE_BUILD@ # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 46c38fa43a2e5..6103117123ee8 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -59,8 +59,24 @@ add_custom_target(module_files ALL DEPENDS ${MODULE_FILES}) install(TARGETS f18 DESTINATION bin) set(FLANG_INTRINSIC_MODULES_DIR ${FLANG_BINARY_DIR}/include/flang) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${CMAKE_BINARY_DIR}/tools/flang/bin/flang @ONLY) -file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) + +# This flang shell script will only work in a POSIX shell. 
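+# (With a multi-config generator such as Visual Studio or Xcode there is no
+# single build-time bin/ directory; LLVM_RUNTIME_OUTPUT_INTDIR used below
+# resolves to the per-configuration location instead.)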
+if (NOT WIN32) + if (FLANG_STANDALONE_BUILD) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${CMAKE_BINARY_DIR}/tools/flang/bin/flang @ONLY) + file(COPY ${CMAKE_BINARY_DIR}/tools/flang/bin/flang DESTINATION ${CMAKE_BINARY_DIR}/bin FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE) + else() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in + ${CMAKE_CURRENT_BINARY_DIR}/tools/flang/bin/flang @ONLY) + add_custom_command(TARGET f18 + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/tools/flang/bin/flang + ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang + COMMAND chmod +x ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) + endif() +endif() + # The flang script to be installed needs a different path to the headers. set(FLANG_INTRINSIC_MODULES_DIR ${CMAKE_INSTALL_PREFIX}/include/flang) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/flang.sh.in ${FLANG_BINARY_DIR}/bin/flang-install.sh @ONLY) From 04cf4a5a65576f286ca2e8a1aae7584def512761 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 24 Jul 2020 12:05:46 +0100 Subject: [PATCH 421/600] [AMDGPU] Lower frem f16 Without this it would fail to select on subtargets that have 16-bit instructions. Differential Revision: https://reviews.llvm.org/D84517 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/test/CodeGen/AMDGPU/frem.ll | 667 ++++++++++++++++++ 2 files changed, 668 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a697df5553b73..14e05507218e3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -320,6 +320,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setOperationAction(ISD::FREM, MVT::f16, Custom); setOperationAction(ISD::FREM, MVT::f32, Custom); setOperationAction(ISD::FREM, MVT::f64, Custom); diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 0414384dabe49..09c9716024c23 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -3,6 +3,215 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, +; SI-LABEL: frem_f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:8 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v4, v3 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; SI-NEXT: 
v_fma_f32 v4, v5, v4, v4 +; SI-NEXT: v_mul_f32_e32 v5, v2, v4 +; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; SI-NEXT: v_fma_f32 v5, v6, v4, v5 +; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_f16: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 +; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v4, v3 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; CI-NEXT: v_fma_f32 v4, v5, v4, v4 +; CI-NEXT: v_mul_f32_e32 v5, v2, v4 +; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; CI-NEXT: v_fma_f32 v5, v6, v4, v5 +; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 8 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_ushort v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_ushort v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_cvt_f32_f16_e32 v3, v4 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; VI-NEXT: v_rcp_f32_e32 v5, v5 +; VI-NEXT: v_mul_f32_e32 v3, v3, v5 +; VI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; VI-NEXT: v_div_fixup_f16 v3, v3, v2, v4 +; VI-NEXT: v_trunc_f16_e32 v3, v3 +; VI-NEXT: v_fma_f16 v2, -v3, v2, v4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm + half addrspace(1)* %in2) #0 { + %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 + %r0 = load half, half addrspace(1)* %in1, align 4 + %r1 = load half, half addrspace(1)* %gep2, align 4 + %r2 = frem half %r0, %r1 + store half %r2, half addrspace(1)* %out, align 4 + ret void +} + +define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, +; SI-LABEL: unsafe_frem_f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 
0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:8 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_rcp_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: unsafe_frem_f16: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_rcp_f32_e32 v2, v1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_mul_f32_e32 v2, v0, v2 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: unsafe_frem_f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 8 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_ushort v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_ushort v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_rcp_f16_e32 v3, v2 +; VI-NEXT: v_mul_f16_e32 v3, v4, v3 +; VI-NEXT: v_trunc_f16_e32 v3, v3 +; VI-NEXT: v_fma_f16 v2, -v3, v2, v4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm + half addrspace(1)* %in2) #1 { + %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 + %r0 = load half, half addrspace(1)* %in1, align 4 + %r1 = load half, half addrspace(1)* %gep2, align 4 + %r2 = frem half %r0, %r1 + store half %r2, half addrspace(1)* %out, align 4 + ret void +} + define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; SI-LABEL: frem_f32: ; SI: ; %bb.0: @@ -422,6 +631,464 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ret void } +define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1, +; SI-LABEL: frem_v2f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; 
SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:16 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_div_scale_f32 v4, vcc, v0, v2, v0 +; SI-NEXT: v_div_scale_f32 v5, s[4:5], v2, v2, v0 +; SI-NEXT: v_rcp_f32_e32 v6, v5 +; SI-NEXT: s_mov_b32 s6, 3 +; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; SI-NEXT: v_fma_f32 v6, v7, v6, v6 +; SI-NEXT: v_mul_f32_e32 v7, v4, v6 +; SI-NEXT: v_fma_f32 v8, -v5, v7, v4 +; SI-NEXT: v_fma_f32 v7, v8, v6, v7 +; SI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 +; SI-NEXT: v_div_fixup_f32 v4, v4, v2, v0 +; SI-NEXT: v_trunc_f32_e32 v4, v4 +; SI-NEXT: v_fma_f32 v0, -v4, v2, v0 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_div_scale_f32 v2, vcc, v1, v3, v1 +; SI-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v1 +; SI-NEXT: v_rcp_f32_e32 v5, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v6, -v4, v5, 1.0 +; SI-NEXT: v_fma_f32 v5, v6, v5, v5 +; SI-NEXT: v_mul_f32_e32 v6, v2, v5 +; SI-NEXT: v_fma_f32 v7, -v4, v6, v2 +; SI-NEXT: v_fma_f32 v6, v7, v5, v6 +; SI-NEXT: v_fma_f32 v2, -v4, v6, v2 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v2, v2, v5, v6 +; SI-NEXT: v_div_fixup_f32 v2, v2, v3, v1 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v1, -v2, v3, v1 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_v2f16: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s10, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s0, s4 +; CI-NEXT: s_mov_b32 s1, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s11, s3 +; CI-NEXT: s_mov_b32 s6, s2 +; CI-NEXT: s_mov_b32 s7, s3 +; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:16 +; CI-NEXT: s_mov_b32 s6, 3 +; CI-NEXT: s_mov_b32 s7, 0 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_div_scale_f32 v5, s[4:5], v2, v2, v0 +; CI-NEXT: v_div_scale_f32 v4, vcc, v0, v2, v0 +; CI-NEXT: v_rcp_f32_e32 v6, v5 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; CI-NEXT: v_fma_f32 v6, v7, v6, v6 +; CI-NEXT: v_mul_f32_e32 v7, v4, v6 +; CI-NEXT: v_fma_f32 v8, -v5, v7, v4 +; CI-NEXT: v_fma_f32 v7, v8, v6, v7 +; CI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v4, v4, v6, v7 +; CI-NEXT: 
v_div_fixup_f32 v4, v4, v2, v0 +; CI-NEXT: v_trunc_f32_e32 v4, v4 +; CI-NEXT: v_fma_f32 v0, -v4, v2, v0 +; CI-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, v1 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_div_scale_f32 v2, vcc, v1, v3, v1 +; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_rcp_f32_e32 v5, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v6, -v4, v5, 1.0 +; CI-NEXT: v_fma_f32 v5, v6, v5, v5 +; CI-NEXT: v_mul_f32_e32 v6, v2, v5 +; CI-NEXT: v_fma_f32 v7, -v4, v6, v2 +; CI-NEXT: v_fma_f32 v6, v7, v5, v6 +; CI-NEXT: v_fma_f32 v2, -v4, v6, v2 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v2, v2, v5, v6 +; CI-NEXT: v_div_fixup_f32 v2, v2, v3, v1 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_fma_f32 v1, -v2, v3, v1 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_or_b32_e32 v0, v1, v0 +; CI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 16 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_dword v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v5, v3 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; VI-NEXT: v_cvt_f32_f16_e32 v7, v6 +; VI-NEXT: v_rcp_f32_e32 v7, v7 +; VI-NEXT: v_mul_f32_e32 v5, v5, v7 +; VI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; VI-NEXT: v_div_fixup_f16 v5, v5, v6, v3 +; VI-NEXT: v_trunc_f16_e32 v5, v5 +; VI-NEXT: v_fma_f16 v3, -v5, v6, v3 +; VI-NEXT: v_cvt_f32_f16_e32 v6, v2 +; VI-NEXT: v_cvt_f32_f16_e32 v5, v4 +; VI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; VI-NEXT: v_rcp_f32_e32 v6, v6 +; VI-NEXT: v_mul_f32_e32 v5, v5, v6 +; VI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; VI-NEXT: v_div_fixup_f16 v5, v5, v2, v4 +; VI-NEXT: v_trunc_f16_e32 v5, v5 +; VI-NEXT: v_fma_f16 v2, -v5, v2, v4 +; VI-NEXT: v_or_b32_e32 v2, v2, v3 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm + <2 x half> addrspace(1)* %in2) #0 { + %gep2 = getelementptr <2 x half>, <2 x half> addrspace(1)* %in2, i32 4 + %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1, align 8 + %r1 = load <2 x half>, <2 x half> addrspace(1)* %gep2, align 8 + %r2 = frem <2 x half> %r0, %r1 + store <2 x half> %r2, <2 x half> addrspace(1)* %out, align 8 + ret void +} + +define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in1, +; SI-LABEL: frem_v4f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; SI-NEXT: 
v_cvt_f32_f16_e32 v4, v1 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 offset:32 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_div_scale_f32 v8, vcc, v5, v1, v5 +; SI-NEXT: v_div_scale_f32 v9, s[4:5], v1, v1, v5 +; SI-NEXT: v_rcp_f32_e32 v10, v9 +; SI-NEXT: s_mov_b32 s6, 3 +; SI-NEXT: s_mov_b32 s7, 0 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v11, -v9, v10, 1.0 +; SI-NEXT: v_fma_f32 v10, v11, v10, v10 +; SI-NEXT: v_mul_f32_e32 v11, v8, v10 +; SI-NEXT: v_fma_f32 v12, -v9, v11, v8 +; SI-NEXT: v_fma_f32 v11, v12, v10, v11 +; SI-NEXT: v_fma_f32 v8, -v9, v11, v8 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 +; SI-NEXT: v_div_fixup_f32 v8, v8, v1, v5 +; SI-NEXT: v_trunc_f32_e32 v8, v8 +; SI-NEXT: v_fma_f32 v1, -v8, v1, v5 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_div_scale_f32 v5, vcc, v4, v7, v4 +; SI-NEXT: v_div_scale_f32 v8, s[4:5], v7, v7, v4 +; SI-NEXT: v_rcp_f32_e32 v9, v8 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v10, -v8, v9, 1.0 +; SI-NEXT: v_fma_f32 v9, v10, v9, v9 +; SI-NEXT: v_mul_f32_e32 v10, v5, v9 +; SI-NEXT: v_fma_f32 v11, -v8, v10, v5 +; SI-NEXT: v_fma_f32 v10, v11, v9, v10 +; SI-NEXT: v_fma_f32 v5, -v8, v10, v5 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v5, v5, v9, v10 +; SI-NEXT: v_div_fixup_f32 v5, v5, v7, v4 +; SI-NEXT: v_trunc_f32_e32 v5, v5 +; SI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; SI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SI-NEXT: v_or_b32_e32 v1, v4, v1 +; SI-NEXT: v_div_scale_f32 v4, vcc, v3, v0, v3 +; SI-NEXT: v_div_scale_f32 v5, s[4:5], v0, v0, v3 +; SI-NEXT: v_rcp_f32_e32 v7, v5 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v8, -v5, v7, 1.0 +; SI-NEXT: v_fma_f32 v7, v8, v7, v7 +; SI-NEXT: v_mul_f32_e32 v8, v4, v7 +; SI-NEXT: v_fma_f32 v9, -v5, v8, v4 +; SI-NEXT: v_fma_f32 v8, v9, v7, v8 +; SI-NEXT: v_fma_f32 v4, -v5, v8, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v4, v4, v7, v8 +; SI-NEXT: v_div_fixup_f32 v4, v4, v0, v3 +; SI-NEXT: v_trunc_f32_e32 v4, v4 +; SI-NEXT: v_fma_f32 v0, -v4, v0, v3 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_div_scale_f32 v3, vcc, v2, v6, v2 +; SI-NEXT: v_div_scale_f32 v4, s[4:5], v6, v6, v2 +; SI-NEXT: v_rcp_f32_e32 v5, v4 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; SI-NEXT: v_fma_f32 v7, -v4, v5, 1.0 +; SI-NEXT: v_fma_f32 v5, v7, v5, v5 +; SI-NEXT: v_mul_f32_e32 v7, v3, v5 +; SI-NEXT: v_fma_f32 v8, -v4, v7, v3 +; SI-NEXT: v_fma_f32 v7, v8, v5, v7 +; SI-NEXT: v_fma_f32 v3, -v4, v7, v3 +; SI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; SI-NEXT: v_div_fmas_f32 v3, v3, v5, v7 +; SI-NEXT: v_div_fixup_f32 v3, v3, v6, v2 +; SI-NEXT: v_trunc_f32_e32 v3, v3 +; SI-NEXT: v_fma_f32 v2, -v3, v6, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_or_b32_e32 v0, v2, v0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: frem_v4f16: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: 
s_load_dwordx2 s[8:9], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s3, 0xf000 +; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s10, s2 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s0, s4 +; CI-NEXT: s_mov_b32 s1, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s2 +; CI-NEXT: s_mov_b32 s7, s3 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; CI-NEXT: s_mov_b32 s11, s3 +; CI-NEXT: s_mov_b32 s6, 3 +; CI-NEXT: s_mov_b32 s7, 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 offset:32 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_div_scale_f32 v9, s[4:5], v1, v1, v5 +; CI-NEXT: v_div_scale_f32 v8, vcc, v5, v1, v5 +; CI-NEXT: v_rcp_f32_e32 v10, v9 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v11, -v9, v10, 1.0 +; CI-NEXT: v_fma_f32 v10, v11, v10, v10 +; CI-NEXT: v_mul_f32_e32 v11, v8, v10 +; CI-NEXT: v_fma_f32 v12, -v9, v11, v8 +; CI-NEXT: v_fma_f32 v11, v12, v10, v11 +; CI-NEXT: v_fma_f32 v8, -v9, v11, v8 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v8, v8, v10, v11 +; CI-NEXT: v_div_fixup_f32 v8, v8, v1, v5 +; CI-NEXT: v_trunc_f32_e32 v8, v8 +; CI-NEXT: v_fma_f32 v1, -v8, v1, v5 +; CI-NEXT: v_div_scale_f32 v8, s[4:5], v7, v7, v4 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; CI-NEXT: v_div_scale_f32 v5, vcc, v4, v7, v4 +; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; CI-NEXT: v_rcp_f32_e32 v9, v8 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v10, -v8, v9, 1.0 +; CI-NEXT: v_fma_f32 v9, v10, v9, v9 +; CI-NEXT: v_mul_f32_e32 v10, v5, v9 +; CI-NEXT: v_fma_f32 v11, -v8, v10, v5 +; CI-NEXT: v_fma_f32 v10, v11, v9, v10 +; CI-NEXT: v_fma_f32 v5, -v8, v10, v5 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v5, v5, v9, v10 +; CI-NEXT: v_div_fixup_f32 v5, v5, v7, v4 +; CI-NEXT: v_trunc_f32_e32 v5, v5 +; CI-NEXT: v_fma_f32 v4, -v5, v7, v4 +; CI-NEXT: v_div_scale_f32 v5, s[4:5], v0, v0, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; CI-NEXT: v_or_b32_e32 v1, v4, v1 +; CI-NEXT: v_div_scale_f32 v4, vcc, v3, v0, v3 +; CI-NEXT: v_rcp_f32_e32 v7, v5 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v8, -v5, v7, 1.0 +; CI-NEXT: v_fma_f32 v7, v8, v7, v7 +; CI-NEXT: v_mul_f32_e32 v8, v4, v7 +; CI-NEXT: v_fma_f32 v9, -v5, v8, v4 +; CI-NEXT: v_fma_f32 v8, v9, v7, v8 +; CI-NEXT: v_fma_f32 v4, -v5, v8, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v4, v4, v7, v8 +; CI-NEXT: v_div_fixup_f32 v4, v4, v0, v3 +; CI-NEXT: v_trunc_f32_e32 v4, v4 +; CI-NEXT: v_fma_f32 v0, -v4, v0, v3 +; CI-NEXT: v_div_scale_f32 v4, s[4:5], v6, v6, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: v_div_scale_f32 v3, vcc, v2, v6, v2 +; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CI-NEXT: v_rcp_f32_e32 v5, v4 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s6 +; CI-NEXT: v_fma_f32 v7, -v4, v5, 1.0 +; CI-NEXT: v_fma_f32 v5, v7, v5, v5 +; CI-NEXT: v_mul_f32_e32 v7, v3, v5 +; 
CI-NEXT: v_fma_f32 v8, -v4, v7, v3 +; CI-NEXT: v_fma_f32 v7, v8, v5, v7 +; CI-NEXT: v_fma_f32 v3, -v4, v7, v3 +; CI-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s7 +; CI-NEXT: v_div_fmas_f32 v3, v3, v5, v7 +; CI-NEXT: v_div_fixup_f32 v3, v3, v6, v2 +; CI-NEXT: v_trunc_f32_e32 v3, v3 +; CI-NEXT: v_fma_f32 v2, -v3, v6, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_or_b32_e32 v0, v2, v0 +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: frem_v4f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 32 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v5 +; VI-NEXT: v_cvt_f32_f16_e32 v9, v8 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; VI-NEXT: v_cvt_f32_f16_e32 v7, v6 +; VI-NEXT: v_rcp_f32_e32 v9, v9 +; VI-NEXT: v_mul_f32_e32 v7, v7, v9 +; VI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; VI-NEXT: v_div_fixup_f16 v7, v7, v8, v6 +; VI-NEXT: v_trunc_f16_e32 v7, v7 +; VI-NEXT: v_fma_f16 v6, -v7, v8, v6 +; VI-NEXT: v_cvt_f32_f16_e32 v8, v5 +; VI-NEXT: v_cvt_f32_f16_e32 v7, v3 +; VI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; VI-NEXT: v_rcp_f32_e32 v8, v8 +; VI-NEXT: v_mul_f32_e32 v7, v7, v8 +; VI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; VI-NEXT: v_div_fixup_f16 v7, v7, v5, v3 +; VI-NEXT: v_trunc_f16_e32 v7, v7 +; VI-NEXT: v_fma_f16 v3, -v7, v5, v3 +; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v8, v7 +; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; VI-NEXT: v_or_b32_e32 v3, v3, v6 +; VI-NEXT: v_cvt_f32_f16_e32 v6, v5 +; VI-NEXT: v_rcp_f32_e32 v8, v8 +; VI-NEXT: v_mul_f32_e32 v6, v6, v8 +; VI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; VI-NEXT: v_div_fixup_f16 v6, v6, v7, v5 +; VI-NEXT: v_trunc_f16_e32 v6, v6 +; VI-NEXT: v_fma_f16 v5, -v6, v7, v5 +; VI-NEXT: v_cvt_f32_f16_e32 v7, v4 +; VI-NEXT: v_cvt_f32_f16_e32 v6, v2 +; VI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; VI-NEXT: v_rcp_f32_e32 v7, v7 +; VI-NEXT: v_mul_f32_e32 v6, v6, v7 +; VI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; VI-NEXT: v_div_fixup_f16 v6, v6, v4, v2 +; VI-NEXT: v_trunc_f16_e32 v6, v6 +; VI-NEXT: v_fma_f16 v2, -v6, v4, v2 +; VI-NEXT: v_or_b32_e32 v2, v2, v5 +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm + <4 x half> addrspace(1)* %in2) #0 { + %gep2 = getelementptr <4 x half>, <4 x half> addrspace(1)* %in2, i32 4 + %r0 = load <4 x half>, <4 x half> addrspace(1)* %in1, align 16 + %r1 = load <4 x half>, <4 x half> addrspace(1)* %gep2, align 16 + %r2 = frem <4 x half> %r0, %r1 + store <4 x half> %r2, <4 x half> addrspace(1)* %out, align 16 + ret void +} + define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, ; SI-LABEL: frem_v2f32: ; SI: ; %bb.0: From 1bb07e1b91c187d868bfe383175c2ce04ebed8b8 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 24 Jul 2020 12:52:36 +0100 Subject: [PATCH 422/600] [AMDGPU] Precommit tests for D84518 Propagate fast math flags in frem lowering --- llvm/test/CodeGen/AMDGPU/frem.ll | 362 +++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll 
b/llvm/test/CodeGen/AMDGPU/frem.ll index 09c9716024c23..0fc9291ac2873 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -122,6 +122,125 @@ define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* ret void } +define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, +; SI-LABEL: fast_frem_f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:8 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v4, v3 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; SI-NEXT: v_fma_f32 v4, v5, v4, v4 +; SI-NEXT: v_mul_f32_e32 v5, v2, v4 +; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; SI-NEXT: v_fma_f32 v5, v6, v4, v5 +; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: fast_frem_f16: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 +; CI-NEXT: s_waitcnt vmcnt(1) +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 +; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v4, v3 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; CI-NEXT: v_fma_f32 v4, v5, v4, v4 +; CI-NEXT: v_mul_f32_e32 v5, v2, v4 +; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; CI-NEXT: v_fma_f32 v5, v6, v4, v5 +; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 +; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: fast_frem_f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: 
s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 8 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_ushort v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_ushort v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) +; VI-NEXT: v_cvt_f32_f16_e32 v3, v4 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; VI-NEXT: v_rcp_f32_e32 v5, v5 +; VI-NEXT: v_mul_f32_e32 v3, v3, v5 +; VI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; VI-NEXT: v_div_fixup_f16 v3, v3, v2, v4 +; VI-NEXT: v_trunc_f16_e32 v3, v3 +; VI-NEXT: v_fma_f16 v2, -v3, v2, v4 +; VI-NEXT: flat_store_short v[0:1], v2 +; VI-NEXT: s_endpgm + half addrspace(1)* %in2) #0 { + %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 + %r0 = load half, half addrspace(1)* %in1, align 4 + %r1 = load half, half addrspace(1)* %gep2, align 4 + %r2 = frem fast half %r0, %r1 + store half %r2, half addrspace(1)* %out, align 4 + ret void +} + define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, ; SI-LABEL: unsafe_frem_f16: ; SI: ; %bb.0: @@ -327,6 +446,121 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ret void } +define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, +; SI-LABEL: fast_frem_f32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s6 +; SI-NEXT: s_mov_b32 s5, s7 +; SI-NEXT: s_mov_b32 s6, s2 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: s_mov_b32 s10, s2 +; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; SI-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:16 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v4, v3 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; SI-NEXT: v_fma_f32 v4, v5, v4, v4 +; SI-NEXT: v_mul_f32_e32 v5, v2, v4 +; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; SI-NEXT: v_fma_f32 v5, v6, v4, v5 +; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: fast_frem_f32: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 +; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 +; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v4, v3 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 
4, 2), 3 +; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 +; CI-NEXT: v_fma_f32 v4, v5, v4, v4 +; CI-NEXT: v_mul_f32_e32 v5, v2, v4 +; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 +; CI-NEXT: v_fma_f32 v5, v6, v4, v5 +; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 +; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 +; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_trunc_f32_e32 v2, v2 +; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: fast_frem_f32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: s_add_u32 s0, s0, 16 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: s_addc_u32 s1, s1, 0 +; VI-NEXT: flat_load_dword v4, v[2:3] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_load_dword v2, v[2:3] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f32 v5, s[0:1], v2, v2, v4 +; VI-NEXT: v_div_scale_f32 v3, vcc, v4, v2, v4 +; VI-NEXT: v_rcp_f32_e32 v6, v5 +; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 +; VI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 +; VI-NEXT: v_fma_f32 v6, v7, v6, v6 +; VI-NEXT: v_mul_f32_e32 v7, v3, v6 +; VI-NEXT: v_fma_f32 v8, -v5, v7, v3 +; VI-NEXT: v_fma_f32 v7, v8, v6, v7 +; VI-NEXT: v_fma_f32 v3, -v5, v7, v3 +; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 +; VI-NEXT: v_div_fmas_f32 v3, v3, v6, v7 +; VI-NEXT: v_div_fixup_f32 v3, v3, v2, v4 +; VI-NEXT: v_trunc_f32_e32 v3, v3 +; VI-NEXT: v_fma_f32 v2, -v3, v2, v4 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm + float addrspace(1)* %in2) #0 { + %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 + %r0 = load float, float addrspace(1)* %in1, align 4 + %r1 = load float, float addrspace(1)* %gep2, align 4 + %r2 = frem fast float %r0, %r1 + store float %r2, float addrspace(1)* %out, align 4 + ret void +} + define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; SI-LABEL: unsafe_frem_f32: ; SI: ; %bb.0: @@ -537,6 +771,134 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ret void } +define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +; SI-LABEL: fast_frem_f64: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s8 +; SI-NEXT: s_mov_b32 s5, s9 +; SI-NEXT: s_mov_b32 s0, s10 +; SI-NEXT: s_mov_b32 s1, s11 +; SI-NEXT: s_mov_b32 s2, s6 +; SI-NEXT: s_mov_b32 s3, s7 +; SI-NEXT: s_mov_b32 s14, s6 +; SI-NEXT: s_mov_b32 s15, s7 +; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[12:15], 0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_div_scale_f64 v[4:5], s[0:1], v[2:3], v[2:3], v[0:1] +; SI-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] +; SI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; SI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; SI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; SI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; SI-NEXT: v_div_scale_f64 v[8:9], s[0:1], v[0:1], v[2:3], v[0:1] +; SI-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] +; SI-NEXT: v_fma_f64 v[12:13], -v[4:5], v[10:11], v[8:9] +; SI-NEXT: 
v_cmp_eq_u32_e32 vcc, v3, v5 +; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9 +; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc +; SI-NEXT: s_nop 0 +; SI-NEXT: s_nop 0 +; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11] +; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] +; SI-NEXT: v_bfe_u32 v6, v5, 20, 11 +; SI-NEXT: v_add_i32_e32 v8, vcc, 0xfffffc01, v6 +; SI-NEXT: s_mov_b32 s1, 0xfffff +; SI-NEXT: s_mov_b32 s0, s6 +; SI-NEXT: v_lshr_b64 v[6:7], s[0:1], v8 +; SI-NEXT: v_not_b32_e32 v6, v6 +; SI-NEXT: v_and_b32_e32 v6, v4, v6 +; SI-NEXT: v_not_b32_e32 v7, v7 +; SI-NEXT: v_and_b32_e32 v7, v5, v7 +; SI-NEXT: v_and_b32_e32 v9, 0x80000000, v5 +; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v8 +; SI-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 51, v8 +; SI-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[0:1] +; SI-NEXT: v_cndmask_b32_e64 v6, v6, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[0:1] +; SI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; CI-LABEL: fast_frem_f64: +; CI: ; %bb.0: +; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; CI-NEXT: s_mov_b32 s11, 0xf000 +; CI-NEXT: s_mov_b32 s10, -1 +; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: s_mov_b32 s8, s4 +; CI-NEXT: s_mov_b32 s9, s5 +; CI-NEXT: s_mov_b32 s4, s6 +; CI-NEXT: s_mov_b32 s5, s7 +; CI-NEXT: s_mov_b32 s6, s10 +; CI-NEXT: s_mov_b32 s7, s11 +; CI-NEXT: s_mov_b32 s3, s11 +; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_div_scale_f64 v[4:5], s[0:1], v[2:3], v[2:3], v[0:1] +; CI-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] +; CI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; CI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; CI-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; CI-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] +; CI-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] +; CI-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] +; CI-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] +; CI-NEXT: s_nop 1 +; CI-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] +; CI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1] +; CI-NEXT: v_trunc_f64_e32 v[4:5], v[4:5] +; CI-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1] +; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; CI-NEXT: s_endpgm +; +; VI-LABEL: fast_frem_f64: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mov_b32_e32 v3, s7 +; VI-NEXT: v_mov_b32_e32 v4, s0 +; VI-NEXT: v_mov_b32_e32 v5, s1 +; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3] +; VI-NEXT: flat_load_dwordx2 v[4:5], v[4:5] +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VI-NEXT: v_div_scale_f64 v[6:7], s[0:1], v[4:5], v[4:5], v[2:3] +; VI-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] +; VI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; VI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; VI-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 +; VI-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] +; VI-NEXT: v_div_scale_f64 v[10:11], vcc, v[2:3], v[4:5], v[2:3] +; VI-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] +; VI-NEXT: v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11] +; VI-NEXT: s_nop 1 +; VI-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] +; 
VI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[2:3] +; VI-NEXT: v_trunc_f64_e32 v[6:7], v[6:7] +; VI-NEXT: v_fma_f64 v[2:3], -v[6:7], v[4:5], v[2:3] +; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; VI-NEXT: s_endpgm + double addrspace(1)* %in2) #0 { + %r0 = load double, double addrspace(1)* %in1, align 8 + %r1 = load double, double addrspace(1)* %in2, align 8 + %r2 = frem fast double %r0, %r1 + store double %r2, double addrspace(1)* %out, align 8 + ret void +} + define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ; SI-LABEL: unsafe_frem_f64: ; SI: ; %bb.0: From 8cbf4a17ac57921b2c0e2112e766555d345494c4 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 24 Jul 2020 12:55:12 +0100 Subject: [PATCH 423/600] [AMDGPU] Propagate fast math flags in frem lowering Differential Revision: https://reviews.llvm.org/D84518 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 11 +-- llvm/test/CodeGen/AMDGPU/frem.ll | 94 ++++--------------- 2 files changed, 21 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 14e05507218e3..095018af5e7a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2084,16 +2084,15 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); EVT VT = Op.getValueType(); + auto Flags = Op->getFlags(); SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); - // TODO: Should this propagate fast-math-flags? - - SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); - SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div); - SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc); + SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags); + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags); + SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags); // TODO: For f32 use FMAD instead if !hasFastFMA32? 
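  // The sequence below expands frem as x - trunc(x / y) * y, fused into
  // fma(-trunc(fdiv(x, y)), y, x). With Flags forwarded, a `frem fast`
  // now emits a fast fdiv, which the backend can select to a plain
  // v_rcp/v_mul pair instead of the full v_div_scale / v_div_fmas /
  // v_div_fixup expansion; the frem.ll test updates below show exactly
  // this simplification.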
- return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X); + return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags); } SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 0fc9291ac2873..720e45b3c30f5 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -144,22 +144,10 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace ; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:8 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 -; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 -; SI-NEXT: v_rcp_f32_e32 v4, v3 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 -; SI-NEXT: v_fma_f32 v4, v5, v4, v4 -; SI-NEXT: v_mul_f32_e32 v5, v2, v4 -; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 -; SI-NEXT: v_fma_f32 v5, v6, v4, v5 -; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 -; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 ; SI-NEXT: v_trunc_f32_e32 v2, v2 ; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm @@ -171,36 +159,24 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace ; CI-NEXT: s_mov_b32 s11, 0xf000 ; CI-NEXT: s_mov_b32 s10, -1 ; CI-NEXT: s_mov_b32 s2, s10 +; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 ; CI-NEXT: s_mov_b32 s8, s4 ; CI-NEXT: s_mov_b32 s9, s5 ; CI-NEXT: s_mov_b32 s4, s6 ; CI-NEXT: s_mov_b32 s5, s7 -; CI-NEXT: s_mov_b32 s3, s11 ; CI-NEXT: s_mov_b32 s6, s10 ; CI-NEXT: s_mov_b32 s7, s11 ; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; CI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:8 ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 -; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 -; CI-NEXT: v_rcp_f32_e32 v4, v3 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 -; CI-NEXT: v_fma_f32 v4, v5, v4, v4 -; CI-NEXT: v_mul_f32_e32 v5, v2, v4 -; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 -; CI-NEXT: v_fma_f32 v5, v6, v4, v5 -; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 -; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v2, v1 +; CI-NEXT: s_waitcnt vmcnt(0) +; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; CI-NEXT: v_mul_f32_e32 v2, v0, v2 ; CI-NEXT: v_trunc_f32_e32 v2, v2 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; CI-NEXT: buffer_store_short v0, off, s[8:11], 0 ; CI-NEXT: s_endpgm @@ -220,14 +196,9 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace ; VI-NEXT: flat_load_ushort v2, v[2:3] ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 -; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) -; VI-NEXT: v_cvt_f32_f16_e32 v3, v4 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v5, v2 -; VI-NEXT: 
v_rcp_f32_e32 v5, v5 -; VI-NEXT: v_mul_f32_e32 v3, v3, v5 -; VI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; VI-NEXT: v_div_fixup_f16 v3, v3, v2, v4 +; VI-NEXT: v_rcp_f16_e32 v3, v2 +; VI-NEXT: v_mul_f16_e32 v3, v4, v3 ; VI-NEXT: v_trunc_f16_e32 v3, v3 ; VI-NEXT: v_fma_f16 v2, -v3, v2, v4 ; VI-NEXT: flat_store_short v[0:1], v2 @@ -465,19 +436,8 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; SI-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:16 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 -; SI-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 -; SI-NEXT: v_rcp_f32_e32 v4, v3 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; SI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 -; SI-NEXT: v_fma_f32 v4, v5, v4, v4 -; SI-NEXT: v_mul_f32_e32 v5, v2, v4 -; SI-NEXT: v_fma_f32 v6, -v3, v5, v2 -; SI-NEXT: v_fma_f32 v5, v6, v4, v5 -; SI-NEXT: v_fma_f32 v2, -v3, v5, v2 -; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; SI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 -; SI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; SI-NEXT: v_rcp_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 ; SI-NEXT: v_trunc_f32_e32 v2, v2 ; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -501,19 +461,8 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa ; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: v_div_scale_f32 v3, s[0:1], v1, v1, v0 -; CI-NEXT: v_div_scale_f32 v2, vcc, v0, v1, v0 -; CI-NEXT: v_rcp_f32_e32 v4, v3 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; CI-NEXT: v_fma_f32 v5, -v3, v4, 1.0 -; CI-NEXT: v_fma_f32 v4, v5, v4, v4 -; CI-NEXT: v_mul_f32_e32 v5, v2, v4 -; CI-NEXT: v_fma_f32 v6, -v3, v5, v2 -; CI-NEXT: v_fma_f32 v5, v6, v4, v5 -; CI-NEXT: v_fma_f32 v2, -v3, v5, v2 -; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; CI-NEXT: v_div_fmas_f32 v2, v2, v4, v5 -; CI-NEXT: v_div_fixup_f32 v2, v2, v1, v0 +; CI-NEXT: v_rcp_f32_e32 v2, v1 +; CI-NEXT: v_mul_f32_e32 v2, v0, v2 ; CI-NEXT: v_trunc_f32_e32 v2, v2 ; CI-NEXT: v_fma_f32 v0, -v2, v1, v0 ; CI-NEXT: buffer_store_dword v0, off, s[8:11], 0 @@ -535,19 +484,8 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VI-NEXT: v_div_scale_f32 v5, s[0:1], v2, v2, v4 -; VI-NEXT: v_div_scale_f32 v3, vcc, v4, v2, v4 -; VI-NEXT: v_rcp_f32_e32 v6, v5 -; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 -; VI-NEXT: v_fma_f32 v7, -v5, v6, 1.0 -; VI-NEXT: v_fma_f32 v6, v7, v6, v6 -; VI-NEXT: v_mul_f32_e32 v7, v3, v6 -; VI-NEXT: v_fma_f32 v8, -v5, v7, v3 -; VI-NEXT: v_fma_f32 v7, v8, v6, v7 -; VI-NEXT: v_fma_f32 v3, -v5, v7, v3 -; VI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 -; VI-NEXT: v_div_fmas_f32 v3, v3, v6, v7 -; VI-NEXT: v_div_fixup_f32 v3, v3, v2, v4 +; VI-NEXT: v_rcp_f32_e32 v3, v2 +; VI-NEXT: v_mul_f32_e32 v3, v4, v3 ; VI-NEXT: v_trunc_f32_e32 v3, v3 ; VI-NEXT: v_fma_f32 v2, -v3, v2, v4 ; VI-NEXT: flat_store_dword v[0:1], v2 From 159806704b05f2b562ea93e622f8dbb1976e24f0 Mon Sep 17 00:00:00 2001 From: George Mitenkov Date: Wed, 5 Aug 2020 11:18:38 +0300 Subject: [PATCH 424/600] [MLIR][SPIRVToLLVM] Updated LLVM types in the documentation Updated the documentation with new MLIR LLVM types for vectors, pointers, arrays and structs. 
Also, changed remaining tabs to spaces. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D85277 --- mlir/docs/SPIRVToLLVMDialectConversion.md | 58 +++++++++++------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/mlir/docs/SPIRVToLLVMDialectConversion.md b/mlir/docs/SPIRVToLLVMDialectConversion.md index dcc872f59542d..567d9c9510810 100644 --- a/mlir/docs/SPIRVToLLVMDialectConversion.md +++ b/mlir/docs/SPIRVToLLVMDialectConversion.md @@ -36,7 +36,7 @@ SPIR-V Dialect | LLVM Dialect SPIR-V Dialect | LLVM Dialect :----------------------------------: | :----------------------------------: -`vector< x >` | `!llvm<"< x >">` +`vector< x >` | `!llvm.vec< x >` ### Pointer types @@ -45,7 +45,7 @@ A SPIR-V pointer also takes a Storage Class. At the moment, conversion does SPIR-V Dialect | LLVM Dialect :-------------------------------------------: | :-------------------------: -`!spv.ptr< , >` | `!llvm.element-type*` +`!spv.ptr< , >` | `!llvm.ptr<>` ### Array types @@ -59,8 +59,8 @@ supported by type conversion at the moment. SPIR-V Dialect | LLVM Dialect :-----------------------------------: | :-----------------------------------: -`!spv.array< x >`| `!llvm<"[ x ]">` -`!spv.rtarray< >` | `!llvm<"[0 x ]">` +`!spv.array< x >`| `!llvm.array< x >` +`!spv.rtarray< >` | `!llvm.array<0 x >` ### Struct types @@ -88,8 +88,8 @@ at the moment. Hence, we adhere to the following mapping: Examples of SPIR-V struct conversion are: ```mlir -!spv.struct => !llvm<"<{ i8, i32 }>"> -!spv.struct => !llvm<"{ i8, i32 }"> +!spv.struct => !llvm.struct +!spv.struct => !llvm.struct<(i8, i32)> // error !spv.struct @@ -188,11 +188,11 @@ to note: ```mlir // Broadcasting offset - %offset0 = llvm.mlir.undef : !llvm<"<2 x i8>"> + %offset0 = llvm.mlir.undef : !llvm.vec<2 x i8> %zero = llvm.mlir.constant(0 : i32) : !llvm.i32 - %offset1 = llvm.insertelement %offset, %offset0[%zero : !llvm.i32] : !llvm<"<2 x i8>"> + %offset1 = llvm.insertelement %offset, %offset0[%zero : !llvm.i32] : !llvm.vec<2 x i8> %one = llvm.mlir.constant(1 : i32) : !llvm.i32 - %vec_offset = llvm.insertelement %offset, %offset1[%one : !llvm.i32] : !llvm<"<2 x i8>"> + %vec_offset = llvm.insertelement %offset, %offset1[%one : !llvm.i32] : !llvm.vec<2 x i8> // Broadcasting count // ... @@ -205,7 +205,7 @@ to note: ```mlir // Zero extending offest after broadcasting - %res_offset = llvm.zext %vec_offset: !llvm<"<2 x i8>"> to !llvm<"<2 x i32>"> + %res_offset = llvm.zext %vec_offset: !llvm.vec<2 x i8> to !llvm.vec<2 x i32> ``` Also, note that if the bitwidth of `offset` or `count` is greater than the @@ -386,19 +386,19 @@ following cases, based on the value of the attribute: * **Aligned**: alignment is passed on to LLVM op builder, for example: ```mlir - // llvm.store %ptr, %val {alignment = 4 : i64} : !llvm<"float*"> + // llvm.store %ptr, %val {alignment = 4 : i64} : !llvm.ptr spv.Store "Function" %ptr, %val ["Aligned", 4] : f32 ``` * **None**: same case as if there is no memory access attribute. 
* **Nontemporal**: set `nontemporal` flag, for example: ```mlir - // %res = llvm.load %ptr {nontemporal} : !llvm<"float*"> + // %res = llvm.load %ptr {nontemporal} : !llvm.ptr %res = spv.Load "Function" %ptr ["Nontemporal"] : f32 ``` * **Volatile**: mark the op as `volatile`, for example: ```mlir - // %res = llvm.load volatile %ptr : !llvm<"float*"> + // %res = llvm.load volatile %ptr : !llvm.ptr %res = spv.Load "Function" %ptr ["Volatile"] : f32 ``` Otherwise the conversion fails as other cases (`MakePointerAvailable`, @@ -426,9 +426,9 @@ spv.module Logical GLSL450 { // Converted result module { - llvm.mlir.global private @struct() : !llvm<"<{ float, [10 x float] }>"> + llvm.mlir.global private @struct() : !llvm.struct llvm.func @func() { - %0 = llvm.mlir.addressof @struct : !llvm<"<{ float, [10 x float] }>*"> + %0 = llvm.mlir.addressof @struct : !llvm.ptr> llvm.return } } @@ -469,13 +469,13 @@ Also, at the moment initialization is only possible via `spv.constant`. ```mlir // Conversion of VariableOp without initialization %size = llvm.mlir.constant(1 : i32) : !llvm.i32 -%res = spv.Variable : !spv.ptr, Function> => %res = llvm.alloca %size x !llvm<"<3 x float>"> : (!llvm.i32) -> !llvm<"<3 x float>*"> +%res = spv.Variable : !spv.ptr, Function> => %res = llvm.alloca %size x !llvm.vec<3 x float> : (!llvm.i32) -> !llvm.ptr> // Conversion of VariableOp with initialization %c = llvm.mlir.constant(0 : i64) : !llvm.i64 %c = spv.constant 0 : i64 %size = llvm.mlir.constant(1 : i32) : !llvm.i32 -%res = spv.Variable init(%c) : !spv.ptr => %res = llvm.alloca %[[SIZE]] x !llvm.i64 : (!llvm.i32) -> !llvm<"i64*"> - llvm.store %c, %res : !llvm<"i64*"> +%res = spv.Variable init(%c) : !spv.ptr => %res = llvm.alloca %[[SIZE]] x !llvm.i64 : (!llvm.i32) -> !llvm.ptr + llvm.store %c, %res : !llvm.ptr ``` Note that simple conversion to `alloca` may not be sufficent if the code has @@ -545,7 +545,7 @@ cover all possible corner cases. // %0 = llvm.mlir.constant(0 : i8) : !llvm.i8 %0 = spv.constant 0 : i8 -// %1 = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm<"<3 x i32>"> +// %1 = llvm.mlir.constant(dense<[2, 3, 4]> : vector<3xi32>) : !llvm.vec<3 x i32> %1 = spv.constant dense<[2, 3, 4]> : vector<3xui32> ``` @@ -606,23 +606,23 @@ blocks being reachable. Moreover, selection and loop control attributes (such as ```mlir // Conversion of selection -%cond = spv.constant true %cond = llvm.mlir.constant(true) : !llvm.i1 +%cond = spv.constant true %cond = llvm.mlir.constant(true) : !llvm.i1 spv.selection { - spv.BranchConditional %cond, ^true, ^false llvm.cond_br %cond, ^true, ^false + spv.BranchConditional %cond, ^true, ^false llvm.cond_br %cond, ^true, ^false ^true: ^true: - // True block code // True block code - spv.Branch ^merge => llvm.br ^merge + // True block code // True block code + spv.Branch ^merge => llvm.br ^merge -^false: ^false: - // False block code // False block code - spv.Branch ^merge llvm.br ^merge +^false: ^false: + // False block code // False block code + spv.Branch ^merge llvm.br ^merge -^merge: ^merge: - spv._merge llvm.br ^continue +^merge: ^merge: + spv._merge llvm.br ^continue } // Remaining code ^continue: - // Remaining code + // Remaining code ``` ```mlir From f2916636f83dfeb4808a16045db0025783743471 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 5 Aug 2020 09:17:57 +0100 Subject: [PATCH 425/600] [AArch64][SVE] Disable tail calls if callee does not preserve SVE regs. 
This fixes an issue triggered by the following code, where emitEpilogue got confused when trying to restore the SVE registers after the call, whereas the call to bar() is implemented as a TCReturn: int non_sve(); int sve(svint32_t x) { return non_sve(); } Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D84869 --- .../Target/AArch64/AArch64ISelLowering.cpp | 38 ++++--- .../Target/AArch64/AArch64RegisterInfo.cpp | 2 +- llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 2 + llvm/test/CodeGen/AArch64/sve-tailcall.ll | 107 ++++++++++++++++++ 4 files changed, 134 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-tailcall.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 03b33086e0c85..13d6a0b882343 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4132,6 +4132,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) { static bool mayTailCallThisCC(CallingConv::ID CC) { switch (CC) { case CallingConv::C: + case CallingConv::AArch64_SVE_VectorCall: case CallingConv::PreserveMost: case CallingConv::Swift: return true; @@ -4151,6 +4152,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( MachineFunction &MF = DAG.getMachineFunction(); const Function &CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF.getCallingConv(); + + // If this function uses the C calling convention but has an SVE signature, + // then it preserves more registers and should assume the SVE_VectorCall CC. + // The check for matching callee-saved regs will determine whether it is + // eligible for TCO. + if (CallerCC == CallingConv::C && + AArch64RegisterInfo::hasSVEArgsOrReturn(&MF)) + CallerCC = CallingConv::AArch64_SVE_VectorCall; + bool CCMatch = CallerCC == CalleeCC; // When using the Windows calling convention on a non-windows OS, we want @@ -4338,6 +4348,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; + // Check callee args/returns for SVE registers and set calling convention + // accordingly. + if (CallConv == CallingConv::C) { + bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ + return Out.VT.isScalableVector(); + }); + bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ + return In.VT.isScalableVector(); + }); + + if (CalleeInSVE || CalleeOutSVE) + CallConv = CallingConv::AArch64_SVE_VectorCall; + } + if (IsTailCall) { // Check if it's really possible to do a tail call. IsTailCall = isEligibleForTailCallOptimization( @@ -4691,20 +4715,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegToPass.first, RegToPass.second.getValueType())); - // Check callee args/returns for SVE registers and set calling convention - // accordingly. - if (CallConv == CallingConv::C) { - bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){ - return Out.VT.isScalableVector(); - }); - bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){ - return In.VT.isScalableVector(); - }); - - if (CalleeInSVE || CalleeOutSVE) - CallConv = CallingConv::AArch64_SVE_VectorCall; - } - // Add a register mask operand representing the call-preserved registers. 
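  // For an SVE callee the calling convention has already been switched to
  // AArch64_SVE_VectorCall above, so the mask chosen here also covers the
  // SVE callee-saved registers (z8-z23 and p4-p15).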
const uint32_t *Mask; const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 62cc865fd1c36..3e9c8c7b6df2c 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -63,7 +63,7 @@ bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg, return true; } -static bool hasSVEArgsOrReturn(const MachineFunction *MF) { +bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) { const Function &F = MF->getFunction(); return isa(F.getReturnType()) || any_of(F.args(), [](const Argument &Arg) { diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index 91064787d3dac..7b20f181e76df 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -42,6 +42,8 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { void UpdateCustomCallPreservedMask(MachineFunction &MF, const uint32_t **Mask) const; + static bool hasSVEArgsOrReturn(const MachineFunction *MF); + /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const; diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll b/llvm/test/CodeGen/AArch64/sve-tailcall.ll new file mode 100644 index 0000000000000..81e98ba542876 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+sve < %s 2>%t | FileCheck %s +; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t + +; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it. +; WARN-NOT: warning + +; Check that a tail call from an SVE function to another SVE function +; can use a tail-call, as the same registers will be preserved by the +; callee. +define @sve_caller_sve_callee() nounwind { +; CHECK-LABEL: sve_caller_sve_callee: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-2 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #2 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: b sve_callee + tail call void asm sideeffect "", "~{z9},~{z10}"() + %call = tail call @sve_callee() + ret %call +} + +declare @sve_callee() + +; Check that a tail call from an SVE function to a non-SVE function +; does not use a tail-call, because after the call many of the SVE +; registers may be clobbered and needs to be restored. +define i32 @sve_caller_non_sve_callee( %arg) nounwind { +; CHECK-LABEL: sve_caller_non_sve_callee: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-18 +; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: bl non_sve_callee +; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z11, [sp, 
#14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: addvl sp, sp, #18
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{z9},~{z10}"()
+ %call = tail call i32 @non_sve_callee()
+ ret i32 %call
+}
+
+declare i32 @non_sve_callee()

From ba0e71432a60e1fa2da9e098cbc574a1d9b9618b Mon Sep 17 00:00:00 2001
From: David Turner
Date: Wed, 5 Aug 2020 10:50:06 +0200
Subject: [PATCH 426/600] Do not map read-only data memory sections with EXECUTE flags.

The code in SectionMemoryManager.cpp unnecessarily maps read-only data
sections with the READ+EXECUTE flags. This is undesirable from a security
standpoint. Moreover, on the Fuchsia platform, which is now very strict about
mapping pages with the EXECUTE permission, this simply fails, because the
section's pages were initially allocated with only the READ+WRITE flags.

A more detailed description of the issue can be found in this public
SwiftShader bug: https://issuetracker.google.com/issues/154586551

This patch just restricts the mapping to the READ flag for ROData sections.
Code sections are still mapped with READ+EXECUTE as expected.

Reviewed By: lhames

Differential Revision: https://reviews.llvm.org/D78574
---
 llvm/lib/ExecutionEngine/SectionMemoryManager.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index febcabfaa5719..138b18a1ddcb0 100644
--- a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -161,8 +161,7 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) {
   }

   // Make read-only data memory read-only.
-  ec = applyMemoryGroupPermissions(RODataMem,
-                                   sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+  ec = applyMemoryGroupPermissions(RODataMem, sys::Memory::MF_READ);
   if (ec) {
     if (ErrMsg) {
       *ErrMsg = ec.message();

From cc68c122cd00f99037b8ff7e645e2b387d56da8b Mon Sep 17 00:00:00 2001
From: Tatyana Krasnukha
Date: Tue, 4 Aug 2020 17:15:29 +0300
Subject: [PATCH 427/600] [lldb/TestingSupport] Manually disable GTEST_HAS_TR1_TUPLE

Gtest 1.8.0 uses tr1::tuple which is deprecated on MSVC. We have to force it
off to avoid the compiler warnings, which will become errors after switching
on C++17
(https://devblogs.microsoft.com/cppblog/c17-feature-removals-and-deprecations).
---
 lldb/unittests/TestingSupport/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lldb/unittests/TestingSupport/CMakeLists.txt b/lldb/unittests/TestingSupport/CMakeLists.txt
index 3b5662a16e330..5322362ed3a27 100644
--- a/lldb/unittests/TestingSupport/CMakeLists.txt
+++ b/lldb/unittests/TestingSupport/CMakeLists.txt
@@ -1,3 +1,6 @@
+# Gtest 1.8.0 uses tr1/tuple which is deprecated on MSVC, so we force it off.
+add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
+
 set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL ON)
 add_lldb_library(lldbUtilityHelpers
   MockTildeExpressionResolver.cpp

From 75012a80440f2302d3dc0e57ea264b9c26c26789 Mon Sep 17 00:00:00 2001
From: Tatyana Krasnukha
Date: Tue, 4 Aug 2020 20:52:48 +0300
Subject: [PATCH 428/600] [lldb] Use PyUnicode_GetLength instead of PyUnicode_GetSize

PyUnicode_GetSize is deprecated since Python version 3.3.
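For reference, a minimal sketch of the version gate (assuming `str` is a
PyObject* holding a string; the variable names are illustrative):

    #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3
      Py_ssize_t n = PyUnicode_GetLength(str); // length in code points
    #else
      Py_ssize_t n = PyUnicode_GetSize(str);   // deprecated since 3.3
    #endif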
--- .../Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 6f040fdef09b8..7c49502f1b579 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -451,7 +451,11 @@ Expected PythonString::AsUTF8() const { size_t PythonString::GetSize() const { if (IsValid()) { #if PY_MAJOR_VERSION >= 3 +#if PY_MINOR_VERSION >= 3 + return PyUnicode_GetLength(m_py_obj); +#else return PyUnicode_GetSize(m_py_obj); +#endif #else return PyString_Size(m_py_obj); #endif From bc056b3aa7130923ab9ad0505c5a8d65ea721e39 Mon Sep 17 00:00:00 2001 From: Tatyana Krasnukha Date: Tue, 4 Aug 2020 20:53:30 +0300 Subject: [PATCH 429/600] [lldb] Suppress MSVC warning C4065 MSVC reports "switch statement contains 'default' but no 'case' labels". Suppress, as this was intended behavior. --- .../Platform/MacOSX/PlatformRemoteAppleBridge.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp index eb25a061de4e6..1cb8b9c37031e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp @@ -96,14 +96,22 @@ PlatformSP PlatformRemoteAppleBridge::CreateInstance(bool force, break; } if (create) { +// Suppress warning "switch statement contains 'default' but no 'case' labels". +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4065) +#endif switch (triple.getOS()) { - // NEED_BRIDGEOS_TRIPLE case llvm::Triple::BridgeOS: - break; + // NEED_BRIDGEOS_TRIPLE case llvm::Triple::BridgeOS: + // break; default: create = false; break; } +#ifdef _MSC_VER +#pragma warning(pop) +#endif } } break; default: From 14948a08f31b6e3b555f73339504efd867835d27 Mon Sep 17 00:00:00 2001 From: Pierre Gousseau Date: Wed, 5 Aug 2020 10:17:25 +0100 Subject: [PATCH 430/600] [compiler-rt] Normalize some in/out doxygen parameter in interface headers. NFC. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D84945 --- compiler-rt/include/sanitizer/asan_interface.h | 16 ++++++++-------- .../include/sanitizer/common_interface_defs.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/compiler-rt/include/sanitizer/asan_interface.h b/compiler-rt/include/sanitizer/asan_interface.h index 6af93aad6512f..792ef9cfaa32b 100644 --- a/compiler-rt/include/sanitizer/asan_interface.h +++ b/compiler-rt/include/sanitizer/asan_interface.h @@ -188,8 +188,8 @@ const char *__asan_get_report_description(void); /// \param addr Address to locate. /// \param name Buffer to store the variable's name. /// \param name_size Size in bytes of the variable's name buffer. -/// \param region_address [out] Address of the region. -/// \param region_size [out] Size of the region in bytes. +/// \param[out] region_address Address of the region. +/// \param[out] region_size Size of the region in bytes. /// /// \returns Returns the category of the given pointer as a constant string. const char *__asan_locate_address(void *addr, char *name, size_t name_size, @@ -204,7 +204,7 @@ const char *__asan_locate_address(void *addr, char *name, size_t name_size, /// \param addr A heap address. 
/// \param trace A buffer to store the stack trace. /// \param size Size in bytes of the trace buffer. -/// \param thread_id [out] The thread ID of the address. +/// \param[out] thread_id The thread ID of the address. /// /// \returns Returns the number of stored frames or 0 on error. size_t __asan_get_alloc_stack(void *addr, void **trace, size_t size, @@ -219,7 +219,7 @@ size_t __asan_get_alloc_stack(void *addr, void **trace, size_t size, /// \param addr A heap address. /// \param trace A buffer to store the stack trace. /// \param size Size in bytes of the trace buffer. -/// \param thread_id [out] The thread ID of the address. +/// \param[out] thread_id The thread ID of the address. /// /// \returns Returns the number of stored frames or 0 on error. size_t __asan_get_free_stack(void *addr, void **trace, size_t size, @@ -228,8 +228,8 @@ size_t __asan_get_free_stack(void *addr, void **trace, size_t size, /// Gets the current shadow memory mapping (useful for calling from the /// debugger). /// -/// \param shadow_scale [out] Shadow scale value. -/// \param shadow_offset [out] Offset value. +/// \param[out] shadow_scale Shadow scale value. +/// \param[out] shadow_offset Offset value. void __asan_get_shadow_mapping(size_t *shadow_scale, size_t *shadow_offset); /// This is an internal function that is called to report an error. However, @@ -302,8 +302,8 @@ void *__asan_get_current_fake_stack(void); /// /// \param fake_stack An opaque handler to a fake stack. /// \param addr Address to test. -/// \param beg [out] Beginning of fake frame. -/// \param end [out] End of fake frame. +/// \param[out] beg Beginning of fake frame. +/// \param[out] end End of fake frame. /// \returns Stack address or NULL. void *__asan_addr_is_in_fake_stack(void *fake_stack, void *addr, void **beg, void **end); diff --git a/compiler-rt/include/sanitizer/common_interface_defs.h b/compiler-rt/include/sanitizer/common_interface_defs.h index f979c6a8f63b1..b4f977bf5579c 100644 --- a/compiler-rt/include/sanitizer/common_interface_defs.h +++ b/compiler-rt/include/sanitizer/common_interface_defs.h @@ -320,7 +320,7 @@ void __sanitizer_print_memory_profile(size_t top_percent, /// signal callback runs during the switch, it will not benefit from stack /// use-after-return detection. /// -/// \param fake_stack_save [out] Fake stack save location. +/// \param[out] fake_stack_save Fake stack save location. /// \param bottom Bottom address of stack. /// \param size Size of stack in bytes. void __sanitizer_start_switch_fiber(void **fake_stack_save, @@ -335,8 +335,8 @@ void __sanitizer_start_switch_fiber(void **fake_stack_save, /// __sanitizer_start_switch_fiber(). /// /// \param fake_stack_save Fake stack save location. -/// \param bottom_old [out] Bottom address of old stack. -/// \param size_old [out] Size of old stack in bytes. +/// \param[out] bottom_old Bottom address of old stack. +/// \param[out] size_old Size of old stack in bytes. void __sanitizer_finish_switch_fiber(void *fake_stack_save, const void **bottom_old, size_t *size_old); From 4cd923784e9079384792e0aed38d56809d6a4f9a Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 5 Aug 2020 09:41:42 +0000 Subject: [PATCH 431/600] [MLIR][Shape] Expose extent tensor type builder The extent tensor type is a `tensor` that is used in the shape dialect. To facilitate the use of this type when working with the shape dialect, we expose the helper function for its construction. 
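A minimal usage sketch (assuming an OpBuilder named `builder` is in scope):

    // Yields tensor<?xindex>, the canonical extent tensor type.
    RankedTensorType ty = shape::getExtentTensorType(builder.getContext());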
Differential Revision: https://reviews.llvm.org/D85121 --- mlir/include/mlir/Dialect/Shape/IR/Shape.h | 3 +++ mlir/lib/Dialect/Shape/IR/Shape.cpp | 11 ++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Shape/IR/Shape.h b/mlir/include/mlir/Dialect/Shape/IR/Shape.h index 62e4e0c4511fc..ca1e0668d070f 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/Shape.h +++ b/mlir/include/mlir/Dialect/Shape/IR/Shape.h @@ -26,6 +26,9 @@ class PatternRewriter; namespace shape { +/// Alias type for extent tensors. +RankedTensorType getExtentTensorType(MLIRContext *ctx); + namespace ShapeTypes { enum Kind { Component = Type::FIRST_SHAPE_TYPE, diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index 02fe7b8129f7e..be4c3c721572e 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -24,7 +24,7 @@ namespace { #include "ShapeCanonicalization.inc" } -static RankedTensorType getExtentTensorType(MLIRContext *ctx) { +RankedTensorType shape::getExtentTensorType(MLIRContext *ctx) { return RankedTensorType::get({ShapedType::kDynamicSize}, IndexType::get(ctx)); } @@ -713,12 +713,9 @@ OpFoldResult ShapeOfOp::fold(ArrayRef) { } void ShapeOfOp::build(OpBuilder &builder, OperationState &result, Value arg) { - if (arg.getType().isa()) { - auto type = RankedTensorType::get({ShapedType::kDynamicSize}, - builder.getIndexType()); - return ShapeOfOp::build(builder, result, type, arg); - } - auto type = ShapeType::get(builder.getContext()); + Type type = arg.getType().isa() + ? (Type)getExtentTensorType(builder.getContext()) + : (Type)builder.getType(); return ShapeOfOp::build(builder, result, type, arg); } From 21f142ce1df10fe6cf5721e263fc6e91aea93938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Wed, 5 Aug 2020 11:54:14 +0200 Subject: [PATCH 432/600] [lldb] temporary commit to see why a test is failing only on lldb-aarch64-ubuntu --- .../Python/module/pexpect-4.6/pexpect/pty_spawn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py index 6b9ad3f63f7cd..ca651659b4615 100644 --- a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py +++ b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py @@ -212,8 +212,8 @@ def __str__(self): s.append(repr(self)) s.append('command: ' + str(self.command)) s.append('args: %r' % (self.args,)) - s.append('buffer (last 100 chars): %r' % self.buffer[-100:]) - s.append('before (last 100 chars): %r' % self.before[-100:] if self.before else '') + s.append('buffer (last 100 chars): %r' % self.buffer[-10000:]) + s.append('before (last 100 chars): %r' % self.before[-10000:] if self.before else '') s.append('after: %r' % (self.after,)) s.append('match: %r' % (self.match,)) s.append('match_index: ' + str(self.match_index)) From 138281904ba029bc49fca34a8658a8fcd1b843aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Wed, 5 Aug 2020 11:55:02 +0200 Subject: [PATCH 433/600] Revert "[lldb] temporary commit to see why a test is failing only on lldb-aarch64-ubuntu" This reverts commit 21f142ce1df10fe6cf5721e263fc6e91aea93938. 
---
 .../Python/module/pexpect-4.6/pexpect/pty_spawn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py
index ca651659b4615..6b9ad3f63f7cd 100644
--- a/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py
+++ b/lldb/third_party/Python/module/pexpect-4.6/pexpect/pty_spawn.py
@@ -212,8 +212,8 @@ def __str__(self):
         s.append(repr(self))
         s.append('command: ' + str(self.command))
         s.append('args: %r' % (self.args,))
-        s.append('buffer (last 100 chars): %r' % self.buffer[-10000:])
-        s.append('before (last 100 chars): %r' % self.before[-10000:] if self.before else '')
+        s.append('buffer (last 100 chars): %r' % self.buffer[-100:])
+        s.append('before (last 100 chars): %r' % self.before[-100:] if self.before else '')
         s.append('after: %r' % (self.after,))
         s.append('match: %r' % (self.match,))
         s.append('match_index: ' + str(self.match_index))

From 4963ca4658b0c4ab70f029110150878178328335 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht
Date: Wed, 5 Aug 2020 00:23:03 +0100
Subject: [PATCH 434/600] [docs] Document pattern of using CHECK-SAME to skip
 irrelevant lines

This came up during the review for D67656. It's nice but also subtle, so
documenting it as an idiom will make tests easier to understand.

Reviewed By: probinson

Differential Revision: https://reviews.llvm.org/D68061
---
 llvm/docs/CommandGuide/FileCheck.rst | 53 ++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst
index 0a0c2c5dd25d4..a7645fc9f7fd2 100644
--- a/llvm/docs/CommandGuide/FileCheck.rst
+++ b/llvm/docs/CommandGuide/FileCheck.rst
@@ -379,8 +379,57 @@ For example, the following works like you'd expect:
    ; CHECK-SAME: scope: ![[SCOPE:[0-9]+]]

 "``CHECK-SAME:``" directives reject the input if there are any newlines between
-it and the previous directive. A "``CHECK-SAME:``" cannot be the first
-directive in a file.
+it and the previous directive.
+
+"``CHECK-SAME:``" is also useful to avoid writing matchers for irrelevant
+fields. For example, suppose you're writing a test for a tool that generates
+output like this:
+
+.. code-block:: text
+
+  Name: foo
+  Field1: ...
+  Field2: ...
+  Field3: ...
+  Value: 1
+
+  Name: bar
+  Field1: ...
+  Field2: ...
+  Field3: ...
+  Value: 2
+
+  Name: baz
+  Field1: ...
+  Field2: ...
+  Field3: ...
+  Value: 1
+
+To write a test that verifies ``foo`` has the value ``1``, you might first
+write this:
+
+.. code-block:: text
+
+  CHECK: Name: foo
+  CHECK: Value: 1{{$}}
+
+However, this would be a bad test: if the value for ``foo`` changes, the test
+would still pass because the "``CHECK: Value: 1``" line would match the value
+from ``baz``. To fix this, you could add ``CHECK-NEXT`` matchers for every
+``FieldN:`` line, but that would be verbose, and would need to be updated when
+``Field4`` is added. A more succinct way to write the test using the
+"``CHECK-SAME:``" matcher would be as follows:
+
+.. code-block:: text
+
+  CHECK: Name: foo
+  CHECK: Value:
+  CHECK-SAME: {{ 1$}}
+
+This verifies that the *next* time "``Value:``" appears in the output, it has
+the value ``1``.
+
+Note: a "``CHECK-SAME:``" cannot be the first directive in a file.
The "CHECK-EMPTY:" directive ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From c558c22cab9a555d2e521102b775759381e9727f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 12:03:30 +0200 Subject: [PATCH 435/600] [llvm-symbolizer] Add legacy aliases -demangle=true and -demangle=false. This is used in the wild, don't break compatibility for no good reason. https://github.com/google/pprof/blob/master/internal/binutils/addr2liner_llvm.go --- llvm/test/tools/llvm-symbolizer/demangle.s | 6 ++++++ llvm/tools/llvm-symbolizer/Opts.td | 3 +++ 2 files changed, 9 insertions(+) diff --git a/llvm/test/tools/llvm-symbolizer/demangle.s b/llvm/test/tools/llvm-symbolizer/demangle.s index 14156bf498d85..f4327bdca7a1a 100644 --- a/llvm/test/tools/llvm-symbolizer/demangle.s +++ b/llvm/test/tools/llvm-symbolizer/demangle.s @@ -31,5 +31,11 @@ _Z1cv: # RUN: llvm-addr2line -fCe %t.o 0 \ # RUN: | FileCheck %s --check-prefix=DEMANGLED_FUNCTION_NAME +# pprof passes -demangle=false +# RUN: llvm-symbolizer -demangle=false --obj %t.o 0 \ +# RUN: | FileCheck %s --check-prefix=MANGLED_FUNCTION_NAME +# RUN: llvm-symbolizer -demangle=true --obj %t.o 0 \ +# RUN: | FileCheck %s --check-prefix=DEMANGLED_FUNCTION_NAME + # MANGLED_FUNCTION_NAME: _Z1cv # DEMANGLED_FUNCTION_NAME: c() diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index 66b38924023f2..8be550983fe2a 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -62,3 +62,6 @@ def : Flag<["-"], "s">, Alias, HelpText<"Alias for --basenames">; // Compatibility aliases for old asan_symbolize.py and sanitizer binaries (before 2020-08). def : Flag<["--"], "inlining=true">, Alias, HelpText<"Alias for --inlines">; def : Flag<["--"], "inlining=false">, Alias, HelpText<"Alias for --no-inlines">; +// Compatibility aliases for pprof's symbolizer. +def : Flag<["-"], "demangle=true">, Alias, HelpText<"Alias for --demangle">; +def : Flag<["-"], "demangle=false">, Alias, HelpText<"Alias for --no-demangle">; From f97019ad6e3a96995dda3f759ee692eb81abcc4c Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 4 Aug 2020 15:19:36 +0300 Subject: [PATCH 436/600] [llvm-readobj/elf] - Add a testing for --stackmap and refine the implementation. Currently, we only test the `--stackmap` option here: https://github.com/llvm/llvm-project/blob/master/llvm/test/Object/stackmap-dump.test it uses a precompiled MachO binary currently and I've found no tests for this option for ELF. The implementation also has issues. For example, it might assert on a wrong version of the .llvm-stackmaps section. Or it might crash on an empty or truncated section. This patch introduces a new tools/llvm-readobj/ELF test file as well as implements a few basic checks to catch simple crashes/issues It also eliminates `unwrapOrError` calls in `printStackMap()`. 
Differential revision: https://reviews.llvm.org/D85208 --- llvm/include/llvm/Object/StackMapParser.h | 18 ++++ llvm/lib/CodeGen/StackMaps.cpp | 2 +- .../test/tools/llvm-readobj/ELF/stackmap.test | 86 +++++++++++++++++++ llvm/tools/llvm-readobj/COFFDumper.cpp | 4 - llvm/tools/llvm-readobj/ELFDumper.cpp | 34 +++++--- 5 files changed, 125 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/ELF/stackmap.test diff --git a/llvm/include/llvm/Object/StackMapParser.h b/llvm/include/llvm/Object/StackMapParser.h index b408f40410340..83926c6471c09 100644 --- a/llvm/include/llvm/Object/StackMapParser.h +++ b/llvm/include/llvm/Object/StackMapParser.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Object/ELF.h" #include "llvm/Support/Endian.h" #include #include @@ -318,6 +319,23 @@ class StackMapParser { } } + /// Validates the header of the specified stack map section. + static Error validateHeader(ArrayRef StackMapSection) { + // See the comment for StackMaps::emitStackmapHeader(). + if (StackMapSection.size() < 16) + return object::createError( + "the stack map section size (" + Twine(StackMapSection.size()) + + ") is less than the minimum possible size of its header (16)"); + + unsigned Version = StackMapSection[0]; + if (Version != 3) + return object::createError( + "the version (" + Twine(Version) + + ") of the stack map section is unsupported, the " + "supported version is 3"); + return Error::success(); + } + using function_iterator = AccessorIterator; using constant_iterator = AccessorIterator; using record_iterator = AccessorIterator; diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 1e060ecbeb431..113d477ec80a7 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -404,7 +404,7 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { /// Emit the stackmap header. /// /// Header { -/// uint8 : Stack Map Version (currently 2) +/// uint8 : Stack Map Version (currently 3) /// uint8 : Reserved (expected to be 0) /// uint16 : Reserved (expected to be 0) /// } diff --git a/llvm/test/tools/llvm-readobj/ELF/stackmap.test b/llvm/test/tools/llvm-readobj/ELF/stackmap.test new file mode 100644 index 0000000000000..22a1bd1bef8dc --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/stackmap.test @@ -0,0 +1,86 @@ +## Here we test how the --stackmap option can be used to dump .llvm_stackmaps sections. + +## Check we are able to dump an empty .llvm_stackmaps section. Document that +## we are only trying to dump the first stack map section and ignore others if any. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj %t --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=EMPTY --implicit-check-not=warning: +# RUN: llvm-readelf %t --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=EMPTY --implicit-check-not=warning: + +# EMPTY: LLVM StackMap Version: 3 +# EMPTY-NEXT: Num Functions: 0 +# EMPTY-NEXT: Num Constants: 0 +# EMPTY-NEXT: Num Records: 0 +# EMPTY-NOT: {{.}} + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: [[NAME=.llvm_stackmaps]] + Type: SHT_PROGBITS + ContentArray: [ [[VERSION=0x3]] ] + Size: [[SIZE=16]] + ShSize: [[SHSIZE=]] + ShOffset: [[SHOFFSET=]] +## An arbitrary second broken .llvm_stackmaps section. 
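+## (The " (1)" suffix is yaml2obj's convention for giving two sections the
+## same name; the emitted section is also called .llvm_stackmaps.)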
+ - Name: .llvm_stackmaps (1) + Type: SHT_PROGBITS + ContentArray: [ 0xFF ] + Size: 0x1 + +## Hide the first stack map section to allow dumpers to locate and validate the second one, which is broken. +## Check we are able to find it and report a warning properly. + +# RUN: yaml2obj %s -DNAME=.foo -o %t.second +# RUN: llvm-readobj %t.second --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=SECOND -DFILE=%t.second --implicit-check-not=warning: +# RUN: llvm-readelf %t.second --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=SECOND -DFILE=%t.second --implicit-check-not=warning: + +# SECOND: warning: '[[FILE]]': unable to read the stack map from SHT_PROGBITS section with index 2: the stack map section size (1) is less than the minimum possible size of its header (16) + +## Check we report a warning when the size of the .llvm_stackmaps section is less +## than the minimum possible size of its header. + +# RUN: yaml2obj %s -DSHSIZE=0 -o %t.trunc0 +# RUN: llvm-readobj %t.trunc0 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc0 --check-prefix=TRUNC -DVAL=0 +# RUN: llvm-readelf %t.trunc0 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc0 --check-prefix=TRUNC -DVAL=0 + +# RUN: yaml2obj %s -DSIZE=1 -o %t.trunc1 +# RUN: llvm-readobj %t.trunc1 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc1 --check-prefix=TRUNC -DVAL=1 +# RUN: llvm-readelf %t.trunc1 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc1 --check-prefix=TRUNC -DVAL=1 + +# RUN: yaml2obj %s -DSIZE=15 -o %t.trunc15 +# RUN: llvm-readobj %t.trunc15 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc15 --check-prefix=TRUNC -DVAL=15 +# RUN: llvm-readelf %t.trunc15 --stackmap 2>&1 | FileCheck %s -DFILE=%t.trunc15 --check-prefix=TRUNC -DVAL=15 + +# TRUNC: warning: '[[FILE]]': unable to read the stack map from SHT_PROGBITS section with index 1: the stack map section size ([[VAL]]) is less than the minimum possible size of its header (16) + +## Check that we report a warning when the version of the stack map section is not supported. + +# RUN: yaml2obj %s -DVERSION=2 -o %t.ver2 +# RUN: llvm-readobj %t.ver2 --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=VERSION -DFILE=%t.ver2 --implicit-check-not=warning: -DVERSION=2 +# RUN: llvm-readelf %t.ver2 --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=VERSION -DFILE=%t.ver2 --implicit-check-not=warning: -DVERSION=2 + +# RUN: yaml2obj %s -DVERSION=4 -o %t.ver4 +# RUN: llvm-readobj %t.ver4 --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=VERSION -DFILE=%t.ver4 --implicit-check-not=warning: -DVERSION=4 +# RUN: llvm-readelf %t.ver4 --stackmap 2>&1 | \ +# RUN: FileCheck %s --check-prefix=VERSION -DFILE=%t.ver4 --implicit-check-not=warning: -DVERSION=4 + +# VERSION: warning: '[[FILE]]': unable to read the stack map from SHT_PROGBITS section with index 1: the version ([[VERSION]]) of the stack map section is unsupported, the supported version is 3 + +## Check that we report a warning when we are unable to read the content of the stack map section. 
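+## (SHOFFSET=0xffff places sh_offset past the end of the file, so reading the
+## section contents fails.)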
+# RUN: yaml2obj %s -DSHOFFSET=0xffff -o %t.offset +# RUN: llvm-readobj %t.offset --stackmap 2>&1 | FileCheck %s -DFILE=%t.offset --check-prefix=OFFSET +# RUN: llvm-readelf %t.offset --stackmap 2>&1 | FileCheck %s -DFILE=%t.offset --check-prefix=OFFSET + +# OFFSET: warning: '[[FILE]]': unable to read the stack map from SHT_PROGBITS section with index 1: section [index 1] has a sh_offset (0xffff) + sh_size (0x10) that is greater than the file size (0x1b8) diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index 89a904f53ae7d..39549efc040c2 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -60,10 +60,6 @@ using namespace llvm::codeview; using namespace llvm::support; using namespace llvm::Win64EH; -static inline Error createError(const Twine &Err) { - return make_error(Err, object_error::parse_failed); -} - namespace { struct LoadConfigTables { diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 53ebfd5663c94..78d47b540ab6a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -3423,24 +3423,30 @@ template void ELFDumper::printMipsOptions() { template void ELFDumper::printStackMap() const { const ELFFile *Obj = ObjF->getELFFile(); - const Elf_Shdr *StackMapSection = nullptr; - for (const Elf_Shdr &Sec : cantFail(Obj->sections())) { - StringRef Name = - unwrapOrError(ObjF->getFileName(), Obj->getSectionName(&Sec)); - if (Name == ".llvm_stackmaps") { - StackMapSection = &Sec; - break; - } - } - + const Elf_Shdr *StackMapSection = findSectionByName(".llvm_stackmaps"); if (!StackMapSection) return; - ArrayRef StackMapContentsArray = unwrapOrError( - ObjF->getFileName(), Obj->getSectionContents(StackMapSection)); + auto Warn = [&](Error &&E) { + this->reportUniqueWarning(createError("unable to read the stack map from " + + describe(*StackMapSection) + ": " + + toString(std::move(E)))); + }; + + Expected> ContentOrErr = + Obj->getSectionContents(StackMapSection); + if (!ContentOrErr) { + Warn(ContentOrErr.takeError()); + return; + } + + if (Error E = StackMapParser::validateHeader( + *ContentOrErr)) { + Warn(std::move(E)); + return; + } - prettyPrintStackMap( - W, StackMapParser(StackMapContentsArray)); + prettyPrintStackMap(W, StackMapParser(*ContentOrErr)); } template void ELFDumper::printGroupSections() { From a3d427d30cd32f218f53e32b58e232ea8312aa50 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Wed, 5 Aug 2020 12:12:45 +0200 Subject: [PATCH 437/600] [mlir] Lower RankOp to LLVM for unranked memrefs. 
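Both forms now lower; schematically (the ranked element type below is
illustrative):

    %r0 = rank %unranked : memref<*xi32>  // rank is read from the descriptor
    %r1 = rank %ranked : memref<?xi32>    // becomes llvm.mlir.constant(1 : index)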
Differential Revision: https://reviews.llvm.org/D85273
---
 .../StandardToLLVM/StandardToLLVM.cpp         | 23 +++++++++++++++++++
 .../StandardToLLVM/convert-to-llvmir.mlir     | 23 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index d0b49bb181955..533ac629ba5ae 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -2402,6 +2402,28 @@ struct DimOpLowering : public ConvertOpToLLVMPattern<DimOp> {
   }
 };

+struct RankOpLowering : public ConvertOpToLLVMPattern<RankOp> {
+  using ConvertOpToLLVMPattern<RankOp>::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    Location loc = op->getLoc();
+    Type operandType = cast<RankOp>(op).memrefOrTensor().getType();
+    if (auto unrankedMemRefType = operandType.dyn_cast<UnrankedMemRefType>()) {
+      UnrankedMemRefDescriptor desc(RankOp::Adaptor(operands).memrefOrTensor());
+      rewriter.replaceOp(op, {desc.rank(rewriter, loc)});
+      return success();
+    }
+    if (auto rankedMemRefType = operandType.dyn_cast<MemRefType>()) {
+      rewriter.replaceOp(
+          op, {createIndexConstant(rewriter, loc, rankedMemRefType.getRank())});
+      return success();
+    }
+    return failure();
+  }
+};
+
 // Common base for load and store operations on MemRefs. Restricts the match
 // to supported MemRef types. Provides functionality to emit code accessing a
 // specific element of the underlying data buffer.
@@ -3272,6 +3294,7 @@ void mlir::populateStdToLLVMMemoryConversionPatterns(
       DimOpLowering,
       LoadOpLowering,
       MemRefCastOpLowering,
+      RankOpLowering,
       StoreOpLowering,
       SubViewOpLowering,
       ViewOpLowering,

diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
index d0e883a10bee3..6123f68b7e859 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
@@ -1291,3 +1291,26 @@ func @bfloat(%arg0: bf16) -> bf16 {
 func @memref_index(%arg0: memref<32xindex>) -> memref<32xindex> {
   return %arg0 : memref<32xindex>
 }
+
+// -----
+
+// CHECK-LABEL: func @rank_of_unranked
+// CHECK32-LABEL: func @rank_of_unranked
+func @rank_of_unranked(%unranked: memref<*xi32>) {
+  %rank = rank %unranked : memref<*xi32>
+  return
+}
+// CHECK-NEXT: llvm.mlir.undef
+// CHECK-NEXT: llvm.insertvalue
+// CHECK-NEXT: llvm.insertvalue
+// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i8* }">
+// CHECK32: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i8* }">
+
+// CHECK-LABEL: func @rank_of_ranked
+// CHECK32-LABEL: func @rank_of_ranked
+func @rank_of_ranked(%ranked: memref) {
+  %rank = rank %ranked : memref
+  return
+}
+// CHECK: llvm.mlir.constant(1 : index) : !llvm.i64
+// CHECK32: llvm.mlir.constant(1 : index) : !llvm.i32

From fab4b59961aa35109861493dfe071979d56b4360 Mon Sep 17 00:00:00 2001
From: "Arpith C. Jacob"
Date: Wed, 5 Aug 2020 12:16:17 +0200
Subject: [PATCH 438/600] [mlir] Conversion of ViewOp with memory space to
 LLVM.

Handle the case where the ViewOp takes in a memref that has a memory space.
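For example (a sketch based on the new test case; %shift is an arbitrary
index value):

    %src = alloc() : memref<2048xi8, 4>
    // The lowering now bitcasts to pointers in address space 4 as well,
    // e.g. !llvm.ptr<f32, 4>.
    %dst = view %src[%shift][] : memref<2048xi8, 4> to memref<64x4xf32, 4>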
Reviewed By: ftynse, bondhugula, nicolasvasilache Differential Revision: https://reviews.llvm.org/D85048 --- .../StandardToLLVM/StandardToLLVM.cpp | 7 ++++-- .../StandardToLLVM/convert-to-llvmir.mlir | 22 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 533ac629ba5ae..2ada7c4256009 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -2960,8 +2960,10 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { // Field 1: Copy the allocated pointer, used for malloc/free. Value allocatedPtr = sourceMemRef.allocatedPtr(rewriter, loc); + auto srcMemRefType = viewOp.source().getType().cast(); Value bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(), allocatedPtr); + loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + allocatedPtr); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); // Field 2: Copy the actual aligned pointer to payload. @@ -2969,7 +2971,8 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { alignedPtr = rewriter.create(loc, alignedPtr.getType(), alignedPtr, adaptor.byte_shift()); bitcastPtr = rewriter.create( - loc, targetElementTy.getPointerTo(), alignedPtr); + loc, targetElementTy.getPointerTo(srcMemRefType.getMemorySpace()), + alignedPtr); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); // Field 3: The offset in the resulting type must be 0. This is because of diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir index 6123f68b7e859..9042bf36c1b3b 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -824,6 +824,28 @@ func @view(%arg0 : index, %arg1 : index, %arg2 : index) { // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> %5 = view %0[%arg2][] : memref<2048xi8> to memref<64x4xf32> + // Test view memory space. 
+ // CHECK: llvm.mlir.constant(2048 : index) : !llvm.i64 + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %6 = alloc() : memref<2048xi8, 4> + + // CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[BASE_PTR_4:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: %[[SHIFTED_BASE_PTR_4:.*]] = llvm.getelementptr %[[BASE_PTR_4]][%[[ARG2]]] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + // CHECK: %[[CAST_SHIFTED_BASE_PTR_4:.*]] = llvm.bitcast %[[SHIFTED_BASE_PTR_4]] : !llvm.ptr to !llvm.ptr + // CHECK: llvm.insertvalue %[[CAST_SHIFTED_BASE_PTR_4]], %{{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: %[[C0_4:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 + // CHECK: llvm.insertvalue %[[C0_4]], %{{.*}}[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(64 : index) : !llvm.i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 + // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> + %7 = view %6[%arg2][] : memref<2048xi8, 4> to memref<64x4xf32, 4> + return } From 188187f062a56604a4339f16f139e3cc720ba2bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= Date: Wed, 5 Aug 2020 12:18:38 +0200 Subject: [PATCH 439/600] [lldb] expect TestGuiBasicDebug.py failure on aarch64 http://lab.llvm.org:8011/builders/lldb-aarch64-ubuntu/builds/7287/steps/test/logs/stdio fails, and the output suggests that gui 'finish' (='thread step-out') is broken on aarch64. --- lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py index 76d9d3bdc4638..5c3d0d7369b45 100644 --- a/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py +++ b/lldb/test/API/commands/gui/basicdebug/TestGuiBasicDebug.py @@ -16,6 +16,7 @@ class TestGuiBasicDebugCommandTest(PExpectTest): @skipIfAsan @skipIfCursesSupportMissing @skipIfRemote # "run" command will not work correctly for remote debug + @expectedFailureAll(archs=["aarch64"], oslist=["linux"]) def test_gui(self): self.build() From 4aaf301fb8f4f666d935fe78ef5d74592eee5fc1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Aug 2020 11:22:07 +0100 Subject: [PATCH 440/600] [DAG] Fold vector (aext (load x)) -> (zext (truncate (zextload x))) We currently don't do anything to fold any_extend vector loads as no target has such an instruction. Instead I've added support for folding to a zextload, SimplifyDemandedBits does a good job of adjusting the zext(truncate(()) stages as required later on. We still need the custom scalar extload handling instead of using the tryToFoldExtOfLoad helper as it has different legality tests - we can probably tweak that to reduce most of the code duplication. 
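Schematically (the value types here are illustrative):

    t1: v8i8  = load %p               t1: v8i16 = zextload %p, zext from v8i8
    t2: v8i16 = any_extend t1   ==>   t2: v8i8  = truncate t1
                                      t3: v8i16 = zero_extend t2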
Fixes the regression I mentioned in rG99a971cadff7 Differential Revision: https://reviews.llvm.org/D85129 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 +++++++++++-------- .../CodeGen/X86/avx512-any_extend_load.ll | 26 +++++----------- .../X86/avx512-shuffles/partial_permute.ll | 5 ++-- llvm/test/CodeGen/X86/vector-mul.ll | 3 +- 4 files changed, 27 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b22d978d37366..b2077f47d4a39 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10606,22 +10606,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + // on vectors in one instruction, so attempt to fold to zext instead. + if (VT.isVector()) { + // Try to simplify (zext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + return foldedExt; + } else if (ISD::isNON_EXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; - SmallVector SetCCs; + SmallVector SetCCs; if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, - TLI); + DoXform = + ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); + LN0->getChain(), LN0->getBasePtr(), + N0.getValueType(), LN0->getMemOperand()); ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); @@ -10630,8 +10634,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); recursivelyDeleteUnusedNodes(LN0); } else { - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(LN0, Trunc, ExtLoad.getValue(1)); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
diff --git a/llvm/test/CodeGen/X86/avx512-any_extend_load.ll b/llvm/test/CodeGen/X86/avx512-any_extend_load.ll index 99ebd4baffcca..db5f8c244eee2 100644 --- a/llvm/test/CodeGen/X86/avx512-any_extend_load.ll +++ b/llvm/test/CodeGen/X86/avx512-any_extend_load.ll @@ -4,25 +4,13 @@ define void @any_extend_load_v8i64(<8 x i8> * %ptr) { -; KNL-LABEL: any_extend_load_v8i64: -; KNL: # %bb.0: -; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] -; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; KNL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; KNL-NEXT: vpmovqb %zmm0, (%rdi) -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: any_extend_load_v8i64: -; SKX: # %bb.0: -; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero -; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; SKX-NEXT: vpmovqb %zmm0, (%rdi) -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; ALL-LABEL: any_extend_load_v8i64: +; ALL: # %bb.0: +; ALL-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero +; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; ALL-NEXT: vpmovqb %zmm0, (%rdi) +; ALL-NEXT: vzeroupper +; ALL-NEXT: retq %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1 %1 = zext <8 x i8> %wide.load to <8 x i64> %2 = add nuw nsw <8 x i64> %1, diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 050269c2ba2f3..c445a522bf2fd 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -4368,9 +4368,8 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) { ; CHECK-LABEL: test_zext_v8i8_to_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovq (%rdi), %xmm0 # xmm0 = mem[0],zero -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; CHECK-NEXT: vpmovzxbw (%rdi), %xmm0 # xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, (%rsi) ; CHECK-NEXT: retq %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index d93ac61592f29..805ff9f69ed5e 100644 
--- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -1904,8 +1904,7 @@ define <2 x i64> @mul_v2i64_zext_cross_bb(<2 x i32>* %in, <2 x i32>* %y) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0,0,1,1] +; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; X86-NEXT: pmuludq %xmm1, %xmm0 ; X86-NEXT: retl ; From 927fc536ca225568f2ae853dddbb58e8712b1fbf Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 5 Aug 2020 11:21:34 +0100 Subject: [PATCH 441/600] [SVE] Add lowering for fixed length vector and, or & xor operations. Since there are no ill effects when performing these operations with undefined elements, they are lowered to the already supported unpredicated scalable vector equivalents. Differential Revision: https://reviews.llvm.org/D85117 --- .../Target/AArch64/AArch64ISelLowering.cpp | 39 +- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../AArch64/sve-fixed-length-int-log.ll | 1042 +++++++++++++++++ 3 files changed, 1081 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 13d6a0b882343..9b78428978ec6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1076,14 +1076,17 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) { // Lower fixed length vector operations to scalable equivalents. setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::FADD, VT, Custom); setOperationAction(ISD::FDIV, VT, Custom); setOperationAction(ISD::FMA, VT, Custom); setOperationAction(ISD::FMUL, VT, Custom); setOperationAction(ISD::FSUB, VT, Custom); setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::OR, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::XOR, VT, Custom); } void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { @@ -2544,7 +2547,10 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, return IsStrict ? 
DAG.getMergeValues({Result, Chain}, dl) : Result; } -static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { +SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const { + if (useSVEForFixedLengthVectorVT(Op.getValueType())) + return LowerToScalableOp(Op, DAG); + SDValue Sel = Op.getOperand(0); SDValue Other = Op.getOperand(1); SDLoc dl(Sel); @@ -3611,6 +3617,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, if (useSVEForFixedLengthVectorVT(Op.getValueType())) return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); llvm_unreachable("Unexpected request to lower ISD::ADD"); + case ISD::AND: + return LowerToScalableOp(Op, DAG); } } @@ -8284,6 +8292,9 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, SelectionDAG &DAG) const { + if (useSVEForFixedLengthVectorVT(Op.getValueType())) + return LowerToScalableOp(Op, DAG); + // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) return Res; @@ -15237,6 +15248,8 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE( return convertFromScalableVector(DAG, VT, Val); } +// Convert vector operation 'Op' to an equivalent predicated operation whereby +// the original operation's type is used to construct a suitable predicate. SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const { @@ -15247,7 +15260,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, if (useSVEForFixedLengthVectorVT(VT)) { EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - // Create list of operands by convereting existing ones to scalable types. + // Create list of operands by converting existing ones to scalable types. SmallVector Operands = {Pg}; for (const SDValue &V : Op->op_values()) { if (isa(V)) { @@ -15275,3 +15288,25 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op, return DAG.getNode(NewOp, DL, VT, Operands); } + +// If a fixed length vector operation has no side effects when applied to +// undefined elements, we can safely use scalable vectors to perform the same +// operation without needing to worry about predication. +SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + assert(useSVEForFixedLengthVectorVT(VT) && + "Only expected to lower fixed length vector operation!"); + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + + // Create list of operands by converting existing ones to scalable types. 
+ SmallVector Ops; + for (const SDValue &V : Op->op_values()) { + assert(useSVEForFixedLengthVectorVT(V.getValueType()) && + "Only fixed length vectors are supported!"); + Ops.push_back(convertToScalableVector(DAG, ContainerVT, V)); + } + + auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops); + return convertFromScalableVector(DAG, VT, ScalableRes); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 72c9e69ce7b84..08f00361bf609 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -861,6 +861,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; + SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; @@ -878,6 +879,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll new file mode 100644 index 0000000000000..7c1095b920093 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll @@ -0,0 +1,1042 @@ +; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE +; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256 +; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256 +; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512 +; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512 +; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512 +; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512 +; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 
-check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024 +; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK + +; VBYTES represents the useful byte size of a vector register from the code +; generator's point of view. It is clamped to power-of-2 values because +; only power-of-2 vector lengths are considered legal, regardless of the +; user specified vector length. + +target triple = "aarch64-unknown-linux-gnu" + +; Don't use SVE when its registers are no bigger than NEON. +; NO_SVE-NOT: ptrue + +; Don't use SVE for 64-bit vectors. +define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { +; CHECK-LABEL: @and_v8i8 +; CHECK: and v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = and <8 x i8> %op1, %op2 + ret <8 x i8> %res +} + +; Don't use SVE for 128-bit vectors. +define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { +; CHECK-LABEL: @and_v16i8 +; CHECK: and v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = and <16 x i8> %op1, %op2 + ret <16 x i8> %res +} + +define void @and_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: @and_v32i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,32)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %op2 = load <32 x i8>, <32 x i8>* %b + %res = and <32 x i8> %op1, %op2 + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @and_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: @and_v64i8 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,64)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK-DAG: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK-DAG: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_LE_256-DAG: mov w[[OFF_1:[0-9]+]], #[[#VBYTES]] +; VBITS_LE_256-DAG: ld1b { [[OP1_1:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_1]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_1:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_1]]] +; VBITS_LE_256-DAG: and [[RES_1:z[0-9]+]].d, [[OP1_1]].d, [[OP2_1]].d +; VBITS_LE_256-DAG: st1b { [[RES_1]].b }, [[PG]], [x0, x[[OFF_1]]] +; CHECK: ret + %op1 = load <64 x i8>, <64 x i8>* %a + %op2 = load <64 x i8>, <64 x i8>* %b + %res = and <64 x i8> %op1, %op2 + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @and_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: @and_v128i8 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,128)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK-DAG: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK-DAG: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_LE_512-DAG: mov w[[OFF_1:[0-9]+]], #[[#VBYTES]] +; VBITS_LE_512-DAG: ld1b { [[OP1_1:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_1]]] +; VBITS_LE_512-DAG: ld1b { [[OP2_1:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_1]]] +; VBITS_LE_512-DAG: and [[RES_1:z[0-9]+]].d, [[OP1_1]].d, [[OP2_1]].d +; VBITS_LE_512-DAG: st1b { [[RES_1]].b }, [[PG]], [x0, x[[OFF_1]]] +; VBITS_LE_256-DAG: mov w[[OFF_2:[0-9]+]], #[[#mul(VBYTES,2)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_2:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_2]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_2:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_2]]] +; VBITS_LE_256-DAG: and [[RES_2:z[0-9]+]].d, [[OP1_2]].d, 
[[OP2_2]].d +; VBITS_LE_256-DAG: st1b { [[RES_2]].b }, [[PG]], [x0, x[[OFF_2]]] +; VBITS_LE_256-DAG: mov w[[OFF_3:[0-9]+]], #[[#mul(VBYTES,3)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_3:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_3]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_3:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_3]]] +; VBITS_LE_256-DAG: and [[RES_3:z[0-9]+]].d, [[OP1_3]].d, [[OP2_3]].d +; VBITS_LE_256-DAG: st1b { [[RES_3]].b }, [[PG]], [x0, x[[OFF_3]]] +; CHECK: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %op2 = load <128 x i8>, <128 x i8>* %b + %res = and <128 x i8> %op1, %op2 + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @and_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: @and_v256i8 +; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,256)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK-DAG: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK-DAG: st1b { [[RES]].b }, [[PG]], [x0] +; VBITS_LE_1024-DAG: mov w[[OFF_1:[0-9]+]], #[[#VBYTES]] +; VBITS_LE_1024-DAG: ld1b { [[OP1_1:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_1]]] +; VBITS_LE_1024-DAG: ld1b { [[OP2_1:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_1]]] +; VBITS_LE_1024-DAG: and [[RES_1:z[0-9]+]].d, [[OP1_1]].d, [[OP2_1]].d +; VBITS_LE_1024-DAG: st1b { [[RES_1]].b }, [[PG]], [x0, x[[OFF_1]]] +; VBITS_LE_512-DAG: mov w[[OFF_2:[0-9]+]], #[[#mul(VBYTES,2)]] +; VBITS_LE_512-DAG: ld1b { [[OP1_2:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_2]]] +; VBITS_LE_512-DAG: ld1b { [[OP2_2:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_2]]] +; VBITS_LE_512-DAG: and [[RES_2:z[0-9]+]].d, [[OP1_2]].d, [[OP2_2]].d +; VBITS_LE_512-DAG: st1b { [[RES_2]].b }, [[PG]], [x0, x[[OFF_2]]] +; VBITS_LE_512-DAG: mov w[[OFF_3:[0-9]+]], #[[#mul(VBYTES,3)]] +; VBITS_LE_512-DAG: ld1b { [[OP1_3:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_3]]] +; VBITS_LE_512-DAG: ld1b { [[OP2_3:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_3]]] +; VBITS_LE_512-DAG: and [[RES_3:z[0-9]+]].d, [[OP1_3]].d, [[OP2_3]].d +; VBITS_LE_512-DAG: st1b { [[RES_3]].b }, [[PG]], [x0, x[[OFF_3]]] +; VBITS_LE_256-DAG: mov w[[OFF_4:[0-9]+]], #[[#mul(VBYTES,4)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_4:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_4]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_4:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_4]]] +; VBITS_LE_256-DAG: and [[RES_4:z[0-9]+]].d, [[OP1_4]].d, [[OP2_4]].d +; VBITS_LE_256-DAG: st1b { [[RES_4]].b }, [[PG]], [x0, x[[OFF_4]]] +; VBITS_LE_256-DAG: mov w[[OFF_5:[0-9]+]], #[[#mul(VBYTES,5)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_5:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_5]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_5:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_5]]] +; VBITS_LE_256-DAG: and [[RES_5:z[0-9]+]].d, [[OP1_5]].d, [[OP2_5]].d +; VBITS_LE_256-DAG: st1b { [[RES_5]].b }, [[PG]], [x0, x[[OFF_5]]] +; VBITS_LE_256-DAG: mov w[[OFF_6:[0-9]+]], #[[#mul(VBYTES,6)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_6:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_6]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_6:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_6]]] +; VBITS_LE_256-DAG: and [[RES_6:z[0-9]+]].d, [[OP1_6]].d, [[OP2_6]].d +; VBITS_LE_256-DAG: st1b { [[RES_6]].b }, [[PG]], [x0, x[[OFF_6]]] +; VBITS_LE_256-DAG: mov w[[OFF_7:[0-9]+]], #[[#mul(VBYTES,7)]] +; VBITS_LE_256-DAG: ld1b { [[OP1_7:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_7]]] +; VBITS_LE_256-DAG: ld1b { [[OP2_7:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_7]]] +; VBITS_LE_256-DAG: and [[RES_7:z[0-9]+]].d, [[OP1_7]].d, [[OP2_7]].d +; VBITS_LE_256-DAG: st1b { [[RES_7]].b }, [[PG]], [x0, x[[OFF_7]]] +; CHECK: ret + %op1 = load <256 x i8>, <256 x 
i8>* %a + %op2 = load <256 x i8>, <256 x i8>* %b + %res = and <256 x i8> %op1, %op2 + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { +; CHECK-LABEL: @and_v4i16 +; CHECK: and v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = and <4 x i16> %op1, %op2 + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. +define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { +; CHECK-LABEL: @and_v8i16 +; CHECK: and v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = and <8 x i16> %op1, %op2 + ret <8 x i16> %res +} + +define void @and_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: @and_v16i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %op2 = load <16 x i16>, <16 x i16>* %b + %res = and <16 x i16> %op1, %op2 + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: @and_v32i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %op2 = load <32 x i16>, <32 x i16>* %b + %res = and <32 x i16> %op1, %op2 + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: @and_v64i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %op2 = load <64 x i16>, <64 x i16>* %b + %res = and <64 x i16> %op1, %op2 + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: @and_v128i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %op2 = load <128 x i16>, <128 x i16>* %b + %res = and <128 x i16> %op1, %op2 + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 { +; CHECK-LABEL: @and_v2i32 +; CHECK: and v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = and <2 x i32> %op1, %op2 + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { +; CHECK-LABEL: @and_v4i32 +; CHECK: and v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = and <4 x i32> %op1, %op2 + ret <4 x i32> %res +} + +define void @and_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: @and_v8i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %op2 = load <8 x i32>, <8 x i32>* %b + %res = and <8 x i32> %op1, %op2 + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: @and_v16i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %op2 = load <16 x i32>, <16 x i32>* %b + %res = and <16 x i32> %op1, %op2 + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: @and_v32i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %op2 = load <32 x i32>, <32 x i32>* %b + %res = and <32 x i32> %op1, %op2 + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: @and_v64i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %op2 = load <64 x i32>, <64 x i32>* %b + %res = and <64 x i32> %op1, %op2 + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 { +; CHECK-LABEL: @and_v1i64 +; CHECK: and v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = and <1 x i64> %op1, %op2 + ret <1 x i64> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { +; CHECK-LABEL: @and_v2i64 +; CHECK: and v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = and <2 x i64> %op1, %op2 + ret <2 x i64> %res +} + +define void @and_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: @and_v4i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %op2 = load <4 x i64>, <4 x i64>* %b + %res = and <4 x i64> %op1, %op2 + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: @and_v8i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %op2 = load <8 x i64>, <8 x i64>* %b + %res = and <8 x i64> %op1, %op2 + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: @and_v16i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i64>, <16 x i64>* %a + %op2 = load <16 x i64>, <16 x i64>* %b + %res = and <16 x i64> %op1, %op2 + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +; NOTE: Check lines only cover the first VBYTES because the and_v#i8 tests +; already cover the general legalisation cases. +define void @and_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: @and_v32i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: and [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i64>, <32 x i64>* %a + %op2 = load <32 x i64>, <32 x i64>* %b + %res = and <32 x i64> %op1, %op2 + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +; +; NOTE: Tests beyond this point only have CHECK lines to validate the first +; VBYTES because the and tests already validate the legalisation code paths. +; + +; Don't use SVE for 64-bit vectors. +define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { +; CHECK-LABEL: @or_v8i8 +; CHECK: orr v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = or <8 x i8> %op1, %op2 + ret <8 x i8> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { +; CHECK-LABEL: @or_v16i8 +; CHECK: orr v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = or <16 x i8> %op1, %op2 + ret <16 x i8> %res +} + +define void @or_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: @or_v32i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,32)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %op2 = load <32 x i8>, <32 x i8>* %b + %res = or <32 x i8> %op1, %op2 + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @or_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: @or_v64i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,64)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i8>, <64 x i8>* %a + %op2 = load <64 x i8>, <64 x i8>* %b + %res = or <64 x i8> %op1, %op2 + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @or_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: @or_v128i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,128)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %op2 = load <128 x i8>, <128 x i8>* %b + %res = or <128 x i8> %op1, %op2 + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @or_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: @or_v256i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,256)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <256 x i8>, <256 x i8>* %a + %op2 = load <256 x i8>, <256 x i8>* %b + %res = or <256 x i8> %op1, %op2 + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { +; CHECK-LABEL: @or_v4i16 +; CHECK: orr v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = or <4 x i16> %op1, %op2 + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { +; CHECK-LABEL: @or_v8i16 +; CHECK: orr v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = or <8 x i16> %op1, %op2 + ret <8 x i16> %res +} + +define void @or_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: @or_v16i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %op2 = load <16 x i16>, <16 x i16>* %b + %res = or <16 x i16> %op1, %op2 + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @or_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: @or_v32i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %op2 = load <32 x i16>, <32 x i16>* %b + %res = or <32 x i16> %op1, %op2 + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @or_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: @or_v64i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %op2 = load <64 x i16>, <64 x i16>* %b + %res = or <64 x i16> %op1, %op2 + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @or_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: @or_v128i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %op2 = load <128 x i16>, <128 x i16>* %b + %res = or <128 x i16> %op1, %op2 + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 { +; CHECK-LABEL: @or_v2i32 +; CHECK: orr v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = or <2 x i32> %op1, %op2 + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { +; CHECK-LABEL: @or_v4i32 +; CHECK: orr v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = or <4 x i32> %op1, %op2 + ret <4 x i32> %res +} + +define void @or_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: @or_v8i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %op2 = load <8 x i32>, <8 x i32>* %b + %res = or <8 x i32> %op1, %op2 + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @or_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: @or_v16i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %op2 = load <16 x i32>, <16 x i32>* %b + %res = or <16 x i32> %op1, %op2 + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @or_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: @or_v32i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %op2 = load <32 x i32>, <32 x i32>* %b + %res = or <32 x i32> %op1, %op2 + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @or_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: @or_v64i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %op2 = load <64 x i32>, <64 x i32>* %b + %res = or <64 x i32> %op1, %op2 + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 { +; CHECK-LABEL: @or_v1i64 +; CHECK: orr v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = or <1 x i64> %op1, %op2 + ret <1 x i64> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { +; CHECK-LABEL: @or_v2i64 +; CHECK: orr v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = or <2 x i64> %op1, %op2 + ret <2 x i64> %res +} + +define void @or_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: @or_v4i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %op2 = load <4 x i64>, <4 x i64>* %b + %res = or <4 x i64> %op1, %op2 + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @or_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: @or_v8i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %op2 = load <8 x i64>, <8 x i64>* %b + %res = or <8 x i64> %op1, %op2 + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @or_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: @or_v16i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i64>, <16 x i64>* %a + %op2 = load <16 x i64>, <16 x i64>* %b + %res = or <16 x i64> %op1, %op2 + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @or_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: @or_v32i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: orr [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i64>, <32 x i64>* %a + %op2 = load <32 x i64>, <32 x i64>* %b + %res = or <32 x i64> %op1, %op2 + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { +; CHECK-LABEL: @xor_v8i8 +; CHECK: eor v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = xor <8 x i8> %op1, %op2 + ret <8 x i8> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { +; CHECK-LABEL: @xor_v16i8 +; CHECK: eor v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = xor <16 x i8> %op1, %op2 + ret <16 x i8> %res +} + +define void @xor_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 { +; CHECK-LABEL: @xor_v32i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,32)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i8>, <32 x i8>* %a + %op2 = load <32 x i8>, <32 x i8>* %b + %res = xor <32 x i8> %op1, %op2 + store <32 x i8> %res, <32 x i8>* %a + ret void +} + +define void @xor_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { +; CHECK-LABEL: @xor_v64i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,64)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i8>, <64 x i8>* %a + %op2 = load <64 x i8>, <64 x i8>* %b + %res = xor <64 x i8> %op1, %op2 + store <64 x i8> %res, <64 x i8>* %a + ret void +} + +define void @xor_v128i8(<128 x i8>* %a, <128 x i8>* %b) #0 { +; CHECK-LABEL: @xor_v128i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,128)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <128 x i8>, <128 x i8>* %a + %op2 = load <128 x i8>, <128 x i8>* %b + %res = xor <128 x i8> %op1, %op2 + store <128 x i8> %res, <128 x i8>* %a + ret void +} + +define void @xor_v256i8(<256 x i8>* %a, <256 x i8>* %b) #0 { +; CHECK-LABEL: @xor_v256i8 +; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,256)]] +; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0] +; CHECK-DAG: ld1b { [[OP2:z[0-9]+]].b }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1b { [[RES]].b }, [[PG]], [x0] +; CHECK: ret + %op1 = load <256 x i8>, <256 x i8>* %a + %op2 = load <256 x i8>, <256 x i8>* %b + %res = xor <256 x i8> %op1, %op2 + store <256 x i8> %res, <256 x i8>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { +; CHECK-LABEL: @xor_v4i16 +; CHECK: eor v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = xor <4 x i16> %op1, %op2 + ret <4 x i16> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { +; CHECK-LABEL: @xor_v8i16 +; CHECK: eor v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = xor <8 x i16> %op1, %op2 + ret <8 x i16> %res +} + +define void @xor_v16i16(<16 x i16>* %a, <16 x i16>* %b) #0 { +; CHECK-LABEL: @xor_v16i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i16>, <16 x i16>* %a + %op2 = load <16 x i16>, <16 x i16>* %b + %res = xor <16 x i16> %op1, %op2 + store <16 x i16> %res, <16 x i16>* %a + ret void +} + +define void @xor_v32i16(<32 x i16>* %a, <32 x i16>* %b) #0 { +; CHECK-LABEL: @xor_v32i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i16>, <32 x i16>* %a + %op2 = load <32 x i16>, <32 x i16>* %b + %res = xor <32 x i16> %op1, %op2 + store <32 x i16> %res, <32 x i16>* %a + ret void +} + +define void @xor_v64i16(<64 x i16>* %a, <64 x i16>* %b) #0 { +; CHECK-LABEL: @xor_v64i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i16>, <64 x i16>* %a + %op2 = load <64 x i16>, <64 x i16>* %b + %res = xor <64 x i16> %op1, %op2 + store <64 x i16> %res, <64 x i16>* %a + ret void +} + +define void @xor_v128i16(<128 x i16>* %a, <128 x i16>* %b) #0 { +; CHECK-LABEL: @xor_v128i16 +; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]] +; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0] +; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1h { [[RES]].h }, [[PG]], [x0] +; CHECK: ret + %op1 = load <128 x i16>, <128 x i16>* %a + %op2 = load <128 x i16>, <128 x i16>* %b + %res = xor <128 x i16> %op1, %op2 + store <128 x i16> %res, <128 x i16>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) #0 { +; CHECK-LABEL: @xor_v2i32 +; CHECK: eor v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = xor <2 x i32> %op1, %op2 + ret <2 x i32> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { +; CHECK-LABEL: @xor_v4i32 +; CHECK: eor v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = xor <4 x i32> %op1, %op2 + ret <4 x i32> %res +} + +define void @xor_v8i32(<8 x i32>* %a, <8 x i32>* %b) #0 { +; CHECK-LABEL: @xor_v8i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i32>, <8 x i32>* %a + %op2 = load <8 x i32>, <8 x i32>* %b + %res = xor <8 x i32> %op1, %op2 + store <8 x i32> %res, <8 x i32>* %a + ret void +} + +define void @xor_v16i32(<16 x i32>* %a, <16 x i32>* %b) #0 { +; CHECK-LABEL: @xor_v16i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i32>, <16 x i32>* %a + %op2 = load <16 x i32>, <16 x i32>* %b + %res = xor <16 x i32> %op1, %op2 + store <16 x i32> %res, <16 x i32>* %a + ret void +} + +define void @xor_v32i32(<32 x i32>* %a, <32 x i32>* %b) #0 { +; CHECK-LABEL: @xor_v32i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i32>, <32 x i32>* %a + %op2 = load <32 x i32>, <32 x i32>* %b + %res = xor <32 x i32> %op1, %op2 + store <32 x i32> %res, <32 x i32>* %a + ret void +} + +define void @xor_v64i32(<64 x i32>* %a, <64 x i32>* %b) #0 { +; CHECK-LABEL: @xor_v64i32 +; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] +; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0] +; CHECK-DAG: ld1w { [[OP2:z[0-9]+]].s }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1w { [[RES]].s }, [[PG]], [x0] +; CHECK: ret + %op1 = load <64 x i32>, <64 x i32>* %a + %op2 = load <64 x i32>, <64 x i32>* %b + %res = xor <64 x i32> %op1, %op2 + store <64 x i32> %res, <64 x i32>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 { +; CHECK-LABEL: @xor_v1i64 +; CHECK: eor v0.8b, v0.8b, v1.8b +; CHECK: ret + %res = xor <1 x i64> %op1, %op2 + ret <1 x i64> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { +; CHECK-LABEL: @xor_v2i64 +; CHECK: eor v0.16b, v0.16b, v1.16b +; CHECK: ret + %res = xor <2 x i64> %op1, %op2 + ret <2 x i64> %res +} + +define void @xor_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 { +; CHECK-LABEL: @xor_v4i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <4 x i64>, <4 x i64>* %a + %op2 = load <4 x i64>, <4 x i64>* %b + %res = xor <4 x i64> %op1, %op2 + store <4 x i64> %res, <4 x i64>* %a + ret void +} + +define void @xor_v8i64(<8 x i64>* %a, <8 x i64>* %b) #0 { +; CHECK-LABEL: @xor_v8i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <8 x i64>, <8 x i64>* %a + %op2 = load <8 x i64>, <8 x i64>* %b + %res = xor <8 x i64> %op1, %op2 + store <8 x i64> %res, <8 x i64>* %a + ret void +} + +define void @xor_v16i64(<16 x i64>* %a, <16 x i64>* %b) #0 { +; CHECK-LABEL: @xor_v16i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <16 x i64>, <16 x i64>* %a + %op2 = load <16 x i64>, <16 x i64>* %b + %res = xor <16 x i64> %op1, %op2 + store <16 x i64> %res, <16 x i64>* %a + ret void +} + +define void @xor_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 { +; CHECK-LABEL: @xor_v32i64 +; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]] +; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0] +; CHECK-DAG: ld1d { [[OP2:z[0-9]+]].d }, [[PG]]/z, [x1] +; CHECK: eor [[RES:z[0-9]+]].d, [[OP1]].d, [[OP2]].d +; CHECK: st1d { [[RES]].d }, [[PG]], [x0] +; CHECK: ret + %op1 = load <32 x i64>, <32 x i64>* %a + %op2 = load <32 x i64>, <32 x i64>* %b + %res = xor <32 x i64> %op1, %op2 + store <32 x i64> %res, <32 x i64>* %a + ret void +} + +attributes #0 = { "target-features"="+sve" } From 3ab01550b632dad46f9595d74855749557ffd25c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 5 Aug 2020 12:14:07 +0200 Subject: [PATCH 442/600] Revert "[CMake] Simplify CMake handling for zlib" This quietly disabled use of zlib on Windows even when building with -DLLVM_ENABLE_ZLIB=FORCE_ON. > Rather than handling zlib handling manually, use find_package from CMake > to find zlib properly. Use this to normalize the LLVM_ENABLE_ZLIB, > HAVE_ZLIB, HAVE_ZLIB_H. Furthermore, require zlib if LLVM_ENABLE_ZLIB is > set to YES, which requires the distributor to explicitly select whether > zlib is enabled or not. This simplifies the CMake handling and usage in > the rest of the tooling. > > This is a reland of abb0075 with all followup changes and fixes that > should address issues that were reported in PR44780. > > Differential Revision: https://reviews.llvm.org/D79219 This reverts commit 10b1b4a231a485f1711d576e6131f6755e008abe and follow-ups 64d99cc6abed78c00a2a7863b02ce54911a5264f and f9fec0447e12da9e8cf4b628f6d45f4941e7d182. 
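
As a quick way to confirm the fix, zlib support is observable at run time
through LLVMSupport. The following is an illustrative sketch (not part of
this commit); it assumes the llvm::zlib API of this source tree, where
llvm::zlib::isAvailable() reports whether the library was built with zlib:

    #include "llvm/Support/Compression.h"
    #include <cstdio>

    // Exits 0 when LLVMSupport was configured with zlib. On a Windows build
    // configured with -DLLVM_ENABLE_ZLIB=FORCE_ON, this should now succeed.
    int main() {
      const bool HasZlib = llvm::zlib::isAvailable();
      std::printf("zlib available: %s\n", HasZlib ? "yes" : "no");
      return HasZlib ? 0 : 1;
    }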
--- clang/test/CMakeLists.txt | 11 ++- clang/test/lit.site.cfg.py.in | 2 +- compiler-rt/test/lit.common.configured.in | 2 +- lld/test/CMakeLists.txt | 11 ++- lld/test/lit.site.cfg.py.in | 2 +- lldb/cmake/modules/LLDBStandalone.cmake | 4 + .../gdb-remote/GDBRemoteCommunication.cpp | 4 +- .../GDBRemoteCommunicationClient.cpp | 2 +- llvm/cmake/config-ix.cmake | 88 +++++++++++-------- llvm/cmake/modules/LLVMConfig.cmake.in | 3 - llvm/include/llvm/Config/config.h.cmake | 6 ++ llvm/lib/Support/CMakeLists.txt | 35 ++------ llvm/lib/Support/CRC.cpp | 2 +- llvm/lib/Support/Compression.cpp | 4 +- llvm/test/CMakeLists.txt | 2 +- llvm/test/lit.site.cfg.py.in | 2 +- llvm/unittests/Support/CompressionTest.cpp | 2 +- mlir/examples/standalone/CMakeLists.txt | 1 - 18 files changed, 99 insertions(+), 84 deletions(-) diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 334a90498d0da..38bbc5be90d52 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -9,6 +9,15 @@ endif () string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} CLANG_TOOLS_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) +if(CLANG_BUILT_STANDALONE) + # Set HAVE_LIBZ according to recorded LLVM_ENABLE_ZLIB value. This + # value is forced to 0 if zlib was not found, so it is fine to use it + # instead of HAVE_LIBZ (not recorded). + if(LLVM_ENABLE_ZLIB) + set(HAVE_LIBZ 1) + endif() +endif() + llvm_canonicalize_cmake_booleans( CLANG_BUILD_EXAMPLES CLANG_ENABLE_ARCMT @@ -16,7 +25,7 @@ llvm_canonicalize_cmake_booleans( CLANG_SPAWN_CC1 ENABLE_BACKTRACES ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER - LLVM_ENABLE_ZLIB + HAVE_LIBZ LLVM_ENABLE_PER_TARGET_RUNTIME_DIR LLVM_ENABLE_PLUGINS LLVM_ENABLE_THREADS) diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index 286ea06d798c1..d9b5b2f2592e5 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -16,7 +16,7 @@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.host_cxx = "@CMAKE_CXX_COMPILER@" config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_zlib = @HAVE_LIBZ@ config.clang_arcmt = @CLANG_ENABLE_ARCMT@ config.clang_default_cxx_stdlib = "@CLANG_DEFAULT_CXX_STDLIB@" config.clang_staticanalyzer = @CLANG_ENABLE_STATIC_ANALYZER@ diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 000bf9b984709..1f746c067b84c 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -57,7 +57,7 @@ elif config.android: else: set_default("target_suffix", "-%s" % config.target_arch) -set_default("have_zlib", "@LLVM_ENABLE_ZLIB@") +set_default("have_zlib", "@HAVE_LIBZ@") set_default("libcxx_used", "@LLVM_LIBCXX_USED@") # LLVM tools dir can be passed in lit parameters, so try to diff --git a/lld/test/CMakeLists.txt b/lld/test/CMakeLists.txt index 52e6118ba876b..74b29f5d65b89 100644 --- a/lld/test/CMakeLists.txt +++ b/lld/test/CMakeLists.txt @@ -4,8 +4,17 @@ set(LLVM_BUILD_MODE "%(build_mode)s") set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/%(build_config)s") +if(LLD_BUILT_STANDALONE) + # Set HAVE_LIBZ according to recorded LLVM_ENABLE_ZLIB value. This + # value is forced to 0 if zlib was not found, so it is fine to use it + # instead of HAVE_LIBZ (not recorded). 
+  if(LLVM_ENABLE_ZLIB)
+    set(HAVE_LIBZ 1)
+  endif()
+endif()
+
 llvm_canonicalize_cmake_booleans(
-  LLVM_ENABLE_ZLIB
+  HAVE_LIBZ
   LLVM_LIBXML2_ENABLED
   )
 
diff --git a/lld/test/lit.site.cfg.py.in b/lld/test/lit.site.cfg.py.in
index 3d4c51f4ab647..4aa2fcda73bb4 100644
--- a/lld/test/lit.site.cfg.py.in
+++ b/lld/test/lit.site.cfg.py.in
@@ -14,7 +14,7 @@ config.lld_libs_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@"
 config.lld_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.python_executable = "@Python3_EXECUTABLE@"
-config.have_zlib = @LLVM_ENABLE_ZLIB@
+config.have_zlib = @HAVE_LIBZ@
 config.sizeof_void_p = @CMAKE_SIZEOF_VOID_P@
 
 # Support substitution of the tools and libs dirs with user parameters. This is
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 94781c3583744..edd2b34ec8655 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -74,6 +74,10 @@ endif()
 # CMake modules to be in that directory as well.
 list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}")
 
+if(LLVM_ENABLE_ZLIB)
+  find_package(ZLIB)
+endif()
+
 include(AddLLVM)
 include(TableGen)
 include(HandleLLVMOptions)
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
index 832760f7f0dcc..71522d65bc4bf 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
@@ -50,7 +50,7 @@
 #include <compression.h>
 #endif
 
-#if LLVM_ENABLE_ZLIB
+#if defined(HAVE_LIBZ)
 #include <zlib.h>
 #endif
 
@@ -582,7 +582,7 @@ bool GDBRemoteCommunication::DecompressPacket() {
   }
 #endif
 
-#if LLVM_ENABLE_ZLIB
+#if defined(HAVE_LIBZ)
   if (decompressed_bytes == 0 && decompressed_bufsize != ULONG_MAX &&
       decompressed_buffer != nullptr &&
       m_compression_type == CompressionType::ZlibDeflate) {
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index d77f7a0b5a379..c75d5e106cd02 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -1053,7 +1053,7 @@ void GDBRemoteCommunicationClient::MaybeEnableCompression(
   }
 #endif
 
-#if LLVM_ENABLE_ZLIB
+#if defined(HAVE_LIBZ)
   if (avail_type == CompressionType::None) {
     for (auto compression : supported_compressions) {
       if (compression == "zlib-deflate") {
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 9d0397fc79422..90e5d327c7577 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -56,6 +56,7 @@ check_include_file(sys/types.h HAVE_SYS_TYPES_H)
 check_include_file(termios.h HAVE_TERMIOS_H)
 check_include_file(unistd.h HAVE_UNISTD_H)
 check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
+check_include_file(zlib.h HAVE_ZLIB_H)
 check_include_file(fenv.h HAVE_FENV_H)
 check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT)
 check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT)
@@ -116,60 +117,68 @@ endif()
 # Don't look for these libraries if we're using MSan, since uninstrumented third
 # party code may call MSan interceptors like strlen, leading to false positives.
-# Don't look for these libraries on Windows.
-if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*" AND NOT PURE_WINDOWS)
-  # Skip libedit if using ASan as it contains memory leaks.
- if (LLVM_ENABLE_LIBEDIT AND HAVE_HISTEDIT_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*") - check_library_exists(edit el_init "" HAVE_LIBEDIT) - else() - set(HAVE_LIBEDIT 0) - endif() - if(LLVM_ENABLE_TERMINFO) - set(HAVE_TERMINFO 0) - foreach(library terminfo tinfo curses ncurses ncursesw) +if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + set(HAVE_LIBZ 0) + if(LLVM_ENABLE_ZLIB) + foreach(library z zlib_static zlib) string(TOUPPER ${library} library_suffix) - check_library_exists(${library} setupterm "" HAVE_TERMINFO_${library_suffix}) - if(HAVE_TERMINFO_${library_suffix}) - set(HAVE_TERMINFO 1) - set(TERMINFO_LIBS "${library}") + check_library_exists(${library} compress2 "" HAVE_LIBZ_${library_suffix}) + if(HAVE_LIBZ_${library_suffix}) + set(HAVE_LIBZ 1) + set(ZLIB_LIBRARIES "${library}") break() endif() endforeach() - else() - set(HAVE_TERMINFO 0) endif() - if(LLVM_ENABLE_ZLIB) - if(LLVM_ENABLE_ZLIB STREQUAL FORCE_ON) - find_package(ZLIB REQUIRED) + # Don't look for these libraries on Windows. + if (NOT PURE_WINDOWS) + # Skip libedit if using ASan as it contains memory leaks. + if (LLVM_ENABLE_LIBEDIT AND HAVE_HISTEDIT_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*") + check_library_exists(edit el_init "" HAVE_LIBEDIT) else() - find_package(ZLIB) + set(HAVE_LIBEDIT 0) + endif() + if(LLVM_ENABLE_TERMINFO) + set(HAVE_TERMINFO 0) + foreach(library terminfo tinfo curses ncurses ncursesw) + string(TOUPPER ${library} library_suffix) + check_library_exists(${library} setupterm "" HAVE_TERMINFO_${library_suffix}) + if(HAVE_TERMINFO_${library_suffix}) + set(HAVE_TERMINFO 1) + set(TERMINFO_LIBS "${library}") + break() + endif() + endforeach() + else() + set(HAVE_TERMINFO 0) endif() - set(LLVM_ENABLE_ZLIB "${ZLIB_FOUND}") - endif() - find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) - set(LLVM_LIBXML2_ENABLED 0) - set(LIBXML2_FOUND 0) - if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) - find_package(LibXml2) - if (LIBXML2_FOUND) - set(LLVM_LIBXML2_ENABLED 1) - if ((CMAKE_OSX_SYSROOT) AND (EXISTS ${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR})) - include_directories(${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR}) - else() - include_directories(${LIBXML2_INCLUDE_DIR}) + find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) + set(LLVM_LIBXML2_ENABLED 0) + set(LIBXML2_FOUND 0) + if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) + find_package(LibXml2) + if (LIBXML2_FOUND) + set(LLVM_LIBXML2_ENABLED 1) + if ((CMAKE_OSX_SYSROOT) AND (EXISTS ${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR})) + include_directories(${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR}) + else() + include_directories(${LIBXML2_INCLUDE_DIR}) + endif() endif() endif() endif() -else() - set(LLVM_ENABLE_ZLIB FALSE) endif() if (LLVM_ENABLE_LIBXML2 STREQUAL "FORCE_ON" AND NOT LLVM_LIBXML2_ENABLED) message(FATAL_ERROR "Failed to congifure libxml2") endif() +if (LLVM_ENABLE_ZLIB STREQUAL "FORCE_ON" AND NOT HAVE_LIBZ) + message(FATAL_ERROR "Failed to configure zlib") +endif() + check_library_exists(xar xar_open "" HAVE_LIBXAR) if(HAVE_LIBXAR) set(XAR_LIB xar) @@ -508,6 +517,13 @@ else( LLVM_ENABLE_THREADS ) message(STATUS "Threads disabled.") endif() +if (LLVM_ENABLE_ZLIB ) + # Check if zlib is available in the system. 
+  if ( NOT HAVE_ZLIB_H OR NOT HAVE_LIBZ )
+    set(LLVM_ENABLE_ZLIB 0)
+  endif()
+endif()
+
 if (LLVM_ENABLE_DOXYGEN)
   message(STATUS "Doxygen enabled.")
   find_package(Doxygen REQUIRED)
diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in
index a5c370bbc25e4..4d8e33711d27b 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -51,9 +51,6 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
 set(LLVM_ENABLE_UNWIND_TABLES @LLVM_ENABLE_UNWIND_TABLES@)
 set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
 
-if(LLVM_ENABLE_ZLIB)
-  find_package(ZLIB)
-endif()
 
 set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@)
 
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index b8c7e070eb341..70543bec24583 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -115,6 +115,9 @@
 /* Define to 1 if you have the `pthread_setname_np' function. */
 #cmakedefine HAVE_PTHREAD_SETNAME_NP ${HAVE_PTHREAD_SETNAME_NP}
 
+/* Define to 1 if you have the `z' library (-lz). */
+#cmakedefine HAVE_LIBZ ${HAVE_LIBZ}
+
 /* Define to 1 if you have the <link.h> header file. */
 #cmakedefine HAVE_LINK_H ${HAVE_LINK_H}
 
@@ -223,6 +226,9 @@
 /* Define to 1 if you have the <valgrind/valgrind.h> header file. */
 #cmakedefine HAVE_VALGRIND_VALGRIND_H ${HAVE_VALGRIND_VALGRIND_H}
 
+/* Define to 1 if you have the <zlib.h> header file. */
+#cmakedefine HAVE_ZLIB_H ${HAVE_ZLIB_H}
+
 /* Have host's _alloca */
 #cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA}
 
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 7b45dc628160e..17bef02307897 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -1,7 +1,7 @@
-if(LLVM_ENABLE_ZLIB)
-  set(imported_libs ZLIB::ZLIB)
+set(system_libs)
+if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
+  set(system_libs ${system_libs} ${ZLIB_LIBRARIES})
 endif()
-
 if( MSVC OR MINGW )
   # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
   # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc.
@@ -194,35 +194,10 @@ add_llvm_component_library(LLVMSupport
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support
   ${Backtrace_INCLUDE_DIRS}
-  LINK_LIBS ${system_libs} ${imported_libs} ${delayload_flags} ${Z3_LINK_FILES}
+  LINK_LIBS ${system_libs} ${delayload_flags} ${Z3_LINK_FILES}
   )
 
-set(llvm_system_libs ${system_libs})
-
-if(LLVM_ENABLE_ZLIB)
-  # CMAKE_BUILD_TYPE is only meaningful to single-configuration generators.
-  if(CMAKE_BUILD_TYPE)
-    string(TOUPPER ${CMAKE_BUILD_TYPE} build_type)
-    get_property(zlib_library TARGET ZLIB::ZLIB PROPERTY LOCATION_${build_type})
-  endif()
-  if(NOT zlib_library)
-    get_property(zlib_library TARGET ZLIB::ZLIB PROPERTY LOCATION)
-  endif()
-  get_filename_component(zlib_library ${zlib_library} NAME)
-  if(CMAKE_STATIC_LIBRARY_PREFIX AND CMAKE_STATIC_LIBRARY_SUFFIX AND
-     zlib_library MATCHES "^${CMAKE_STATIC_LIBRARY_PREFIX}.*${CMAKE_STATIC_LIBRARY_SUFFIX}$")
-    STRING(REGEX REPLACE "^${CMAKE_STATIC_LIBRARY_PREFIX}" "" zlib_library ${zlib_library})
-    STRING(REGEX REPLACE "${CMAKE_STATIC_LIBRARY_SUFFIX}$" "" zlib_library ${zlib_library})
-  endif()
-  if(CMAKE_SHARED_LIBRARY_PREFIX AND CMAKE_SHARED_LIBRARY_SUFFIX AND
-     zlib_library MATCHES "^${CMAKE_SHARED_LIBRARY_PREFIX}.*${CMAKE_SHARED_LIBRARY_SUFFIX}$")
-    STRING(REGEX REPLACE "^${CMAKE_SHARED_LIBRARY_PREFIX}" "" zlib_library ${zlib_library})
-    STRING(REGEX REPLACE "${CMAKE_SHARED_LIBRARY_SUFFIX}$" "" zlib_library ${zlib_library})
-  endif()
-  set(llvm_system_libs ${llvm_system_libs} "${zlib_library}")
-endif()
-
-set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${llvm_system_libs}")
+set_property(TARGET LLVMSupport PROPERTY LLVM_SYSTEM_LIBS "${system_libs}")
 
 if(LLVM_WITH_Z3)
   target_include_directories(LLVMSupport SYSTEM
diff --git a/llvm/lib/Support/CRC.cpp b/llvm/lib/Support/CRC.cpp
index 2bc668beed322..7ff09debe3b7c 100644
--- a/llvm/lib/Support/CRC.cpp
+++ b/llvm/lib/Support/CRC.cpp
@@ -25,7 +25,7 @@
 
 using namespace llvm;
 
-#if !LLVM_ENABLE_ZLIB
+#if LLVM_ENABLE_ZLIB == 0 || !HAVE_ZLIB_H
 
 static const uint32_t CRCTable[256] = {
     0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index b8c77cf69b95f..27d92f0e0aec2 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -17,13 +17,13 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#if LLVM_ENABLE_ZLIB
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
 #include <zlib.h>
 #endif
 
 using namespace llvm;
 
-#if LLVM_ENABLE_ZLIB
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
 static Error createError(StringRef Err) {
   return make_error<StringError>(Err, inconvertibleErrorCode());
 }
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index cde80035a09bf..a01e278079f9e 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -1,12 +1,12 @@
 llvm_canonicalize_cmake_booleans(
   BUILD_SHARED_LIBS
   HAVE_LIBXAR
+  HAVE_LIBZ
   HAVE_OCAMLOPT
   HAVE_OCAML_OUNIT
   LLVM_ENABLE_DIA_SDK
   LLVM_ENABLE_FFI
   LLVM_ENABLE_THREADS
-  LLVM_ENABLE_ZLIB
   LLVM_INCLUDE_GO_TESTS
   LLVM_LIBXML2_ENABLED
   LLVM_LINK_LLVM_DYLIB
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 52f709f817ddd..c04ef04fea601 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -33,7 +33,7 @@ config.host_cxx = "@HOST_CXX@"
 config.host_ldflags = '@HOST_LDFLAGS@'
 config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
-config.have_zlib = @LLVM_ENABLE_ZLIB@
+config.have_zlib = @HAVE_LIBZ@
 config.have_libxar = @HAVE_LIBXAR@
 config.have_dia_sdk = @LLVM_ENABLE_DIA_SDK@
 config.enable_ffi = @LLVM_ENABLE_FFI@
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 51723898e950d..cc7be431b62bc 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -21,7 +21,7 @@
using namespace llvm; namespace { -#if LLVM_ENABLE_ZLIB +#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ void TestZlibCompression(StringRef Input, int Level) { SmallString<32> Compressed; diff --git a/mlir/examples/standalone/CMakeLists.txt b/mlir/examples/standalone/CMakeLists.txt index 3f46dda4e4f64..45dc80804aa9a 100644 --- a/mlir/examples/standalone/CMakeLists.txt +++ b/mlir/examples/standalone/CMakeLists.txt @@ -28,7 +28,6 @@ set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}") list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") - include(TableGen) include(AddLLVM) include(AddMLIR) From 300899b9c4ed505f43840a9876e96dbb5ba4ce52 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Aug 2020 11:33:02 +0100 Subject: [PATCH 443/600] [X86][AVX] Add test showing unnecessary duplicate HADD instructions Taken from internal fuzz test --- llvm/test/CodeGen/X86/haddsub-undef.ll | 45 ++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll index de2408c1f6bda..e13ea21be2991 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/CodeGen/X86/haddsub-undef.ll @@ -627,6 +627,51 @@ define <4 x float> @add_ps_018(<4 x float> %x) { ret <4 x float> %shuffle2 } +define <4 x double> @add_pd_011(<4 x double> %0, <4 x double> %1) { +; SSE-SLOW-LABEL: add_pd_011: +; SSE-SLOW: # %bb.0: +; SSE-SLOW-NEXT: movapd %xmm2, %xmm1 +; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] +; SSE-SLOW-NEXT: movapd %xmm0, %xmm3 +; SSE-SLOW-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] +; SSE-SLOW-NEXT: addpd %xmm0, %xmm3 +; SSE-SLOW-NEXT: addpd %xmm2, %xmm1 +; SSE-SLOW-NEXT: movapd %xmm3, %xmm0 +; SSE-SLOW-NEXT: retq +; +; SSE-FAST-LABEL: add_pd_011: +; SSE-FAST: # %bb.0: +; SSE-FAST-NEXT: movapd %xmm2, %xmm1 +; SSE-FAST-NEXT: haddpd %xmm0, %xmm0 +; SSE-FAST-NEXT: haddpd %xmm2, %xmm1 +; SSE-FAST-NEXT: retq +; +; AVX1-SLOW-LABEL: add_pd_011: +; AVX1-SLOW: # %bb.0: +; AVX1-SLOW-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 +; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX1-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-SLOW-NEXT: retq +; +; AVX1-FAST-LABEL: add_pd_011: +; AVX1-FAST: # %bb.0: +; AVX1-FAST-NEXT: vhaddpd %xmm0, %xmm1, %xmm2 +; AVX1-FAST-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-FAST-NEXT: retq +; +; AVX512-LABEL: add_pd_011: +; AVX512: # %bb.0: +; AVX512-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] +; AVX512-NEXT: retq + %3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> + %4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> + %5 = fadd <4 x double> %3, %4 + %6 = shufflevector <4 x double> %5, <4 x double> undef, <4 x i32> + ret <4 x double> %6 +} + define <4 x float> @v8f32_inputs_v4f32_output_0101(<8 x float> %a, <8 x float> %b) { ; SSE-LABEL: v8f32_inputs_v4f32_output_0101: ; SSE: # %bb.0: From e3d3657b9be12dec947dbbc672a004f802ff2338 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Aug 2020 11:35:11 +0100 Subject: [PATCH 444/600] CallLowering.h - remove unnecessary CCState forward declaration. NFCI. 
Already defined in CallingConvLower.h
---
 llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 6eb5c853bb362..3d446c2c1d4b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -26,7 +26,6 @@
 namespace llvm {

-class CCState;
 class CallBase;
 class DataLayout;
 class Function;
From 315e1daf7f452979e298bae368bb71b92314751f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 5 Aug 2020 12:00:12 +0100
Subject: [PATCH 445/600] GISelWorkList.h - remove unnecessary includes. NFCI.

---
 llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
index b0bb519283b10..9e7ade3ee3293 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h
@@ -11,9 +11,6 @@

 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/Debug.h"

 namespace llvm {
From a44161692ae879068d4086a7e568a348800ba01d Mon Sep 17 00:00:00 2001
From: Alex Cameron
Date: Wed, 5 Aug 2020 07:14:28 -0400
Subject: [PATCH 446/600] Support member expressions in
 bugprone-bool-pointer-implicit-conversion.

This addresses PR45189.
---
 .../BoolPointerImplicitConversionCheck.cpp    | 67 ++++++++++++-------
 ...prone-bool-pointer-implicit-conversion.cpp | 26 ++++++-
 2 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp
index b764bdbf7c4c5..17dab1b0f73e3 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp
@@ -20,53 +20,68 @@ void BoolPointerImplicitConversionCheck::registerMatchers(MatchFinder *Finder) {
   Finder->addMatcher(
       traverse(
           ast_type_traits::TK_AsIs,
-          ifStmt(hasCondition(findAll(implicitCastExpr(
-                     unless(hasParent(unaryOperator(hasOperatorName("!")))),
-                     hasSourceExpression(expr(
-                         hasType(pointerType(pointee(booleanType()))),
-                         ignoringParenImpCasts(declRefExpr().bind("expr")))),
-                     hasCastKind(CK_PointerToBoolean)))),
-                 unless(isInTemplateInstantiation()))
+          ifStmt(
+              hasCondition(findAll(implicitCastExpr(
+                  unless(hasParent(unaryOperator(hasOperatorName("!")))),
+                  hasSourceExpression(expr(
+                      hasType(pointerType(pointee(booleanType()))),
+                      ignoringParenImpCasts(anyOf(declRefExpr().bind("expr"),
+                                                  memberExpr().bind("expr"))))),
+                  hasCastKind(CK_PointerToBoolean)))),
+              unless(isInTemplateInstantiation()))
               .bind("if")),
       this);
 }

-void BoolPointerImplicitConversionCheck::check(
-    const MatchFinder::MatchResult &Result) {
-  auto *If = Result.Nodes.getNodeAs<IfStmt>("if");
-  auto *Var = Result.Nodes.getNodeAs<DeclRefExpr>("expr");
-
+static void checkImpl(const MatchFinder::MatchResult &Result, const Expr *Ref,
+                      const IfStmt *If,
+                      const ast_matchers::internal::Matcher<Expr> &RefMatcher,
+                      ClangTidyCheck &Check) {
   // Ignore macros.
-  if (Var->getBeginLoc().isMacroID())
+  if (Ref->getBeginLoc().isMacroID())
     return;

-  // Only allow variable accesses for now, no function calls or member exprs.
+  // Only allow variable accesses and member exprs for now, no function calls.
   // Check that we don't dereference the variable anywhere within the if. This
   // avoids false positives for checks of the pointer for nullptr before it is
   // dereferenced. If there is a dereferencing operator on this variable don't
   // emit a diagnostic. Also ignore array subscripts.
-  const Decl *D = Var->getDecl();
-  auto DeclRef = ignoringParenImpCasts(declRefExpr(to(equalsNode(D))));
-  if (!match(findAll(
-                 unaryOperator(hasOperatorName("*"), hasUnaryOperand(DeclRef))),
+  if (!match(findAll(unaryOperator(hasOperatorName("*"),
+                                   hasUnaryOperand(RefMatcher))),
              *If, *Result.Context)
           .empty() ||
-      !match(findAll(arraySubscriptExpr(hasBase(DeclRef))), *If,
+      !match(findAll(arraySubscriptExpr(hasBase(RefMatcher))), *If,
             *Result.Context)
           .empty() ||
       // FIXME: We should still warn if the parameter is implicitly converted to
       // bool.
-      !match(findAll(callExpr(hasAnyArgument(ignoringParenImpCasts(DeclRef)))),
-             *If, *Result.Context)
+      !match(
+          findAll(callExpr(hasAnyArgument(ignoringParenImpCasts(RefMatcher)))),
+          *If, *Result.Context)
           .empty() ||
-      !match(findAll(cxxDeleteExpr(has(ignoringParenImpCasts(expr(DeclRef))))),
-             *If, *Result.Context)
+      !match(
+          findAll(cxxDeleteExpr(has(ignoringParenImpCasts(expr(RefMatcher))))),
+          *If, *Result.Context)
           .empty())
     return;

-  diag(Var->getBeginLoc(), "dubious check of 'bool *' against 'nullptr', did "
-                           "you mean to dereference it?")
-      << FixItHint::CreateInsertion(Var->getBeginLoc(), "*");
+  Check.diag(Ref->getBeginLoc(),
+             "dubious check of 'bool *' against 'nullptr', did "
+             "you mean to dereference it?")
+      << FixItHint::CreateInsertion(Ref->getBeginLoc(), "*");
+}
+
+void BoolPointerImplicitConversionCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  const auto *If = Result.Nodes.getNodeAs<IfStmt>("if");
+  if (const auto *E = Result.Nodes.getNodeAs<Expr>("expr")) {
+    const Decl *D = isa<DeclRefExpr>(E) ? cast<DeclRefExpr>(E)->getDecl()
+                                        : cast<MemberExpr>(E)->getMemberDecl();
+    const auto M =
+        ignoringParenImpCasts(anyOf(declRefExpr(to(equalsNode(D))),
+                                    memberExpr(hasDeclaration(equalsNode(D)))));
+    checkImpl(Result, E, If, M, *this);
+  }
 }

 } // namespace bugprone
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-bool-pointer-implicit-conversion.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-bool-pointer-implicit-conversion.cpp
index 37c6939b590f0..926fd68321a7e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-bool-pointer-implicit-conversion.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-bool-pointer-implicit-conversion.cpp
@@ -74,9 +74,31 @@ void foo() {
     bool *b;
   } d = { SomeFunction() };

-  if (d.b)
+  if (d.b) {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: dubious check of 'bool *' against 'nullptr'
+    // CHECK-FIXES: if (*d.b) {
+  }
+
+  if (d.b) {
     (void)*d.b; // no-warning
+  }

-#define CHECK(b) if (b) {}
+#define CHECK(b) \
+  if (b) {       \
+  }

   CHECK(c)
 }
+
+struct H {
+  bool *b;
+  void foo() const {
+    if (b) {
+      // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: dubious check of 'bool *' against 'nullptr'
+      // CHECK-FIXES: if (*b) {
+    }
+
+    if (b) {
+      (void)*b; // no-warning
+    }
+  }
+};
From bd7f3f8a3ed70586f2b6a68b267b83d18e6fbdb4 Mon Sep 17 00:00:00 2001
From: Xing GUO
Date: Wed, 5 Aug 2020 19:18:33 +0800
Subject: [PATCH 447/600] [obj2yaml] Add support for dumping the
 .debug_aranges section.

This patch adds support for dumping DWARF sections to obj2yaml. The
.debug_aranges section is used to illustrate the basic idea.
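The basic shape of the per-section dispatch, condensed from the change
below (an illustrative sketch, not the complete logic):

    // Try the DWARF parser first; on failure keep the section as raw Content.
    if (RawSec->Name == ".debug_aranges")
      Err = dumpDebugARanges(*DWARFCtx.get(), DWARF);
    if (Err)
      consumeError(std::move(Err)); // fall back to a raw content section
    else
      RawSec->Content.reset();      // described under the 'DWARF:' entry instead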
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D85094 --- .../obj2yaml/ELF/DWARF/debug-aranges.yaml | 172 ++++++++++++++++++ llvm/tools/obj2yaml/elf2yaml.cpp | 84 +++++++-- llvm/tools/obj2yaml/obj2yaml.h | 3 +- 3 files changed, 245 insertions(+), 14 deletions(-) create mode 100644 llvm/test/tools/obj2yaml/ELF/DWARF/debug-aranges.yaml diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-aranges.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-aranges.yaml new file mode 100644 index 0000000000000..39ee4baa97fee --- /dev/null +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-aranges.yaml @@ -0,0 +1,172 @@ +## Test how we dump the .debug_aranges section. + +## a) Test dumping the DWARF32/64 address range table from 32/64-bit, little/big endian object files. +## The .debug_aranges should be written to the 'DWARF' entry and the 'Sections' entry should remain empty. + +# RUN: yaml2obj --docnum=1 -DBITS=32 -DENDIAN=LSB %s | obj2yaml | \ +# RUN: FileCheck -DLENGTH1=24 -DLENGTH2=24 -DADDRSIZE=0x04 %s --check-prefix=BASIC --implicit-check-not=Sections + +# RUN: yaml2obj --docnum=1 -DBITS=32 -DENDIAN=MSB %s | obj2yaml | \ +# RUN: FileCheck -DLENGTH1=24 -DLENGTH2=24 -DADDRSIZE=0x04 %s --check-prefix=BASIC --implicit-check-not=Sections + +# RUN: yaml2obj --docnum=1 -DBITS=64 -DENDIAN=LSB %s | obj2yaml | \ +# RUN: FileCheck -DLENGTH1=3C -DLENGTH2=44 -DADDRSIZE=0x08 %s --check-prefix=BASIC --implicit-check-not=Sections + +# RUN: yaml2obj --docnum=1 -DBITS=64 -DENDIAN=MSB %s | obj2yaml | \ +# RUN: FileCheck -DLENGTH1=3C -DLENGTH2=44 -DADDRSIZE=0x08 %s --check-prefix=BASIC --implicit-check-not=Sections + +# BASIC: DWARF: +# BASIC-NEXT: debug_aranges: +# BASIC-NEXT: - Length: 0x00000000000000[[LENGTH1]] +# BASIC-NEXT: Version: 2 +# BASIC-NEXT: CuOffset: 0x0000000000001234 +# BASIC-NEXT: AddressSize: [[ADDRSIZE]] +# BASIC-NEXT: Descriptors: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: Length: 0x0000000000005678 +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: Length: 0x0000000000005678 +# BASIC-NEXT: - Format: DWARF64 +# BASIC-NEXT: Length: 0x00000000000000[[LENGTH2]] +# BASIC-NEXT: Version: 2 +# BASIC-NEXT: CuOffset: 0x1234567890ABCDEF +# BASIC-NEXT: AddressSize: [[ADDRSIZE]] +# BASIC-NEXT: Descriptors: +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: Length: 0x0000000000005678 +# BASIC-NEXT: - Address: 0x0000000000001234 +# BASIC-NEXT: Length: 0x0000000000005678 +# BASIC-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS]] + Data: ELFDATA2[[ENDIAN]] + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_aranges: + - Version: 2 + CuOffset: 0x1234 + Descriptors: + - Address: 0x1234 + Length: 0x5678 + - Address: 0x1234 + Length: 0x5678 + - Format: DWARF64 + Version: 2 + CuOffset: 0x1234567890abcdef + Descriptors: + - Address: 0x1234 + Length: 0x5678 + - Address: 0x1234 + Length: 0x5678 + +## b) Test dumping an .debug_aranges section whose section header properties are overridden. 
+ +# RUN: yaml2obj --docnum=2 -DTYPE=SHT_STRTAB %s | obj2yaml | FileCheck %s -DTYPE=STRTAB --check-prefixes=ARANGE,SHDR +# RUN: yaml2obj --docnum=2 -DFLAGS=[SHF_ALLOC] %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,FLAGS +# RUN: yaml2obj --docnum=2 -DLINK='.sec' %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,LINK +# RUN: yaml2obj --docnum=2 -DENTSIZE=3 %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,ENTSIZE +# RUN: yaml2obj --docnum=2 -DINFO=3 %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,INFO +# RUN: yaml2obj --docnum=2 -DADDRALIGN=3 %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,ADDRALIGN +# RUN: yaml2obj --docnum=2 -DADDRESS=0x2020 %s | obj2yaml | FileCheck %s -DTYPE=PROGBITS --check-prefixes=ARANGE,SHDR,ADDRESS + +# SHDR: - Name: .debug_aranges +# SHDR-NEXT: Type: SHT_[[TYPE]] +# FLAGS-NEXT: Flags: [ SHF_ALLOC ] +# LINK-NEXT: Link: .sec +# ENTSIZE-NEXT: EntSize: 0x0000000000000003 +# INFO-NEXT: Info: 0x0000000000000003 +# ADDRALIGN-NEXT: AddressAlign: 0x0000000000000003 +# ADDRESS-NEXT: Address: 0x0000000000002020 + +# ARANGE: DWARF: +# ARANGE-NEXT: debug_aranges: +# ARANGE-NEXT: - Length: 0x000000000000001C +# ARANGE-NEXT: Version: 2 +# ARANGE-NEXT: CuOffset: 0x0000000000001234 +# ARANGE-NEXT: AddressSize: 0x08 +# ARANGE-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .debug_aranges + Type: [[TYPE=SHT_PROGBITS]] + Flags: [[FLAGS=]] + Link: [[LINK='']] + EntSize: [[ENTSIZE=]] + Info: [[INFO=]] + AddressAlign: [[ADDRALIGN=0]] + Address: [[ADDRESS=]] + - Name: .sec + Type: SHT_PROGBITS +DWARF: + debug_aranges: + - Version: 2 + CuOffset: 0x1234 + +## c) Test dumping a .debug_aranges section whose address_size doesn't match the +## object file's address size. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | \ +# RUN: FileCheck %s -DLENGTH=0x000000000000001C -DADDRSIZE=0x04 -DADDRLEN=0x0000000012345678 --check-prefix=ADDRSIZE + +# ADDRSIZE: DWARF: +# ADDRSIZE-NEXT: debug_aranges: +# ADDRSIZE-NEXT: - Length: [[LENGTH]] +# ADDRSIZE-NEXT: Version: 2 +# ADDRSIZE-NEXT: CuOffset: 0x0000000000001234 +# ADDRSIZE-NEXT: AddressSize: [[ADDRSIZE]] +# ADDRSIZE-NEXT: Descriptors: +# ADDRSIZE-NEXT: - Address: [[ADDRLEN]] +# ADDRSIZE-NEXT: Length: [[ADDRLEN]] +# ADDRSIZE-NEXT: ... + +--- !ELF +FileHeader: + Class: ELFCLASS[[BITS=64]] + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_aranges: + - Version: 2 + CuOffset: 0x1234 + AddressSize: [[ADDRSIZE=0x04]] + Descriptors: + - Address: [[ADDRLEN=0x12345678]] + Length: [[ADDRLEN=0x12345678]] + +# RUN: yaml2obj --docnum=3 -DBITS=32 -DADDRSIZE=0x08 -DADDRLEN=0x1234567890abcdef %s | \ +# RUN: obj2yaml | \ +# RUN: FileCheck %s -DLENGTH=0x000000000000002C -DADDRSIZE=0x08 -DADDRLEN=0x1234567890ABCDEF --check-prefix=ADDRSIZE + +## d) Test dumping a .debug_aranges section whose length field doesn't match the actual length. +## This makes the DWARF parser fail to parse it and we will dump it as a raw content section. + +# RUN: yaml2obj --docnum=4 %s | obj2yaml | FileCheck %s --check-prefix=RAW-CONTENT + +# RAW-CONTENT: Sections: +# RAW-CONTENT-NEXT: - Name: .debug_aranges +# RAW-CONTENT-NEXT: Type: SHT_PROGBITS +# RAW-CONTENT-NEXT: AddressAlign: 0x0000000000000001 +# RAW-CONTENT-NEXT: Content: '3412000002003412000008000000000000000000000000000000000000000000' +# RAW-CONTENT-NEXT: ... 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_aranges: + - Length: 0x1234 + Version: 2 + CuOffset: 0x1234 diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index b45098538f87d..d36d6227334da 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -7,10 +7,13 @@ //===----------------------------------------------------------------------===// #include "Error.h" +#include "obj2yaml.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/ELFObjectFile.h" +#include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/ObjectYAML/ELFYAML.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/ErrorHandling.h" @@ -50,11 +53,15 @@ class ELFDumper { Expected getSymbolName(uint32_t SymtabNdx, uint32_t SymbolNdx); const object::ELFFile &Obj; + std::unique_ptr DWARFCtx; ArrayRef ShndxTable; Expected> dumpProgramHeaders(ArrayRef> Sections); + Optional + dumpDWARFSections(std::vector> &Sections); + Error dumpSymbols(const Elf_Shdr *Symtab, std::vector &Symbols); Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab, @@ -95,18 +102,20 @@ class ELFDumper { Expected dumpPlaceholderSection(const Elf_Shdr *Shdr); - bool shouldPrintSection(const ELFYAML::Section &S, const Elf_Shdr &SHdr); + bool shouldPrintSection(const ELFYAML::Section &S, const Elf_Shdr &SHdr, + Optional DWARF); public: - ELFDumper(const object::ELFFile &O); + ELFDumper(const object::ELFFile &O, std::unique_ptr DCtx); Expected dump(); }; } template -ELFDumper::ELFDumper(const object::ELFFile &O) - : Obj(O) {} +ELFDumper::ELFDumper(const object::ELFFile &O, + std::unique_ptr DCtx) + : Obj(O), DWARFCtx(std::move(DCtx)) {} template Expected @@ -173,7 +182,8 @@ ELFDumper::getUniquedSymbolName(const Elf_Sym *Sym, StringRef StrTable, template bool ELFDumper::shouldPrintSection(const ELFYAML::Section &S, - const Elf_Shdr &SHdr) { + const Elf_Shdr &SHdr, + Optional DWARF) { // We only print the SHT_NULL section at index 0 when it // has at least one non-null field, because yaml2obj // normally creates the zero section at index 0 implicitly. @@ -183,6 +193,19 @@ bool ELFDumper::shouldPrintSection(const ELFYAML::Section &S, return std::find_if(Begin, End, [](uint8_t V) { return V != 0; }) != End; } + // Normally we use "DWARF:" to describe contents of DWARF sections. Sometimes + // the content of DWARF sections can be successfully parsed into the "DWARF:" + // entry but their section headers may have special flags, entry size, address + // alignment, etc. We will preserve the header for them under such + // circumstances. + if (DWARF && DWARF->getNonEmptySectionNames().count(S.Name.substr(1))) { + if (const ELFYAML::RawContentSection *RawSec = + dyn_cast(&S)) + return RawSec->Type != ELF::SHT_PROGBITS || RawSec->Flags || + !RawSec->Link.empty() || RawSec->Info || + RawSec->AddressAlign != 1 || RawSec->EntSize; + } + // Normally we use "Symbols:" and "DynamicSymbols:" to describe contents of // symbol tables. We also build and emit corresponding string tables // implicitly. But sometimes it is important to preserve positions and virtual @@ -284,9 +307,12 @@ template Expected ELFDumper::dump() { return PhdrsOrErr.takeError(); Y->ProgramHeaders = std::move(*PhdrsOrErr); - llvm::erase_if(Chunks, [this](const std::unique_ptr &C) { + // Dump DWARF sections. 
+ Y->DWARF = dumpDWARFSections(Chunks); + + llvm::erase_if(Chunks, [this, &Y](const std::unique_ptr &C) { const ELFYAML::Section &S = cast(*C.get()); - return !shouldPrintSection(S, Sections[S.OriginalSecNdx]); + return !shouldPrintSection(S, Sections[S.OriginalSecNdx], Y->DWARF); }); Y->Chunks = std::move(Chunks); @@ -363,6 +389,36 @@ ELFDumper::dumpProgramHeaders( return Ret; } +template +Optional ELFDumper::dumpDWARFSections( + std::vector> &Sections) { + DWARFYAML::Data DWARF; + for (std::unique_ptr &C : Sections) { + if (!C->Name.startswith(".debug_")) + continue; + + if (ELFYAML::RawContentSection *RawSec = + dyn_cast(C.get())) { + Error Err = Error::success(); + cantFail(std::move(Err)); + + if (RawSec->Name == ".debug_aranges") + Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); + + // If the DWARF section cannot be successfully parsed, emit raw content + // instead of an entry in the DWARF section of the YAML. + if (Err) + consumeError(std::move(Err)); + else + RawSec->Content.reset(); + } + } + + if (DWARF.getNonEmptySectionNames().empty()) + return None; + return DWARF; +} + template Expected ELFDumper::dumpPlaceholderSection(const Elf_Shdr *Shdr) { @@ -1298,8 +1354,9 @@ ELFDumper::dumpMipsABIFlags(const Elf_Shdr *Shdr) { } template -static Error elf2yaml(raw_ostream &Out, const object::ELFFile &Obj) { - ELFDumper Dumper(Obj); +static Error elf2yaml(raw_ostream &Out, const object::ELFFile &Obj, + std::unique_ptr DWARFCtx) { + ELFDumper Dumper(Obj, std::move(DWARFCtx)); Expected YAMLOrErr = Dumper.dump(); if (!YAMLOrErr) return YAMLOrErr.takeError(); @@ -1312,17 +1369,18 @@ static Error elf2yaml(raw_ostream &Out, const object::ELFFile &Obj) { } Error elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) { + std::unique_ptr DWARFCtx = DWARFContext::create(Obj); if (const auto *ELFObj = dyn_cast(&Obj)) - return elf2yaml(Out, *ELFObj->getELFFile()); + return elf2yaml(Out, *ELFObj->getELFFile(), std::move(DWARFCtx)); if (const auto *ELFObj = dyn_cast(&Obj)) - return elf2yaml(Out, *ELFObj->getELFFile()); + return elf2yaml(Out, *ELFObj->getELFFile(), std::move(DWARFCtx)); if (const auto *ELFObj = dyn_cast(&Obj)) - return elf2yaml(Out, *ELFObj->getELFFile()); + return elf2yaml(Out, *ELFObj->getELFFile(), std::move(DWARFCtx)); if (const auto *ELFObj = dyn_cast(&Obj)) - return elf2yaml(Out, *ELFObj->getELFFile()); + return elf2yaml(Out, *ELFObj->getELFFile(), std::move(DWARFCtx)); llvm_unreachable("unknown ELF file format"); } diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h index b538ff87ac2b8..9dcb2fac3b830 100644 --- a/llvm/tools/obj2yaml/obj2yaml.h +++ b/llvm/tools/obj2yaml/obj2yaml.h @@ -41,5 +41,6 @@ struct Data; } llvm::Error dwarf2yaml(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); - +llvm::Error dumpDebugARanges(llvm::DWARFContext &DCtx, + llvm::DWARFYAML::Data &Y); #endif From bdb9295664aa2ea0ee195505a0ca78ea8e34e657 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Wed, 5 Aug 2020 13:29:35 +0200 Subject: [PATCH 448/600] [mlir] Fix convert-to-llvmir.mlir test broken due to syntax change The syntax of the LLVM dialect types changed between the time the code was written and it was submitted, leading to a test failure. Update the syntax. 
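For reference, the two spellings of the same descriptor type (the pair
exercised by the CHECK lines updated below):

    !llvm<"{ i64, i8* }">         // old quoted-string syntax
    !llvm.struct<(i64, ptr<i8>)>  // new first-class type syntax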
---
 mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
index 9042bf36c1b3b..a247c35a09156 100644
--- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir
@@ -1325,8 +1325,8 @@ func @rank_of_unranked(%unranked: memref<*xi32>) {
 // CHECK-NEXT: llvm.mlir.undef
 // CHECK-NEXT: llvm.insertvalue
 // CHECK-NEXT: llvm.insertvalue
-// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i8* }">
-// CHECK32: llvm.extractvalue %{{.*}}[0] : !llvm<"{ i64, i8* }">
+// CHECK-NEXT: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, ptr<i8>)>
+// CHECK32: llvm.extractvalue %{{.*}}[0] : !llvm.struct<(i64, ptr<i8>)>

 // CHECK-LABEL: func @rank_of_ranked
 // CHECK32-LABEL: func @rank_of_ranked
From 7b993903e0448a1f804882c97f3071e799cbe99e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 5 Aug 2020 12:42:09 +0100
Subject: [PATCH 449/600] DWARFVerifier.h - remove unnecessary forward
 declarations and includes. NFCI.

---
 llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 22b1d722fc89c..18d889f5cadbb 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -12,25 +12,22 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
-#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
 #include "llvm/DebugInfo/DWARF/DWARFDie.h"
 #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
-
 #include <cstdint>
 #include <map>
 #include <set>

 namespace llvm {
 class raw_ostream;
+struct DWARFAddressRange;
 struct DWARFAttribute;
 class DWARFContext;
-class DWARFDie;
-class DWARFUnit;
-class DWARFCompileUnit;
 class DWARFDataExtractor;
 class DWARFDebugAbbrev;
 class DataExtractor;
 struct DWARFSection;
+class DWARFUnit;

 /// A class that verifies DWARF debug information given a DWARF Context.
 class DWARFVerifier {
From f2675ab45fbb41bb7c1e1b0b86533fc83e877b6b Mon Sep 17 00:00:00 2001
From: Sam Parker
Date: Wed, 5 Aug 2020 09:37:53 +0100
Subject: [PATCH 450/600] [ARM][CostModel] Implement getCFInstrCost

As with other targets, set the throughput cost of control-flow
instructions to free so that we don't miss out on vectorization
opportunities.
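For example, a vectorizer-style throughput query now returns zero on these
targets (a simplified sketch of a call site, not code from this patch):

    // Branch cost query as the loop vectorizer would issue it.
    int BrCost = TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
    // BrCost == 0 on NEON/MVE subtargets after this change; code-size and
    // latency queries still use the base implementation.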
Differential Revision: https://reviews.llvm.org/D85283
---
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp |  12 +
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h  |   3 +
 .../Analysis/CostModel/ARM/arith-overflow.ll  |  24 +-
 .../test/Analysis/CostModel/ARM/arith-ssat.ll |   8 +-
 .../test/Analysis/CostModel/ARM/arith-usat.ll |   8 +-
 llvm/test/Analysis/CostModel/ARM/arith.ll     |  40 ++
 llvm/test/Analysis/CostModel/ARM/cast.ll      |  12 +-
 llvm/test/Analysis/CostModel/ARM/cast_ldst.ll |  48 +--
 llvm/test/Analysis/CostModel/ARM/cmps.ll      |   8 +
 .../Analysis/CostModel/ARM/control-flow.ll    | 252 ++++++++++--
 llvm/test/Analysis/CostModel/ARM/divrem.ll    | 105 +++++
 llvm/test/Analysis/CostModel/ARM/fparith.ll   |  24 +-
 llvm/test/Analysis/CostModel/ARM/gep.ll       | 379 ++++++++++++++----
 .../test/Analysis/CostModel/ARM/load_store.ll |   8 +-
 llvm/test/Analysis/CostModel/ARM/memcpy.ll    | 222 ++++++----
 .../Analysis/CostModel/ARM/mul-cast-vect.ll   |  79 ++--
 .../CostModel/ARM/mve-gather-scatter-cost.ll  |  22 +-
 .../test/Analysis/CostModel/ARM/reduce-add.ll |   8 +-
 .../Analysis/CostModel/ARM/reduce-smax.ll     |  16 +-
 .../Analysis/CostModel/ARM/reduce-smin.ll     |  16 +-
 .../Analysis/CostModel/ARM/reduce-umax.ll     |  16 +-
 .../Analysis/CostModel/ARM/reduce-umin.ll     |  16 +-
 llvm/test/Analysis/CostModel/ARM/select.ll    |   8 +
 .../Analysis/CostModel/ARM/shl-cast-vect.ll   |  79 ++--
 llvm/test/Analysis/CostModel/ARM/shuffle.ll   |   8 +-
 .../Analysis/CostModel/ARM/sub-cast-vect.ll   |  79 ++--
 26 files changed, 1118 insertions(+), 382 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 68767398191aa..6160bc875a070 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -300,6 +300,18 @@ int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Im
   return getIntImmCost(Imm, Ty, CostKind);
 }

+int ARMTTIImpl::getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
+  if (CostKind == TTI::TCK_RecipThroughput &&
+      (ST->hasNEON() || ST->hasMVEIntegerOps())) {
+    // FIXME: The vectorizer is highly sensitive to the cost of these
+    // instructions, which suggests that it may be using the costs incorrectly.
+    // But, for now, just make them free to avoid performance regressions for
+    // vector targets.
+ return 0; + } + return BaseT::getCFInstrCost(Opcode, CostKind); +} + int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index ac7d0378d90bd..91f6d97accc6c 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -209,6 +209,9 @@ class ARMTTIImpl : public BasicTTIImplBase { } } + int getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index 0a681a7c2a8f4..050f2a790533c 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -63,7 +63,7 @@ define i32 @sadd(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'sadd' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) @@ -82,7 +82,7 @@ define i32 @sadd(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sadd' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) @@ -221,7 +221,7 @@ define i32 @uadd(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'uadd' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) @@ -240,7 +240,7 @@ define i32 @uadd(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'uadd' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) @@ -379,7 +379,7 @@ define i32 @ssub(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'ssub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) @@ -398,7 +398,7 @@ define i32 @ssub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'ssub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) @@ -537,7 +537,7 @@ define i32 @usub(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } 
@llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'usub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) @@ -556,7 +556,7 @@ define i32 @usub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'usub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) @@ -695,7 +695,7 @@ define i32 @smul(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'smul' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -714,7 +714,7 @@ define i32 @smul(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'smul' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -853,7 +853,7 @@ define i32 @umul(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: 
Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'umul' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) @@ -872,7 +872,7 @@ define i32 @umul(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'umul' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index df5535ded070c..d5afce84b1360 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -63,7 +63,7 @@ define i32 @add(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'add' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) @@ -82,7 +82,7 @@ define i32 @add(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 
undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'add' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) @@ -221,7 +221,7 @@ define i32 @sub(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'sub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) @@ -240,7 +240,7 @@ define i32 @sub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 5377e0dde8c45..1059c2ee551cc 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -63,7 +63,7 @@ define i32 @add(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'add' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) @@ -82,7 +82,7 @@ define i32 @add(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> 
@llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'add' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) @@ -221,7 +221,7 @@ define i32 @sub(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'sub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) @@ -240,7 +240,7 @@ define i32 @sub(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index 451c5f50b7136..55b60fb9c2877 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -17,6 +17,7 @@ define void @i8() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i8' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef @@ -28,6 +29,7 @@ define void @i8() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i8' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef @@ -39,6 +41,7 @@ define void @i8() { ; CHECK-V8M-BASE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %i = and i8 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i8' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i8 undef, undef @@ -50,6 +53,7 @@ define void @i8() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i8 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i8 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i8 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %c = add i8 undef, undef %d = sub i8 undef, undef @@ -74,6 +78,7 @@ define void @i16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i16' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef @@ -85,6 +90,7 @@ define void @i16() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i16' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef @@ -96,6 +102,7 @@ define void @i16() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i16' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i16 undef, undef @@ -107,6 +114,7 @@ define void @i16() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i16 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i16 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i16 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %c = add i16 undef, undef %d = sub i16 undef, undef @@ -131,6 +139,7 @@ define void @i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef +; CHECK-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i32' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef @@ -142,6 +151,7 @@ define void @i32() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i32' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef @@ -153,6 +163,7 @@ define void @i32() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i32' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = add i32 undef, undef @@ -164,6 +175,7 @@ define void @i32() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i = and i32 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j = or i32 undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k = xor i32 undef, undef +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %c = add i32 undef, undef %d = sub i32 undef, undef @@ -188,6 +200,7 @@ define void @i64() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i64' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef @@ -199,6 +212,7 @@ define void @i64() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i64' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = add i64 undef, undef @@ -210,6 +224,7 @@ define void @i64() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i64' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c = 
add i64 undef, undef
@@ -221,6 +236,7 @@ define void @i64() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i = and i64 undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j = or i64 undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k = xor i64 undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c = add i64 undef, undef
 %d = sub i64 undef, undef
@@ -273,6 +289,7 @@ define void @vi8() {
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi8'
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef
@@ -311,6 +328,7 @@ define void @vi8() {
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi8'
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef
@@ -349,6 +367,7 @@ define void @vi8() {
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef
@@ -387,6 +406,7 @@ define void @vi8() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i8> undef, undef
@@ -425,6 +445,7 @@ define void @vi8() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i8> undef, undef
@@ -463,6 +484,7 @@ define void @vi8() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i16 = and <16 x i8> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j16 = or <16 x i8> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k16 = xor <16 x i8> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = add <2 x i8> undef, undef
 %d2 = sub <2 x i8> undef, undef
@@ -541,6 +563,7 @@ define void @vi16() {
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi16'
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef
@@ -579,6 +602,7 @@ define void @vi16() {
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi16'
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef
@@ -617,6 +641,7 @@ define void @vi16() {
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef
@@ -655,6 +680,7 @@ define void @vi16() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i16> undef, undef
@@ -693,6 +719,7 @@ define void @vi16() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i16> undef, undef
@@ -731,6 +758,7 @@ define void @vi16() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i16 = and <16 x i16> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j16 = or <16 x i16> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k16 = xor <16 x i16> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = add <2 x i16> undef, undef
 %d2 = sub <2 x i16> undef, undef
@@ -809,6 +837,7 @@ define void @vi32() {
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi32'
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef
@@ -847,6 +876,7 @@ define void @vi32() {
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi32'
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef
@@ -885,6 +915,7 @@ define void @vi32() {
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef
@@ -923,6 +954,7 @@ define void @vi32() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = add <2 x i32> undef, undef
@@ -961,6 +993,7 @@ define void @vi32() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i32> undef, undef
@@ -999,6 +1032,7 @@ define void @vi32() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i16 = and <16 x i32> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j16 = or <16 x i32> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k16 = xor <16 x i32> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = add <2 x i32> undef, undef
 %d2 = sub <2 x i32> undef, undef
@@ -1077,6 +1111,7 @@ define void @vi64() {
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE2-LABEL: 'vi64'
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1115,6 +1150,7 @@ define void @vi64() {
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE4-LABEL: 'vi64'
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1153,6 +1189,7 @@ define void @vi64() {
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1191,6 +1228,7 @@ define void @vi64() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1229,6 +1267,7 @@ define void @vi64() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c2 = add <2 x i64> undef, undef
@@ -1267,6 +1306,7 @@ define void @vi64() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = add <2 x i64> undef, undef
 %d2 = sub <2 x i64> undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
index 28f7c6cfcf36e..b539dae1585e3 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -396,7 +396,7 @@ define i32 @casts() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'casts'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -783,7 +783,7 @@ define i32 @casts() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'casts'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -1944,7 +1944,7 @@ define i32 @casts() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'casts'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
@@ -3935,7 +3935,7 @@ define i32 @bitcasts() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'bitcasts'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
@@ -3946,7 +3946,7 @@ define i32 @bitcasts() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'bitcasts'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
@@ -3979,7 +3979,7 @@ define i32 @bitcasts() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'bitcasts'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
index 54029289661c0..839d3e47dd98c 100644
--- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll
@@ -75,7 +75,7 @@ define i32 @load_extends() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_extends'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -142,7 +142,7 @@ define i32 @load_extends() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -343,7 +343,7 @@ define i32 @load_extends() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_extends'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1
@@ -739,7 +739,7 @@ define i32 @store_trunc() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'store_trunc'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -794,7 +794,7 @@ define i32 @store_trunc() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -959,7 +959,7 @@ define i32 @store_trunc() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'store_trunc'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8
@@ -1268,7 +1268,7 @@ define i32 @load_fpextends() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_fpextends'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1296,7 +1296,7 @@ define i32 @load_fpextends() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fpextends'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1380,7 +1380,7 @@ define i32 @load_fpextends() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_fpextends'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2
@@ -1550,7 +1550,7 @@ define i32 @load_fptrunc() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'load_fptrunc'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
@@ -1575,7 +1575,7 @@ define i32 @load_fptrunc() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %i1632 = fptrunc float undef to half
@@ -1650,7 +1650,7 @@ define i32 @load_fptrunc() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'load_fptrunc'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half
@@ -1832,7 +1832,7 @@ define i32 @maskedload_extends() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_extends'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -1884,7 +1884,7 @@ define i32 @maskedload_extends() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_extends'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -2040,7 +2040,7 @@ define i32 @maskedload_extends() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_extends'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 1, <2 x i1> undef, <2 x i8> undef)
@@ -2348,7 +2348,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedstore_trunc'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2391,7 +2391,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedstore_trunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2520,7 +2520,7 @@ define i32 @maskedstore_trunc() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %v81664, <8 x i16>* undef, i32 2, <8 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> %v23264, <2 x i32>* undef, i32 4, <2 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v43264, <4 x i32>* undef, i32 4, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedstore_trunc'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816 = trunc <2 x i16> undef to <2 x i8>
@@ -2763,7 +2763,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_fpextends'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2786,7 +2786,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fpextends'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2855,7 +2855,7 @@ define i32 @maskedload_fpextends() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double>
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv4f16ou = load <4 x half>, <4 x half>* undef, align 8
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v2ou = fpext <4 x half> %loadv4f16ou to <4 x float>
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_fpextends'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadv2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 2, <2 x i1> undef, <2 x half> undef)
@@ -2993,7 +2993,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-RECIP-LABEL: 'maskedload_fptrunc'
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
@@ -3012,7 +3012,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'maskedload_fptrunc'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
@@ -3069,7 +3069,7 @@ define i32 @maskedload_fptrunc() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %v81664, <8 x half>* undef, i32 2, <8 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %v23264, <2 x float>* undef, i32 4, <2 x i1> undef)
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v43264, <4 x float>* undef, i32 4, <4 x i1> undef)
-; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'maskedload_fptrunc'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21632 = fptrunc <2 x float> undef to <2 x half>
diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll
index 95a01bc7af4f6..e2e7c0492c315 100644
--- a/llvm/test/Analysis/CostModel/ARM/cmps.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll
@@ -23,6 +23,7 @@ define i32 @cmps() {
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps'
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -38,6 +39,7 @@ define i32 @cmps() {
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-RECIP-LABEL: 'cmps'
 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -53,6 +55,7 @@ define i32 @cmps() {
 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-RECIP-LABEL: 'cmps'
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -68,6 +71,7 @@ define i32 @cmps() {
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-SIZE-LABEL: 'cmps'
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -83,6 +87,7 @@ define i32 @cmps() {
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-SIZE-LABEL: 'cmps'
 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -98,6 +103,7 @@ define i32 @cmps() {
 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-SIZE-LABEL: 'cmps'
 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -113,6 +119,7 @@ define i32 @cmps() {
 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-SIZE-LABEL: 'cmps'
 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = icmp slt i8 undef, undef
@@ -128,6 +135,7 @@ define i32 @cmps() {
 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 %a = icmp slt i8 undef, undef
 %b = icmp ult i16 undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/control-flow.ll b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
index f4c55a9b62e0a..d2d649d4df499 100644
--- a/llvm/test/Analysis/CostModel/ARM/control-flow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/control-flow.ll
@@ -1,12 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-SIZE
 ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-SIZE
+; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-SIZE
+; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-SIZE
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=latency -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-LATENCY
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.base | FileCheck %s --check-prefix=CHECK-T1-THROUGHPUT
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8m.main | FileCheck %s --check-prefix=CHECK-V8M-THROUGHPUT
 ; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-V8_1M-THROUGHPUT
+; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=thumbv8.1m.main -mattr=+mve | FileCheck %s --check-prefix=CHECK-MVE-THROUGHPUT
+; RUN: opt < %s -cost-model -analyze -cost-kind=throughput -mtriple=armv8a -mattr=+neon | FileCheck %s --check-prefix=CHECK-NEON-THROUGHPUT
 
 define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-T1-SIZE-LABEL: 'simple_loop_cost'
@@ -31,6 +35,28 @@ define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-SIZE-LABEL: 'simple_loop_cost'
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
+; CHECK-NEON-SIZE-LABEL: 'simple_loop_cost'
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
 ; CHECK-T1-LATENCY-LABEL: 'simple_loop_cost'
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
@@ -97,6 +123,28 @@ define i32 @simple_loop_cost(i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-THROUGHPUT-LABEL: 'simple_loop_cost'
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
+; CHECK-NEON-THROUGHPUT-LABEL: 'simple_loop_cost'
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res
+;
 entry:
 %zero = icmp eq i32 %N, 0
 br i1 %zero, label %exit, label %preheader
@@ -122,10 +170,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -138,26 +186,58 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-SIZE-LABEL: 'simple_mul_loop'
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
+; CHECK-NEON-SIZE-LABEL: 'simple_mul_loop'
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
+;
 ; CHECK-T1-LATENCY-LABEL: 'simple_mul_loop'
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -170,10 +250,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -186,10 +266,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -202,10 +282,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -218,10 +298,10 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
@@ -234,16 +314,48 @@ define i32 @simple_mul_loop(i32* %A, i32* %B, i32 %N) {
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv
-; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b
+; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res
 ;
+; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_loop'
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ]
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv
+; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost
of 1 for instruction: %load = load i32, i32* %addr.a, align 4 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_loop' +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = getelementptr i32, i32* %A, i32 %iv +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i32, i32* %addr.a, align 4 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %load, %load +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = getelementptr i32, i32* %B, i32 %iv +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; entry: %zero = icmp eq i32 %N, 0 br i1 %zero, label %exit, label %preheader @@ -275,10 +387,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-T1-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -294,10 +406,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -306,6 +418,44 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res ; +; CHECK-MVE-SIZE-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, 
%preheader ], [ %addr.b, %loop ] +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; +; CHECK-NEON-SIZE-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %loop +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res +; ; CHECK-T1-LATENCY-LABEL: 'simple_mul_ext_lsr_loop' ; CHECK-T1-LATENCY-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br i1 %zero, label %exit, label %preheader @@ -313,10 +463,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-T1-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -332,10 +482,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-V8M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -351,10 +501,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-V8_1M-LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -370,10 +520,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-T1-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -389,10 +539,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, 
%preheader ], [ %addr.b, %loop ] -; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-V8M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -408,10 +558,10 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] -; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 -; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b +; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 @@ -420,6 +570,44 @@ define i32 @simple_mul_ext_lsr_loop(i16* %A, i32* %B, i32 %N) { ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-V8_1M-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %res ; +; CHECK-MVE-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; 
CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-MVE-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; +; CHECK-NEON-THROUGHPUT-LABEL: 'simple_mul_ext_lsr_loop' +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = icmp eq i32 %N, 0 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %zero, label %exit, label %preheader +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br label %loop +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %iv = phi i32 [ 0, %preheader ], [ %iv.next, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a = phi i16* [ %A, %preheader ], [ %addr.a, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b = phi i32* [ %B, %preheader ], [ %addr.b, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load i16, i16* %addr.a, align 2 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sext = sext i16 %load to i32 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul = mul i32 %sext, 7 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %mul, i32* %addr.b, align 4 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %iv.next = add nuw i32 %iv, 1 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.a.next = getelementptr i16, i16* %addr.a, i32 1 +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %addr.b.next = getelementptr i32, i32* %addr.b, i32 1 +; 
CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ne i32 %iv.next, %N +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: br i1 %cmp, label %loop, label %exit +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +; CHECK-NEON-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %res +; entry: %zero = icmp eq i32 %N, 0 br i1 %zero, label %exit, label %preheader diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index d3ebfb8ff0df8..182a86296e71b 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -15,6 +15,7 @@ define void @i8() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i8 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'i8' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef @@ -25,6 +26,7 @@ define void @i8() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i8' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef @@ -35,6 +37,7 @@ define void @i8() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i8' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef @@ -45,6 +48,7 @@ define void @i8() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i8' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i8 undef, undef @@ -55,6 +59,7 @@ define void @i8() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i8 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i8 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i8 undef, 2 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = sdiv i8 undef, undef %2 = udiv i8 undef, undef @@ -77,6 +82,7 @@ define void 
@i16() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i16 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'i16' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef @@ -87,6 +93,7 @@ define void @i16() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i16' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef @@ -97,6 +104,7 @@ define void @i16() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i16' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef @@ -107,6 +115,7 @@ define void @i16() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i16' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i16 undef, undef @@ -117,6 +126,7 @@ define void @i16() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i16 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i16 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i16 undef, 2 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = sdiv i16 undef, undef %2 = udiv i16 undef, undef @@ -139,6 +149,7 @@ define void @i32() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i32 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'i32' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef @@ -149,6 +160,7 @@ define void @i32() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 ; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i32' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef @@ -159,6 +171,7 @@ define void @i32() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i32' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef @@ -169,6 +182,7 @@ define void @i32() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i32' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = sdiv i32 undef, undef @@ -179,6 +193,7 @@ define void @i32() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = udiv i32 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i32 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i32 undef, 2 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = sdiv i32 undef, undef %2 = udiv i32 undef, undef @@ -201,6 +216,7 @@ define void @i64() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = udiv i64 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'i64' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef @@ -211,6 +227,7 @@ define void @i64() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'i64' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef @@ -221,6 +238,7 @@ define void @i64() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'i64' ; CHECK-V8M-BASE-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef @@ -231,6 +249,7 @@ define void @i64() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'i64' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = sdiv i64 undef, undef @@ -241,6 +260,7 @@ define void @i64() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = udiv i64 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = srem i64 undef, 2 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = urem i64 undef, 2 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = sdiv i64 undef, undef %2 = udiv i64 undef, undef @@ -259,30 +279,35 @@ define void @f16() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv half undef, 0xH4000 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'f16' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'f16' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'f16' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv half undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem half undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv half undef, 0xH4000 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'f16' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv half undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem half undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %3 = fdiv half undef, 0xH4000 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem half undef, 0xH4000 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv half undef, undef %2 = frem half undef, undef @@ -297,30 +322,35 @@ define void @f32() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'f32' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'f32' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'f32' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv float undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem float undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv float undef, 2.000000e+00 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'f32' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv float undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem float undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv float undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem float undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv float undef, undef %2 = frem float undef, undef @@ -335,30 +365,35 @@ define void @f64() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'f64' 
; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = fdiv double undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = frem double undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = fdiv double undef, 2.000000e+00 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'f64' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'f64' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'f64' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv double undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = frem double undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = fdiv double undef, 2.000000e+00 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = frem double undef, 2.000000e+00 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %1 = fdiv double undef, undef %2 = frem double undef, undef @@ -385,6 +420,7 @@ define void @vi8() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi8' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, undef @@ -403,6 +439,7 @@ define void @vi8() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi8' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef @@ -421,6 +458,7 @@ define void @vi8() { ; 
CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi8' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, undef @@ -439,6 +477,7 @@ define void @vi8() { ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, undef ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-LABEL: 'vi8' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, undef @@ -457,6 +496,7 @@ define void @vi8() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, undef ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %t1 = sdiv <2 x i8> undef, undef %t2 = udiv <2 x i8> undef, undef @@ -495,6 +535,7 @@ define void @vi16() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi16' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, undef @@ -513,6 +554,7 @@ define void @vi16() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, undef @@ -531,6 +573,7 @@ define void @vi16() { ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-LABEL: 'vi16' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%t1 = sdiv <2 x i16> undef, undef
@@ -549,6 +592,7 @@ define void @vi16() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, undef
@@ -567,6 +611,7 @@ define void @vi16() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i16> undef, undef
 %t2 = udiv <2 x i16> undef, undef
@@ -605,6 +650,7 @@ define void @vi32() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -623,6 +669,7 @@ define void @vi32() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -641,6 +688,7 @@ define void @vi32() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -659,6 +707,7 @@ define void @vi32() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, undef
@@ -677,6 +726,7 @@ define void @vi32() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i32> undef, undef
 %t2 = udiv <2 x i32> undef, undef
@@ -715,6 +765,7 @@ define void @vi64() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -733,6 +784,7 @@ define void @vi64() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -751,6 +803,7 @@ define void @vi64() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -769,6 +822,7 @@ define void @vi64() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, undef
@@ -787,6 +841,7 @@ define void @vi64() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i64> undef, undef
 %t2 = udiv <2 x i64> undef, undef
@@ -815,6 +870,7 @@ define void @vf16() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -823,6 +879,7 @@ define void @vf16() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -831,6 +888,7 @@ define void @vf16() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -839,6 +897,7 @@ define void @vf16() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef
@@ -847,6 +906,7 @@ define void @vf16() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x half> undef, undef
 %2 = fdiv <2 x half> undef, undef
@@ -865,6 +925,7 @@ define void @vf32() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -873,6 +934,7 @@ define void @vf32() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -881,6 +943,7 @@ define void @vf32() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -889,6 +952,7 @@ define void @vf32() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, undef
@@ -897,6 +961,7 @@ define void @vf32() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x float> undef, undef
 %2 = fdiv <2 x float> undef, undef
@@ -915,6 +980,7 @@ define void @vf64() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -923,6 +989,7 @@ define void @vf64() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -931,6 +998,7 @@ define void @vf64() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -939,6 +1007,7 @@ define void @vf64() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, undef
@@ -947,6 +1016,7 @@ define void @vf64() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, undef
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef, undef
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x double> undef, undef
 %2 = fdiv <2 x double> undef, undef
@@ -975,6 +1045,7 @@ define void @vi8_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef,
@@ -993,6 +1064,7 @@ define void @vi8_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef,
@@ -1011,6 +1083,7 @@ define void @vi8_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef,
@@ -1029,6 +1102,7 @@ define void @vi8_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef,
@@ -1047,6 +1121,7 @@ define void @vi8_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i8> undef,
 %t2 = udiv <2 x i8> undef,
@@ -1085,6 +1160,7 @@ define void @vi16_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef,
@@ -1103,6 +1179,7 @@ define void @vi16_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef,
@@ -1121,6 +1198,7 @@ define void @vi16_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef,
@@ -1139,6 +1217,7 @@ define void @vi16_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef,
@@ -1157,6 +1236,7 @@ define void @vi16_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i16> undef,
 %t2 = udiv <2 x i16> undef,
@@ -1195,6 +1275,7 @@ define void @vi32_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef,
@@ -1213,6 +1294,7 @@ define void @vi32_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef,
@@ -1231,6 +1313,7 @@ define void @vi32_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef,
@@ -1249,6 +1332,7 @@ define void @vi32_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef,
@@ -1267,6 +1351,7 @@ define void @vi32_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i32> undef,
 %t2 = udiv <2 x i32> undef,
@@ -1305,6 +1390,7 @@ define void @vi64_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef,
@@ -1323,6 +1409,7 @@ define void @vi64_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef,
@@ -1341,6 +1428,7 @@ define void @vi64_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef,
@@ -1359,6 +1447,7 @@ define void @vi64_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef,
@@ -1377,6 +1466,7 @@ define void @vi64_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %t1 = sdiv <2 x i64> undef,
 %t2 = udiv <2 x i64> undef,
@@ -1405,6 +1495,7 @@ define void @vf16_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef,
@@ -1413,6 +1504,7 @@ define void @vf16_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x half> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x half> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef,
@@ -1421,6 +1513,7 @@ define void @vf16_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef,
@@ -1429,6 +1522,7 @@ define void @vf16_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x half> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x half> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef,
@@ -1437,6 +1531,7 @@ define void @vf16_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x half> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x half> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x half> undef,
 %2 = fdiv <2 x half> undef,
@@ -1455,6 +1550,7 @@ define void @vf32_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef,
@@ -1463,6 +1559,7 @@ define void @vf32_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x float> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x float> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef,
@@ -1471,6 +1568,7 @@ define void @vf32_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef,
@@ -1479,6 +1577,7 @@ define void @vf32_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %4 = fdiv <4 x float> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %6 = fdiv <8 x float> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef,
@@ -1487,6 +1586,7 @@ define void @vf32_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %4 = fdiv <4 x float> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %6 = fdiv <8 x float> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x float> undef,
 %2 = fdiv <2 x float> undef,
@@ -1505,6 +1605,7 @@ define void @vf64_2() {
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef,
 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef,
+; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef,
@@ -1513,6 +1614,7 @@ define void @vf64_2() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = fdiv <4 x double> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef,
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = fdiv <8 x double> undef,
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64_2'
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef,
@@ -1521,6 +1623,7 @@ define void @vf64_2() {
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef,
 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef,
+; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64_2'
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef,
@@ -1529,6 +1632,7 @@ define void @vf64_2() {
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = fdiv <4 x double> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef,
 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = fdiv <8 x double> undef,
+; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef,
@@ -1537,6 +1641,7 @@ define void @vf64_2() {
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %4 = fdiv <4 x double> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef,
 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %6 = fdiv <8 x double> undef,
+; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %1 = fdiv <2 x double> undef,
 %2 = fdiv <2 x double> undef,
diff --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll
index 31a73c1868e57..cb3d66edfa20c 100644
--- a/llvm/test/Analysis/CostModel/ARM/fparith.ll
+++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll
@@ -7,13 +7,13 @@ define void @f32() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f32'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd float undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub float undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul float undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c = fadd float undef, undef
 %d = fsub float undef, undef
@@ -26,13 +26,13 @@ define void @f16() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f16'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd half undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub half undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul half undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c = fadd half undef, undef
 %d = fsub half undef, undef
@@ -45,13 +45,13 @@ define void @f64() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'f64'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = fadd double undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = fsub double undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fmul double undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c = fadd double undef, undef
 %d = fsub double undef, undef
@@ -70,7 +70,7 @@ define void @vf32() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x float> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x float> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x float> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf32'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x float> undef, undef
@@ -82,7 +82,7 @@ define void @vf32() {
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c8 = fadd <8 x float> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d8 = fsub <8 x float> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e8 = fmul <8 x float> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = fadd <2 x float> undef, undef
 %d2 = fsub <2 x float> undef, undef
@@ -107,7 +107,7 @@ define void @vf16() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x half> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x half> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x half> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf16'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c2 = fadd <2 x half> undef, undef
@@ -119,7 +119,7 @@ define void @vf16() {
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %c8 = fadd <8 x half> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %d8 = fsub <8 x half> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e8 = fmul <8 x half> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = fadd <2 x half> undef, undef
 %d2 = fsub <2 x half> undef, undef
@@ -144,7 +144,7 @@ define void @vf64() {
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef
 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef
-; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVEFP-LABEL: 'vf64'
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x double> undef, undef
@@ -156,7 +156,7 @@ define void @vf64() {
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef
 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef
-; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %c2 = fadd <2 x double> undef, undef
 %d2 = fsub <2 x double> undef, undef
diff --git a/llvm/test/Analysis/CostModel/ARM/gep.ll b/llvm/test/Analysis/CostModel/ARM/gep.ll
index e5ab9ef7a0e16..3956c2bb1561c 100644
--- a/llvm/test/Analysis/CostModel/ARM/gep.ll
+++ b/llvm/test/Analysis/CostModel/ARM/gep.ll
@@ -21,18 +21,57 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi8'
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi8'
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi8'
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi8'
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi8'
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i8, i8* %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i8, i8* %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i8, i8* %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i8, i8* %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i8, i8* %a, i32 4095
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i8, i8* %a, i32 4096
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi8'
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
@@ -45,7 +84,7 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi8'
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* %a, i32 0
@@ -58,7 +97,7 @@ define void @testi8(i8* %a, i32 %i) {
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i8, i8* %a, i32 -255
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i8, i8* %a, i32 -256
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i8, i8* %a, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %a0 = getelementptr inbounds i8, i8* %a, i32 0
 %a1 = getelementptr inbounds i8, i8* %a, i32 1
@@ -88,18 +127,57 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi16'
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi16'
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi16'
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi16'
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi16'
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i16, i16* %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i16, i16* %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i16, i16* %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4095 = getelementptr inbounds i16, i16* %a, i32 2046
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i16, i16* %a, i32 2048
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi16'
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
@@ -112,7 +190,7 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi16'
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i16, i16* %a, i32 0
@@ -125,7 +203,7 @@ define void @testi16(i16* %a, i32 %i) {
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i16, i16* %a, i32 -127
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i16, i16* %a, i32 -128
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i16, i16* %a, i32 %i
-; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 %a0 = getelementptr inbounds i16, i16* %a, i32 0
 %a1 = getelementptr inbounds i16, i16* %a, i32 1
@@ -155,18 +233,57 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
 ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; CHECK-V7M-LABEL: 'testi32'
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
-; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-V7M-NOFP-LABEL: 'testi32'
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-V7M-FP-LABEL: 'testi32'
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-MVE-LABEL: 'testi32'
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-MVEFP-LABEL: 'testi32'
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i32, i32* %a, i32 1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i32, i32* %a, i32 -1
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i32, i32* %a, i32 31
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i32, i32* %a, i32 32
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1023 = getelementptr inbounds i32, i32* %a, i32 1023
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds i32, i32* %a, i32 1024
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
+; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-T32-LABEL: 'testi32'
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
@@ -179,7 +296,7 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64
 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i
-; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-A32-LABEL: 'testi32'
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i32, i32* %a, i32 0
@@ -192,7 +309,7 @@ define void @testi32(i32* %a, i32 %i) {
 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i32, i32* %a, i32 -63
 ; CHECK-A32-NEXT: Cost
Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i32, i32* %a, i32 -64 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ai = getelementptr inbounds i32, i32* %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = getelementptr inbounds i32, i32* %a, i32 0 %a1 = getelementptr inbounds i32, i32* %a, i32 1 @@ -224,20 +341,65 @@ define void @testi64(i64* %a, i32 %i) { ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; CHECK-V7M-LABEL: 'testi64' -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-LABEL: 'testi64' +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023 +; 
CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-V7M-FP-LABEL: 'testi64' +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-MVE-LABEL: 'testi64' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024 +; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'testi64' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i64, i64* %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am4 = getelementptr inbounds i64, i64* %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a15 = getelementptr inbounds i64, i64* %a, i32 15 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a16 = getelementptr inbounds i64, i64* %a, i32 16 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a31 = getelementptr inbounds i64, i64* %a, i32 31 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a32 = getelementptr inbounds i64, i64* %a, i32 32 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4095 = getelementptr inbounds i64, i64* %a, i32 1023 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a4096 = getelementptr inbounds i64, i64* %a, i32 1024 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-T32-LABEL: 'testi64' ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 @@ -252,7 +414,7 @@ define void @testi64(i64* %a, i32 %i) { ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-A32-LABEL: 'testi64' ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i64, i64* %a, i32 0 @@ -267,7 +429,7 @@ define void @testi64(i64* %a, i32 %i) { ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am255 = getelementptr inbounds i64, i64* %a, i32 -63 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds i64, i64* %a, i32 -64 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds i64, i64* %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = getelementptr inbounds i64, i64* %a, i32 0 %a1 = getelementptr inbounds i64, i64* %a, i32 1 @@ -344,7 +506,7 @@ define void @testhalf(half* %a, i32 %i) { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'testhalf' ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0 @@ -359,7 +521,7 @@ define void @testhalf(half* %a, i32 %i) { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-T32-LABEL: 'testhalf' ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0 @@ -374,7 +536,7 @@ define void @testhalf(half* %a, i32 %i) { ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-A32-LABEL: 'testhalf' ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds half, half* %a, i32 0 @@ -389,7 +551,7 @@ define void @testhalf(half* %a, i32 %i) { ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am63 = getelementptr inbounds half, half* %a, i32 -63 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds half, half* %a, i32 -64 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds half, half* %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = getelementptr inbounds half, half* %a, i32 0 %a1 = getelementptr inbounds half, half* %a, i32 1 @@ -466,7 +628,7 @@ define void @testfloat(float* %a, i32 %i) { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63 ; CHECK-MVE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'testfloat' ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0 @@ -481,7 +643,7 @@ define void @testfloat(float* %a, i32 %i) { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64 ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-T32-LABEL: 'testfloat' ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0 @@ -496,7 +658,7 @@ define void @testfloat(float* %a, i32 %i) { ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-A32-LABEL: 'testfloat' ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds float, float* %a, i32 0 @@ -511,7 +673,7 @@ define void @testfloat(float* %a, i32 %i) { ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds float, float* %a, i32 -63 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds float, float* %a, i32 -64 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds float, float* %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = getelementptr inbounds float, float* %a, i32 0 %a1 = getelementptr inbounds float, float* %a, i32 1 @@ -545,20 +707,65 @@ define void @testdouble(double* %a, i32 %i) { ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i ; CHECK-V6M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; CHECK-V7M-LABEL: 'testdouble' -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i -; CHECK-V7M-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-V7M-NOFP-LABEL: 'testdouble' +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i +; CHECK-V7M-NOFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-V7M-FP-LABEL: 'testdouble' +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1 +; CHECK-V7M-FP-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i +; CHECK-V7M-FP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-MVE-LABEL: 'testdouble' +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'testdouble' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds double, double* %a, i32 1 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %am1 = getelementptr inbounds double, double* %a, i32 -1 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a255 = getelementptr inbounds double, double* %a, i32 127 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a256 = getelementptr inbounds double, double* %a, i32 128 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am255 = getelementptr inbounds double, double* %a, i32 -127 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %am256 = getelementptr inbounds double, double* %a, i32 -128 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1023 = getelementptr inbounds double, double* %a, i32 511 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a1024 = getelementptr inbounds double, double* %a, i32 512 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-T32-LABEL: 'testdouble' ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 @@ -573,7 +780,7 @@ define void @testdouble(double* %a, i32 %i) { ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-A32-LABEL: 'testdouble' ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds double, double* %a, i32 0 @@ -588,7 +795,7 @@ define void @testdouble(double* %a, i32 %i) { ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am63 = getelementptr inbounds double, double* %a, i32 -31 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %am64 = getelementptr inbounds double, double* %a, i32 -32 ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ai = getelementptr inbounds double, double* %a, i32 %i -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = getelementptr inbounds double, double* %a, i32 0 %a1 = getelementptr inbounds double, double* %a, i32 1 @@ -845,7 +1052,7 @@ define void @testvecs(i32 %i) { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%d0 = getelementptr inbounds i8, i8* undef, i32 -1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'testvecs' ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 @@ -905,7 +1112,7 @@ define void @testvecs(i32 %i) { ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1 -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-T32-LABEL: 'testvecs' ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 @@ -965,7 +1172,7 @@ define void @testvecs(i32 %i) { ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i ; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1 -; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-T32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-A32-LABEL: 'testvecs' ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 @@ -1025,7 +1232,7 @@ define void @testvecs(i32 %i) { ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c12 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c13 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i ; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d0 = getelementptr inbounds i8, i8* undef, i32 -1 -; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-A32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll index 8d346ea1c8078..2ca4acda0fc2d 100644 --- a/llvm/test/Analysis/CostModel/ARM/load_store.ll +++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll @@ -85,7 +85,7 @@ define void @stores() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'stores' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 @@ -111,7 +111,7 @@ define void @stores() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 1 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> undef, <4 x float>* undef, align 1 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <2 x double> undef, <2 x double>* undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8-SIZE-LABEL: 'stores' ; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i8 undef, i8* undef, align 4 @@ -272,7 +272,7 @@ define void @loads() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'loads' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 @@ -298,7 +298,7 @@ define void @loads() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = load <8 x i16>, <8 x i16>* undef, align 1 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = load <4 x float>, <4 x float>* undef, align 1 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %23 = load <2 x double>, <2 x double>* undef, align 1 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8-SIZE-LABEL: 'loads' ; CHECK-V8-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load i8, i8* undef, align 4 diff --git a/llvm/test/Analysis/CostModel/ARM/memcpy.ll b/llvm/test/Analysis/CostModel/ARM/memcpy.ll index 4e8717ef0b903..5ebf945a887bb 100644 --- a/llvm/test/Analysis/CostModel/ARM/memcpy.ll +++ b/llvm/test/Analysis/CostModel/ARM/memcpy.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size | \ ; RUN: FileCheck %s --check-prefixes=COMMON,CHECK-NO-SA ; RUN: opt < %s -cost-model -analyze -cost-kind=code-size -mattr=+strict-align | \ @@ -17,9 +18,9 @@ define void @memcpy_1(i8* %d, i8* %s) { ; ldrb r1, [r1] ; strb r1, [r0] ; -; COMMON: function 'memcpy_1' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_1' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false) +; COMMON-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 1, i1 false) @@ -40,9 +41,13 @@ define void @memcpy_2(i8* %d, i8* %s) { ; strb r1, [r0, #1] ; strb r2, [r0] ; -; COMMON: function 'memcpy_2' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_2' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_2' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 2, i1 false) @@ -67,9 +72,13 @@ define void @memcpy_3(i8* %d, i8* %s) { ; strb r3, [r0, #1] ; strb r2, [r0] ; -; COMMON: function 'memcpy_3' -; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_3' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_3' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 3, i1 false) @@ -94,9 +103,13 @@ define void @memcpy_4(i8* %d, i8* %s) { ; strb r3, [r0, #1] ; strb.w r12, [r0] ; -; COMMON: function 'memcpy_4' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_4' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_4' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 4, i1 false) @@ -119,8 +132,9 @@ define void @memcpy_8(i8* %d, i8* %s) { ; bl __aeabi_memcpy ; pop {r7, pc} ; -; COMMON: function 'memcpy_8' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_8' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 8, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 
%d, i8* align 1 %s, i32 8, i1 false) @@ -147,9 +161,13 @@ define void @memcpy_16(i8* %d, i8* %s) { ; bl __aeabi_memcpy ; pop {r7, pc} ; -; COMMON: function 'memcpy_16' -; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_16' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_16' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 16, i1 false) @@ -163,8 +181,9 @@ define void @memcpy_32(i8* %d, i8* %s, i32 %N) { ; movs r2, #32 ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_32' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_32' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 32, i1 false) @@ -177,8 +196,9 @@ define void @memcpy_N(i8* %d, i8* %s, i32 %N) { ; ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_N' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_N' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 1 %s, i32 %N, i1 false) @@ -196,8 +216,9 @@ define void @memcpy_1_al2(i8* %d, i8* %s) { ; ldrb r1, [r1] ; strb r1, [r0] ; -; COMMON: function 'memcpy_1_al2' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_1_al2' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 1, i1 false) @@ -211,8 +232,9 @@ define void @memcpy_2_al2(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_2_al2' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_2_al2' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 2, i1 false) @@ -228,8 +250,9 @@ define void @memcpy_3_al2(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_3_al2' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 
'memcpy_3_al2' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 3, i1 false) @@ -250,9 +273,13 @@ define void @memcpy_4_al2(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_4_al2' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_4_al2' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_4_al2' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 4, i1 false) @@ -279,9 +306,13 @@ define void @memcpy_8_al2(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_8_al2' -; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_8_al2' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_8_al2' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 8, i1 false) @@ -306,9 +337,13 @@ define void @memcpy_16_al2(i8* %d, i8* %s) { ; movs r2, #16 ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_16_al2' -; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_16_al2' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_16_al2' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 16, i1 false) @@ -322,8 +357,9 @@ define void @memcpy_32_al2(i8* %d, i8* %s, i32 %N) { ; movs r2, #32 ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_32_al2' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_32_al2' +; COMMON-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 32, i1 false) @@ -336,8 +372,9 @@ define void @memcpy_N_al2(i8* %d, i8* %s, i32 %N) { ; ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_N_al2' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_N_al2' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %d, i8* align 2 %s, i32 %N, i1 false) @@ -355,8 +392,9 @@ define void @memcpy_1_al4(i8* %d, i8* %s) { ; ldrb r1, [r1] ; strb r1, [r0] ; -; COMMON: function 'memcpy_1_al4' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_1_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 1, i1 false) @@ -370,8 +408,9 @@ define void @memcpy_2_al4(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_2_al4' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_2_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 2, i1 false) @@ -387,8 +426,9 @@ define void @memcpy_3_al4(i8* %d, i8* %s) { ; ldrh r1, [r1] ; strh r1, [r0] ; -; COMMON: function 'memcpy_3_al4' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_3_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 3, i1 false) @@ -402,8 +442,9 @@ define void @memcpy_4_al4(i8* %d, i8* %s) { ; ldr r1, [r1] ; str r1, [r0] ; -; COMMON: function 'memcpy_4_al4' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_4_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 4, i1 false) @@ -417,8 +458,9 @@ define void @memcpy_8_al4(i8* %d, i8* %s) { ; ldrd r2, r1, [r1] ; strd r2, r1, [r0] ; -; COMMON: function 'memcpy_8_al4' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_8_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 8, i1 false) @@ -434,8 +476,9 @@ define void @memcpy_16_al4(i8* %d, i8* %s) { ; stm.w r0, {r2, r3, r12} ; str r1, [r0, #12] ; -; COMMON: function 'memcpy_16_al4' -; COMMON-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_16_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 16, i1 false) @@ -451,8 +494,9 @@ define void @memcpy_32_al4(i8* %d, i8* %s, i32 %N) { ; ldm.w r1, {r2, r3, r12, lr} ; stm.w r0, {r2, r3, r12, lr} ; -; COMMON: function 'memcpy_32_al4' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_32_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 32, i1 false) @@ -465,8 +509,9 @@ define void @memcpy_N_al4(i8* %d, i8* %s, i32 %N) { ; ; bl __aeabi_memcpy4 ; -; COMMON: function 'memcpy_N_al4' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_N_al4' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 4 %s, i32 %N, i1 false) @@ -484,8 +529,9 @@ define void @memcpy_1_al14(i8* %d, i8* %s) { ; ldrb r1, [r1] ; strb r1, [r0] ; -; COMMON: function 'memcpy_1_al14' -; COMMON-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_1_al14' +; COMMON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 1, i1 false) @@ -506,9 +552,13 @@ define void @memcpy_2_al14(i8* %d, i8* %s) { ; strb r1, [r0, #1] ; strb r2, [r0] ; -; COMMON: function 'memcpy_2_al14' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_2_al14' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_2_al14' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 2, i1 false) @@ -533,9 +583,13 @@ define void @memcpy_3_al14(i8* %d, i8* %s) { ; strb r3, [r0, #1] ; strb r2, [r0] ; -; COMMON: function 'memcpy_3_al14' -; CHECK-NO-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_3_al14' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_3_al14' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 3, i1 false) @@ -560,9 +614,13 @@ define void @memcpy_4_al14(i8* %d, i8* %s) { ; strb r3, [r0, #1] ; strb.w r12, [r0] ; -; COMMON: function 'memcpy_4_al14' -; CHECK-NO-SA-NEXT: cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_4_al14' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_4_al14' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 4, i1 false) @@ -585,8 +643,9 @@ define void @memcpy_8_al14(i8* %d, i8* %s) { ; bl __aeabi_memcpy ; pop {r7, pc} ; -; COMMON: function 'memcpy_8_al14' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_8_al14' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 8, i1 false) @@ -611,9 +670,13 @@ define void @memcpy_16_al14(i8* %d, i8* %s) { ; movs r2, #16 ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_16_al14' -; CHECK-NO-SA-NEXT: cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 -; CHECK-SA-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NO-SA-LABEL: 'memcpy_16_al14' +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false) +; CHECK-NO-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; CHECK-SA-LABEL: 'memcpy_16_al14' +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false) +; CHECK-SA-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 16, i1 false) @@ -627,8 +690,9 @@ define void @memcpy_32_al14(i8* %d, i8* %s) { ; movs r2, #32 ; bl __aeabi_memcpy ; -; COMMON: function 'memcpy_32_al14' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_32_al14' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 32, i1 false) @@ -641,8 +705,9 @@ define void @memcpy_N_al14(i8* %d, i8* %s, i32 %N) { ; ; bl __aeabi_memcpy4 ; -; COMMON: function 'memcpy_N_al14' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_N_al14' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %d, i8* align 4 %s, i32 %N, i1 false) @@ -660,8 +725,9 @@ define void @memcpy_1_al41(i8* %d, i8* %s) { ; ldrb r1, [r1] ; strb r1, [r0] ; -; COMMON: function 'memcpy_1_al41' -; COMMON-NEXT: cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32 +; COMMON-LABEL: 'memcpy_1_al41' +; COMMON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false) +; COMMON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; entry: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %d, i8* align 1 %s, i32 1, i1 false) diff --git a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll index e88fcca1225cd..63dcbe9982219 100644 --- a/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/mul-cast-vect.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf" %T464 = type <4 x i64> define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { -; COST: function 'direct': +; COST-LABEL: 'direct' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = mul %T432 %v0, %v1 ; ASM: vmul.i32 store %T432 %r3, %T432* %storeaddr ; 
ASM: vst1.64 @@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { } define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'ups1632': +; COST-LABEL: 'ups1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = mul %T432 %r1, %r2 ; ASM: vmull.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'upu1632': +; COST-LABEL: 'upu1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = mul %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> + %r3 = mul %T432 %r1, %r2 ; ASM: vmull.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'ups3264': +; COST-LABEL: 'ups3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> +; 
COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 + %r3 = mul %T232 %v0, %v1 ; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 -; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'upu3264': +; COST-LABEL: 'upu3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = mul <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = mul %T232 %v0, %v1 + %r3 = mul %T232 %v0, %v1 ; ASM: vmul.i32 -; COST: cost of 1 for instruction: {{.*}} mul <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 -; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) { -; COST: function 'dn3216': +; COST-LABEL: 'dn3216' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = mul <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = mul %T432 %v0, %v1 + %r3 = mul %T432 %v0, %v1 ; ASM: vmul.i32 -; COST: cost of 2 for instruction: {{.*}} mul <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 -; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> store %T416 %st, %T416* %storeaddr ; ASM: vstr ret void diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll index 8c5c7415f67fb..02bb080b65b64 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll @@ -28,7 +28,7 @@ define i32 @masked_gather() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> 
undef, <8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef) @@ -92,7 +92,7 @@ define i32 @masked_scatter() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef) call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef) @@ -153,7 +153,7 @@ define void @gep_v4i32(i32* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, < ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x i32*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %resbs = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gepbsb, i32 4, <4 x i1> %mask, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %resbs, <4 x i32*> %gepbsb, i32 4, <4 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %gep1 = getelementptr i32, i32* %base, <4 x i32> %ind32 %res1 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep1, i32 4, <4 x i1> %mask, <4 x i32> undef) @@ -212,7 +212,7 @@ define void @gep_v4f32(float* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <4 x i16*> %gepbs to <4 x float*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %resbs = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepbsb, i32 4, <4 x i1> %mask, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resbs, <4 x float*> %gepbsb, i32 4, <4 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %gep1 = getelementptr float, float* %base, <4 x i32> %ind32 %res1 = call <4 x float> 
@llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep1, i32 4, <4 x i1> %mask, <4 x float> undef) @@ -269,7 +269,7 @@ define void @gep_v4i16(i16* %base, <4 x i32> %ind32, <4 x i16> %ind16, <4 x i1> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <4 x i16> %res6 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %res6trunc, <4 x i16*> %gep5, i32 4, <4 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %gep1 = getelementptr i16, i16* %base, <4 x i32> %ind32 %res1 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %gep1, i32 2, <4 x i1> %mask, <4 x i16> undef) @@ -312,7 +312,7 @@ define void @gep_v4i8(i8* %base, <4 x i8> %ind8, <4 x i1> %mask) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res6sext = sext <4 x i8> %res6 to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %res6trunc = trunc <4 x i32> %res6sext to <4 x i8> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %res6trunc, <4 x i8*> %gep5, i32 4, <4 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; result zext %gep5 = getelementptr i8, i8* %base, <4 x i8> %ind8 @@ -361,7 +361,7 @@ define void @gep_v8i16(i16* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, < ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %ressext = sext <8 x i16> %res to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %restrunc = trunc <8 x i32> %ressext to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %restrunc, <8 x i16*> %gep4, i32 4, <8 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; no offset ext %gep1 = getelementptr i16, i16* %base, <8 x i32> %ind32 @@ -434,7 +434,7 @@ define void @gep_v8f16(half* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x half*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; no offset ext %gep1 = getelementptr half, half* %base, <8 x i32> %ind32 @@ -484,7 +484,7 @@ define void @gep_v8i8(i8* %base, <8 x i8> %ind8, <8 x i1> %mask) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res6sext = sext <8 x i8> %res6 to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %res6trunc = trunc <8 x i16> %res6sext to <8 x i8> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %res6trunc, <8 x i8*> %gep5, i32 4, <8 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; result zext %indzext = zext <8 x i8> %ind8 to <8 x i32> @@ -524,7 +524,7 @@ define void @gep_v16i8(i8* %base, i16* %base16, <16 x i8> %ind8, <16 x i32> %ind ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, i8* %base, <16 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %indtrunc, <16 x i8*> %gep4, i32 2, <16 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; no offset ext %gep1 = getelementptr i8, i8* %base, <16 x i32> %ind32 @@ -563,7 +563,7 @@ define void @gep_v16i8p(<16 x i8*> %base, i32 %off, <16 x i1> %mask) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off ; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll index 0cd02e3e9c514..a03f283cd1e44 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-add.ll @@ -21,7 +21,7 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'reduce_i64' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) @@ -29,7 +29,7 @@ define i32 @reduce_i64(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an 
estimated cost of 202 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'reduce_i64' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef) @@ -82,7 +82,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-RECIP-LABEL: 'reduce_i32' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) @@ -92,7 +92,7 @@ define i32 @reduce_i32(i32 %arg) { ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5658 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11806 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36390 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'reduce_i32' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index 562a6fed4b86c..8b8ef521764f4 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) @@ -26,7 +26,7 @@ define i32 
@reduce_i64(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef) @@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) @@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef) %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef) @@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) @@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef) ; MVE-NEXT: Cost 
Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef) %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef) @@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) @@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index 6cae1f1fc3122..a39f2ffaf648e 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 
@llvm.experimental.vector.reduce.smin.v4i64(<4 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smin.v8i64(<8 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smin.v16i64(<16 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.smin.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> undef) @@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) @@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smin.v16i32(<16 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smin.v32i32(<32 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.smin.v2i32(<2 x i32> undef) %V4 = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> undef) @@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) @@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smin.v16i16(<16 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 
@llvm.experimental.vector.reduce.smin.v32i16(<32 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smin.v64i16(<64 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.experimental.vector.reduce.smin.v2i16(<2 x i16> undef) %V4 = call i16 @llvm.experimental.vector.reduce.smin.v4i16(<4 x i16> undef) @@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) @@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smin.v32i8(<32 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smin.v64i8(<64 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smin.v128i8(<128 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index dac7ab6e5c3ab..bb3205ab33360 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umax.v4i64(<4 x i64> undef) ; MVE-NEXT: Cost Model: Found an 
estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umax.v8i64(<8 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umax.v16i64(<16 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> undef) @@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) @@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umax.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umax.v32i32(<32 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.umax.v2i32(<2 x i32> undef) %V4 = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> undef) @@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) @@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umax.v16i16(<16 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umax.v32i16(<32 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for 
instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umax.v64i16(<64 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.experimental.vector.reduce.umax.v2i16(<2 x i16> undef) %V4 = call i16 @llvm.experimental.vector.reduce.umax.v4i16(<4 x i16> undef) @@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) @@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umax.v32i8(<32 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umax.v64i8(<64 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umax.v128i8(<128 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll index ddcd052853b72..a5e0e56e0465d 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll @@ -18,7 +18,7 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' ; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i64(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.umin.v4i64(<4 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.umin.v8i64(<8 x i64> undef) ; 
MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.umin.v16i64(<16 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.experimental.vector.reduce.umin.v1i64(<1 x i64> undef) %V2 = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> undef) @@ -51,7 +51,7 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) @@ -59,7 +59,7 @@ define i32 @reduce_i32(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.umin.v8i32(<8 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.umin.v16i32(<16 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2184 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.umin.v32i32(<32 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.experimental.vector.reduce.umin.v2i32(<2 x i32> undef) %V4 = call i32 @llvm.experimental.vector.reduce.umin.v4i32(<4 x i32> undef) @@ -86,7 +86,7 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 303 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 503 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) @@ -95,7 +95,7 @@ define i32 @reduce_i16(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.umin.v16i16(<16 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.umin.v32i16(<32 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 8880 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.umin.v64i16(<64 x i16> undef) -; MVE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.experimental.vector.reduce.umin.v2i16(<2 x i16> undef) %V4 = call i16 @llvm.experimental.vector.reduce.umin.v4i16(<4 x i16> undef) @@ -125,7 +125,7 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 493 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 689 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) ; NEON-NEXT: Cost Model: Found an estimated cost of 1081 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) -; NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' ; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) @@ -135,7 +135,7 @@ define i32 @reduce_i8(i32 %arg) { ; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.umin.v32i8(<32 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 11820 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.umin.v64i8(<64 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 36412 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.umin.v128i8(<128 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> undef) %V4 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index 32f46b1b53eba..f7e7ae0d19835 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -39,6 +39,7 @@ define void @selects() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-RECIP-LABEL: 'selects' ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -69,6 +70,7 @@ define void @selects() { ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; 
CHECK-THUMB1-RECIP-LABEL: 'selects' ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -99,6 +101,7 @@ define void @selects() { ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-RECIP-LABEL: 'selects' ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -129,6 +132,7 @@ define void @selects() { ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'selects' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -159,6 +163,7 @@ define void @selects() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-NEON-SIZE-LABEL: 'selects' ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -189,6 +194,7 @@ define void @selects() { ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB1-SIZE-LABEL: 'selects' ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -219,6 +225,7 @@ define void @selects() { ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-SIZE-LABEL: 'selects' ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = select i1 undef, i8 undef, i8 undef @@ -249,6 +256,7 @@ define void @selects() { ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v1 = select i1 undef, i8 undef, i8 undef %v2 = select i1 undef, i16 undef, i16 undef diff --git a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll index 02986ea20b4fc..d171b4f35fbf0 100644 --- a/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/shl-cast-vect.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf" %T464 = type <4 x i64> define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { -; COST: function 'direct': +; COST-LABEL: 'direct' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = shl %T432 %v0, %v1 -; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> + %r3 = shl %T432 %v0, %v1 ; ASM: vshl.i32 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { } define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'ups1632': +; COST-LABEL: 'ups1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x 
i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = shl %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> + %r3 = shl %T432 %r1, %r2 ; ASM: vshll.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'upu1632': +; COST-LABEL: 'upu1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = shl %T432 %r1, %r2 -; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> + %r3 = shl %T432 %r1, %r2 ; ASM: vshll.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'ups3264': +; COST-LABEL: 'ups3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = shl %T232 %v0, %v1 + %r3 = shl %T232 %v0, %v1 ; ASM: vshl.i32 -; COST: cost of 2 for instruction: {{.*}} shl <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 -; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'upu3264': +; COST-LABEL: 'upu3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = shl %T232 %v0, %v1 + %r3 = shl %T232 %v0, %v1 ; ASM: vshl.i32 -; COST: cost of 2 for instruction: {{.*}} shl <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 -; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) { -; COST: function 'dn3216': +; COST-LABEL: 'dn3216' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r3 = shl <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = shl %T432 %v0, %v1 + %r3 = shl %T432 %v0, %v1 ; ASM: vshl.i32 -; COST: cost of 2 for instruction: {{.*}} shl <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 -; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> store %T416 %st, %T416* %storeaddr ; ASM: vstr ret void diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index 6a5d0ec306d7e..5d9f698f1e5a0 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -16,7 +16,7 @@ define void @broadcast() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'broadcast' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer @@ -31,7 +31,7 @@ define void @broadcast() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer 
; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-NEON-NEXT: Cost Model: Unknown cost for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <4 x i32> zeroinitializer -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer @@ -67,7 +67,7 @@ define void @reverse() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'reverse' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> @@ -82,7 +82,7 @@ define void @reverse() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v18 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll index d736fcc0c47a8..8b691973405f8 100644 --- a/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll +++ b/llvm/test/Analysis/CostModel/ARM/sub-cast-vect.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -cost-model -analyze -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 | FileCheck --check-prefix=COST %s ; To see the assembly output: llc -mcpu=cortex-a9 < %s | FileCheck --check-prefix=ASM %s ; ASM lines below are only for reference, tests on that direction should go to tests/CodeGen/ARM @@ -15,13 +16,18 @@ target triple = "armv7--linux-gnueabihf" %T464 = type <4 x i64> define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { -; COST: function 'direct': +; COST-LABEL: 'direct' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = sub %T432 %v0, %v1 -; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> + %r3 = sub %T432 %v0, %v1 ; ASM: vsub.i32 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -29,16 +35,22 @@ define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) { } define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'ups1632': +; COST-LABEL: 'ups1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = sext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = sext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = sext %T416 %v0 to %T432 %r2 = sext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} sext <4 x i16> {{.*}} to <4 x i32> - %r3 = sub %T432 %r1, %r2 -; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> + %r3 = sub %T432 %r1, %r2 ; ASM: vsubl.s16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -46,16 +58,22 @@ define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { -; COST: function 'upu1632': +; COST-LABEL: 'upu1632' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i16>, <4 x i16>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i16>, <4 x i16>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext <4 x i16> %v0 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = zext <4 x i16> %v1 to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %r1, %r2 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %r3, <4 x i32>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T416, %T416* %loadaddr ; ASM: vldr %v1 = load %T416, %T416* %loadaddr2 ; ASM: vldr %r1 = zext %T416 %v0 to %T432 %r2 = zext %T416 %v1 to %T432 -; COST: cost of 0 for instruction: {{.*}} zext <4 x i16> {{.*}} to <4 x i32> - %r3 = sub %T432 %r1, %r2 -; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> + %r3 = sub %T432 %r1, %r2 ; ASM: vsubl.u16 store %T432 %r3, %T432* %storeaddr ; ASM: vst1.64 @@ -63,51 +81,66 @@ define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) { } define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'ups3264': +; COST-LABEL: 'ups3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = sext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = sub %T232 %v0, %v1 + %r3 = sub %T232 %v0, %v1 ; ASM: vsub.i32 -; COST: cost of 1 for instruction: {{.*}} sub <2 x i32> %st = sext %T232 %r3 to %T264 ; ASM: vmovl.s32 -; COST: cost of 1 for instruction: {{.*}} sext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) { -; COST: function 'upu3264': +; COST-LABEL: 'upu3264' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <2 x i32>, <2 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <2 x i32>, <2 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <2 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = zext <2 x i32> %r3 to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %st, <2 x i64>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T232, %T232* %loadaddr ; ASM: vldr %v1 = load %T232, %T232* %loadaddr2 ; ASM: vldr - %r3 = sub %T232 %v0, %v1 + %r3 = sub %T232 %v0, %v1 ; ASM: vsub.i32 -; COST: cost of 1 for instruction: {{.*}} sub <2 x i32> %st = zext %T232 %r3 to %T264 ; ASM: vmovl.u32 -; COST: cost of 1 for instruction: {{.*}} zext <2 x i32> {{.*}} to <2 x i64> store %T264 %st, %T264* %storeaddr ; ASM: vst1.64 ret void } define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) { -; COST: function 'dn3216': +; COST-LABEL: 'dn3216' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v0 = load <4 x i32>, <4 x i32>* %loadaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load <4 x i32>, <4 x i32>* %loadaddr2, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r3 = sub <4 x i32> %v0, %v1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %st = trunc <4 x i32> %r3 to <4 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %st, <4 x i16>* %storeaddr, align 8 +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; %v0 = load %T432, %T432* %loadaddr ; ASM: vld1.64 %v1 = load %T432, %T432* %loadaddr2 ; ASM: vld1.64 - %r3 = sub %T432 %v0, %v1 + %r3 = sub %T432 %v0, %v1 ; ASM: vsub.i32 -; COST: cost of 1 for instruction: {{.*}} sub <4 x i32> %st = trunc %T432 %r3 to %T416 ; ASM: vmovn.i32 -; COST: cost of 1 for instruction: {{.*}} trunc <4 x i32> {{.*}} to <4 x i16> store %T416 %st, %T416* %storeaddr ; ASM: vstr ret void From 45f2a56856e29b8cb038b2e559289b91fb98fedf Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 4 Aug 2020 
12:13:16 -0400
Subject: [PATCH 451/600] [CUDA][HIP] Support accessing static device variable
 in host code for -fno-gpu-rdc

nvcc supports accessing file-scope static device variables in host code
through host APIs such as cudaMemcpyToSymbol.

CUDA/HIP let users access device variables in host code via shadow
variables. In host compilation, clang emits a shadow variable for each
device variable and calls __*RegisterVariable to register it in the init
function. The address of the shadow variable and the device-side mangled
name of the device variable are passed to __*RegisterVariable. The runtime
looks up the symbol by name in the device binary to find the address of the
device variable.

The problem with static device variables is that they have internal linkage,
so their names may be changed by the linker if there are multiple symbols
with the same name. They also end up as local symbols in the ELF file,
whereas the runtime only looks up global symbols.

Another reason for giving static device variables external linkage is that
they may be initialized externally by host code and their final value may be
accessed by host code after kernel execution; they therefore effectively have
external linkage, and giving them internal linkage invites incorrect
optimizations on them.

To support accessing static device variables in host code in -fno-gpu-rdc
mode, change their internal linkage to external linkage. The name does not
need to change since there is only one TU in -fno-gpu-rdc mode. The
externalization is done only if the static device variable is referenced by
host code.

Differential Revision: https://reviews.llvm.org/D80858
---
 clang/include/clang/AST/ASTContext.h          |  7 ++
 clang/lib/AST/ASTContext.cpp                  | 19 +++-
 clang/lib/Sema/SemaExpr.cpp                   | 19 ++++
 clang/test/CodeGenCUDA/constexpr-variables.cu |  6 +-
 .../CodeGenCUDA/static-device-var-no-rdc.cu   | 94 +++++++++++++++++++
 5 files changed, 139 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CodeGenCUDA/static-device-var-no-rdc.cu

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 6c00fe86f282d..78207a4aad31b 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -43,6 +43,7 @@
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/MapVector.h"
@@ -999,6 +1000,9 @@ class ASTContext : public RefCountedBase<ASTContext> {
   // Implicitly-declared type 'struct _GUID'.
   mutable TagDecl *MSGuidTagDecl = nullptr;

+  /// Keep track of CUDA/HIP static device variables referenced by host code.
+  llvm::DenseSet<const VarDecl *> CUDAStaticDeviceVarReferencedByHost;
+
   ASTContext(LangOptions &LOpts, SourceManager &SM, IdentifierTable &idents,
              SelectorTable &sels, Builtin::Context &builtins);
   ASTContext(const ASTContext &) = delete;
@@ -3030,6 +3034,9 @@ OPT_LIST(V)
   /// Return a new OMPTraitInfo object owned by this context.
   OMPTraitInfo &getNewOMPTraitInfo();

+  /// Whether a C++ static variable should be externalized.
+  bool shouldExternalizeStaticVar(const Decl *D) const;
+
 private:
   /// All OMPTraitInfo objects live in this collection, one per
   /// `pragma omp [begin] declare variant` directive.
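As an illustration of the usage pattern this patch enables, here is a minimal
sketch (not taken from the patch; the variable name, kernel, and values are
invented for illustration, and error checking is omitted). HIP is analogous
with the hipMemcpy*Symbol APIs.

// Sketch only: host code reading and writing a file-scope static device
// variable through the CUDA runtime symbol API, compiled with -fno-gpu-rdc.
#include <cuda_runtime.h>
#include <cstdio>

static __device__ int counter;  // internal linkage in the source; the compiler
                                // externalizes it once host code references it

__global__ void bump() { ++counter; }  // device-side use

int main() {
  int host = 41;
  cudaMemcpyToSymbol(counter, &host, sizeof(host));    // host writes the device variable
  bump<<<1, 1>>>();
  cudaMemcpyFromSymbol(&host, counter, sizeof(host));  // host reads it back
  std::printf("counter = %d\n", host);                 // expected: 42
}

Without the externalization described above, `counter` would remain a local
symbol in the device binary and the runtime lookup triggered by the
__*RegisterVariable registration could not resolve it.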
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 43bbe41fb6112..04a4c5482db75 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -10325,12 +10325,17 @@ static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
   } else if (D->hasAttr<DLLExportAttr>()) {
     if (L == GVA_DiscardableODR)
       return GVA_StrongODR;
-  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice &&
-             D->hasAttr<CUDAGlobalAttr>()) {
+  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
     // Device-side functions with __global__ attribute must always be
     // visible externally so they can be launched from host.
-    if (L == GVA_DiscardableODR || L == GVA_Internal)
+    if (D->hasAttr<CUDAGlobalAttr>() &&
+        (L == GVA_DiscardableODR || L == GVA_Internal))
       return GVA_StrongODR;
+    // Single source offloading languages like CUDA/HIP need to be able to
+    // access static device variables from host code of the same compilation
+    // unit. This is done by externalizing the static variable.
+    if (Context.shouldExternalizeStaticVar(D))
+      return GVA_StrongExternal;
   }
   return L;
 }
@@ -11185,3 +11190,11 @@ clang::operator<<(const DiagnosticBuilder &DB,
     return DB << Section.Decl;
   return DB << "a prior #pragma section";
 }
+
+bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
+  return !getLangOpts().GPURelocatableDeviceCode &&
+         (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) &&
+         isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
+         cast<VarDecl>(D)->getStorageClass() == SC_Static &&
+         CUDAStaticDeviceVarReferencedByHost.count(cast<VarDecl>(D));
+}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index bb0b1fa49851d..dc867ba8f165d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -17864,6 +17864,25 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
   if (Var->isInvalidDecl())
     return;

+  // Record a CUDA/HIP static device/constant variable if it is referenced
+  // by host code. This is done conservatively, when the variable is referenced
+  // in any of the following contexts:
+  //   - a non-function context
+  //   - a host function
+  //   - a host device function
+  // This also requires the reference of the static device/constant variable by
+  // host code to be visible in the device compilation for the compiler to be
+  // able to externalize the static device/constant variable.
+  if ((Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>()) &&
+      Var->isFileVarDecl() && Var->getStorageClass() == SC_Static) {
+    auto *CurContext = SemaRef.CurContext;
+    if (!CurContext || !isa<FunctionDecl>(CurContext) ||
+        cast<FunctionDecl>(CurContext)->hasAttr<CUDAHostAttr>() ||
+        (!cast<FunctionDecl>(CurContext)->hasAttr<CUDADeviceAttr>() &&
+         !cast<FunctionDecl>(CurContext)->hasAttr<CUDAGlobalAttr>()))
+      SemaRef.getASTContext().CUDAStaticDeviceVarReferencedByHost.insert(Var);
+  }
+
   auto *MSI = Var->getMemberSpecializationInfo();
   TemplateSpecializationKind TSK = MSI ?
MSI->getTemplateSpecializationKind() : Var->getTemplateSpecializationKind(); diff --git a/clang/test/CodeGenCUDA/constexpr-variables.cu b/clang/test/CodeGenCUDA/constexpr-variables.cu index b8b0782b4f62f..7ae56341cdf57 100644 --- a/clang/test/CodeGenCUDA/constexpr-variables.cu +++ b/clang/test/CodeGenCUDA/constexpr-variables.cu @@ -19,7 +19,7 @@ struct Q { // CXX14: @_ZN1Q2k2E = {{.*}}externally_initialized constant i32 6 // CXX17: @_ZN1Q2k2E = internal {{.*}}constant i32 6 // CXX14: @_ZN1Q2k1E = available_externally {{.*}}constant i32 5 - // CXX17: @_ZN1Q2k1E = linkonce_odr {{.*}}constant i32 5 + // CXX17: @_ZN1Q2k1E = {{.*}} externally_initialized constant i32 5 static constexpr int k1 = 5; static constexpr int k2 = 6; }; @@ -30,14 +30,14 @@ __constant__ const int &use_Q_k2 = Q::k2; template struct X { // CXX14: @_ZN1XIiE1aE = available_externally {{.*}}constant i32 123 - // CXX17: @_ZN1XIiE1aE = linkonce_odr {{.*}}constant i32 123 + // CXX17: @_ZN1XIiE1aE = {{.*}}externally_initialized constant i32 123 static constexpr int a = 123; }; __constant__ const int &use_X_a = X::a; template struct A { // CXX14: @_ZN1AIiLi1ELi2EE1xE = available_externally {{.*}}constant i32 2 - // CXX17: @_ZN1AIiLi1ELi2EE1xE = linkonce_odr {{.*}}constant i32 2 + // CXX17: @_ZN1AIiLi1ELi2EE1xE = {{.*}}externally_initialized constant i32 2 constexpr static T x = a * b; }; __constant__ const int &y = A::x; diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu new file mode 100644 index 0000000000000..1aea467c2d490 --- /dev/null +++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu @@ -0,0 +1,94 @@ +// REQUIRES: x86-registered-target +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=DEV %s + +// RUN: %clang_cc1 -triple x86_64-gnu-linux \ +// RUN: -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: -check-prefixes=HOST %s + +#include "Inputs/cuda.h" + +// Test function scope static device variable, which should not be externalized. +// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1 + +// Check a static device variable referenced by host function is externalized. +// DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0 +// HOST-DAG: @_ZL1x = internal global i32 undef +// HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00" + +static __device__ int x; + +// Check a static device variables referenced only by device functions and kernels +// is not externalized. +// DEV-DAG: @_ZL2x2 = internal addrspace(1) global i32 0 +static __device__ int x2; + +// Check a static device variable referenced by host device function is externalized. +// DEV-DAG: @_ZL2x3 = addrspace(1) externally_initialized global i32 0 +static __device__ int x3; + +// Check a static device variable referenced in file scope is externalized. +// DEV-DAG: @_ZL2x4 = addrspace(1) externally_initialized global i32 0 +static __device__ int x4; +int& x4_ref = x4; + +// Check a static device variable in anonymous namespace. +// DEV-DAG: @_ZN12_GLOBAL__N_12x5E = addrspace(1) externally_initialized global i32 0 +namespace { +static __device__ int x5; +} + +// Check a static constant variable referenced by host is externalized. 
+// DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0 +// HOST-DAG: @_ZL1y = internal global i32 undef +// HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00" + +static __constant__ int y; + +// Test static host variable, which should not be externalized nor registered. +// HOST-DAG: @_ZL1z = internal global i32 0 +// DEV-NOT: @_ZL1z +static int z; + +// Test static device variable in inline function, which should not be +// externalized nor registered. +// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat + +inline __device__ void devfun(const int ** b) { + const static int p = 2; + b[0] = &p; + b[1] = &x2; +} + +__global__ void kernel(int *a, const int **b) { + const static int w = 1; + a[0] = x; + a[1] = y; + a[2] = x2; + a[3] = x3; + a[4] = x4; + a[5] = x5; + b[0] = &w; + devfun(b); +} + +__host__ __device__ void hdf(int *a) { + a[0] = x3; +} + +int* getDeviceSymbol(int *x); + +void foo(int *a) { + getDeviceSymbol(&x); + getDeviceSymbol(&x5); + getDeviceSymbol(&y); + z = 123; +} + +// HOST: __hipRegisterVar({{.*}}@_ZL1x {{.*}}@[[DEVNAMEX]] +// HOST: __hipRegisterVar({{.*}}@_ZL1y {{.*}}@[[DEVNAMEY]] +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w +// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p From 4e491570b5ecff17d3ac7cf6dbb328d379cd4fb6 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Tue, 4 Aug 2020 18:52:42 +0200 Subject: [PATCH 452/600] [mlir] Remove LLVMTypeTestDialect This dialect was introduced during the bring-up of the new LLVM dialect type system for testing purposes. The main LLVM dialect now uses the new type system and the test dialect is no longer necessary, so remove it. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D85224 --- mlir/test/Dialect/LLVMIR/types-invalid.mlir | 36 ++--- mlir/test/Target/llvmir-types.mlir | 124 +++++++++--------- mlir/test/lib/Dialect/CMakeLists.txt | 1 - mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt | 14 -- .../Dialect/LLVMIR/LLVMTypeTestDialect.cpp | 52 -------- mlir/tools/mlir-opt/CMakeLists.txt | 1 - mlir/tools/mlir-opt/mlir-opt.cpp | 2 - mlir/tools/mlir-translate/CMakeLists.txt | 3 - mlir/tools/mlir-translate/mlir-translate.cpp | 2 - 9 files changed, 80 insertions(+), 155 deletions(-) delete mode 100644 mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt delete mode 100644 mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp diff --git a/mlir/test/Dialect/LLVMIR/types-invalid.mlir b/mlir/test/Dialect/LLVMIR/types-invalid.mlir index bb281087412c9..f7a75da46775e 100644 --- a/mlir/test/Dialect/LLVMIR/types-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/types-invalid.mlir @@ -1,95 +1,95 @@ // RUN: mlir-opt --allow-unregistered-dialect -split-input-file -verify-diagnostics %s func @repeated_struct_name() { - "some.op"() : () -> !llvm2.struct<"a", (ptr>)> + "some.op"() : () -> !llvm.struct<"a", (ptr>)> // expected-error @+2 {{identified type already used with a different body}} // expected-note @+1 {{existing body: (ptr>)}} - "some.op"() : () -> !llvm2.struct<"a", (i32)> + "some.op"() : () -> !llvm.struct<"a", (i32)> } // ----- func @repeated_struct_name_packed() { - "some.op"() : () -> !llvm2.struct<"a", packed (i32)> + "some.op"() : () -> !llvm.struct<"a", packed (i32)> // expected-error @+2 {{identified type already used with a different body}} // expected-note @+1 {{existing body: packed (i32)}} - "some.op"() : () -> !llvm2.struct<"a", (i32)> + "some.op"() : () -> !llvm.struct<"a", (i32)> } // ----- func @repeated_struct_opaque() { - "some.op"() : () -> 
!llvm2.struct<"a", opaque> + "some.op"() : () -> !llvm.struct<"a", opaque> // expected-error @+2 {{identified type already used with a different body}} // expected-note @+1 {{existing body: opaque}} - "some.op"() : () -> !llvm2.struct<"a", ()> + "some.op"() : () -> !llvm.struct<"a", ()> } // ----- func @repeated_struct_opaque_non_empty() { - "some.op"() : () -> !llvm2.struct<"a", opaque> + "some.op"() : () -> !llvm.struct<"a", opaque> // expected-error @+2 {{identified type already used with a different body}} // expected-note @+1 {{existing body: opaque}} - "some.op"() : () -> !llvm2.struct<"a", (i32, i32)> + "some.op"() : () -> !llvm.struct<"a", (i32, i32)> } // ----- func @repeated_struct_opaque_redefinition() { - "some.op"() : () -> !llvm2.struct<"a", ()> + "some.op"() : () -> !llvm.struct<"a", ()> // expected-error @+1 {{redeclaring defined struct as opaque}} - "some.op"() : () -> !llvm2.struct<"a", opaque> + "some.op"() : () -> !llvm.struct<"a", opaque> } // ----- func @struct_literal_opaque() { // expected-error @+1 {{only identified structs can be opaque}} - "some.op"() : () -> !llvm2.struct + "some.op"() : () -> !llvm.struct } // ----- func @unexpected_type() { // expected-error @+1 {{unexpected type, expected i* or keyword}} - "some.op"() : () -> !llvm2.f32 + "some.op"() : () -> !llvm.f32 } // ----- func @unexpected_type() { // expected-error @+1 {{unknown LLVM type}} - "some.op"() : () -> !llvm2.ifoo + "some.op"() : () -> !llvm.ifoo } // ----- func @explicitly_opaque_struct() { - "some.op"() : () -> !llvm2.struct<"a", opaque> + "some.op"() : () -> !llvm.struct<"a", opaque> // expected-error @+2 {{identified type already used with a different body}} // expected-note @+1 {{existing body: opaque}} - "some.op"() : () -> !llvm2.struct<"a", ()> + "some.op"() : () -> !llvm.struct<"a", ()> } // ----- func @dynamic_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm2.vec + "some.op"() : () -> !llvm.vec } // ----- func @dynamic_scalable_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm2.vec + "some.op"() : () -> !llvm.vec } // ----- func @unscalable_vector() { // expected-error @+1 {{expected '? 
x x ' or ' x '}} - "some.op"() : () -> !llvm2.vec<4 x 4 x i32> + "some.op"() : () -> !llvm.vec<4 x 4 x i32> } diff --git a/mlir/test/Target/llvmir-types.mlir b/mlir/test/Target/llvmir-types.mlir index d807562d1a2d0..a3026e5515c38 100644 --- a/mlir/test/Target/llvmir-types.mlir +++ b/mlir/test/Target/llvmir-types.mlir @@ -3,180 +3,180 @@ llvm.func @primitives() { // CHECK: declare void @return_void() // CHECK: declare void @return_void_round() - "llvm.test_introduce_func"() { name = "return_void", type = !llvm2.void } : () -> () + "llvm.test_introduce_func"() { name = "return_void", type = !llvm.void } : () -> () // CHECK: declare half @return_half() // CHECK: declare half @return_half_round() - "llvm.test_introduce_func"() { name = "return_half", type = !llvm2.half } : () -> () + "llvm.test_introduce_func"() { name = "return_half", type = !llvm.half } : () -> () // CHECK: declare bfloat @return_bfloat() // CHECK: declare bfloat @return_bfloat_round() - "llvm.test_introduce_func"() { name = "return_bfloat", type = !llvm2.bfloat } : () -> () + "llvm.test_introduce_func"() { name = "return_bfloat", type = !llvm.bfloat } : () -> () // CHECK: declare float @return_float() // CHECK: declare float @return_float_round() - "llvm.test_introduce_func"() { name = "return_float", type = !llvm2.float } : () -> () + "llvm.test_introduce_func"() { name = "return_float", type = !llvm.float } : () -> () // CHECK: declare double @return_double() // CHECK: declare double @return_double_round() - "llvm.test_introduce_func"() { name = "return_double", type = !llvm2.double } : () -> () + "llvm.test_introduce_func"() { name = "return_double", type = !llvm.double } : () -> () // CHECK: declare fp128 @return_fp128() // CHECK: declare fp128 @return_fp128_round() - "llvm.test_introduce_func"() { name = "return_fp128", type = !llvm2.fp128 } : () -> () + "llvm.test_introduce_func"() { name = "return_fp128", type = !llvm.fp128 } : () -> () // CHECK: declare x86_fp80 @return_x86_fp80() // CHECK: declare x86_fp80 @return_x86_fp80_round() - "llvm.test_introduce_func"() { name = "return_x86_fp80", type = !llvm2.x86_fp80 } : () -> () + "llvm.test_introduce_func"() { name = "return_x86_fp80", type = !llvm.x86_fp80 } : () -> () // CHECK: declare ppc_fp128 @return_ppc_fp128() // CHECK: declare ppc_fp128 @return_ppc_fp128_round() - "llvm.test_introduce_func"() { name = "return_ppc_fp128", type = !llvm2.ppc_fp128 } : () -> () + "llvm.test_introduce_func"() { name = "return_ppc_fp128", type = !llvm.ppc_fp128 } : () -> () // CHECK: declare x86_mmx @return_x86_mmx() // CHECK: declare x86_mmx @return_x86_mmx_round() - "llvm.test_introduce_func"() { name = "return_x86_mmx", type = !llvm2.x86_mmx } : () -> () + "llvm.test_introduce_func"() { name = "return_x86_mmx", type = !llvm.x86_mmx } : () -> () llvm.return } llvm.func @funcs() { // CHECK: declare void @f_void_i32(i32) // CHECK: declare void @f_void_i32_round(i32) - "llvm.test_introduce_func"() { name ="f_void_i32", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_void_i32", type = !llvm.func } : () -> () // CHECK: declare i32 @f_i32_empty() // CHECK: declare i32 @f_i32_empty_round() - "llvm.test_introduce_func"() { name ="f_i32_empty", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_i32_empty", type = !llvm.func } : () -> () // CHECK: declare i32 @f_i32_half_bfloat_float_double(half, bfloat, float, double) // CHECK: declare i32 @f_i32_half_bfloat_float_double_round(half, bfloat, float, double) - "llvm.test_introduce_func"() { name 
="f_i32_half_bfloat_float_double", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_i32_half_bfloat_float_double", type = !llvm.func } : () -> () // CHECK: declare i32 @f_i32_i32_i32(i32, i32) // CHECK: declare i32 @f_i32_i32_i32_round(i32, i32) - "llvm.test_introduce_func"() { name ="f_i32_i32_i32", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_i32_i32_i32", type = !llvm.func } : () -> () // CHECK: declare void @f_void_variadic(...) // CHECK: declare void @f_void_variadic_round(...) - "llvm.test_introduce_func"() { name ="f_void_variadic", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_void_variadic", type = !llvm.func } : () -> () // CHECK: declare void @f_void_i32_i32_variadic(i32, i32, ...) // CHECK: declare void @f_void_i32_i32_variadic_round(i32, i32, ...) - "llvm.test_introduce_func"() { name ="f_void_i32_i32_variadic", type = !llvm2.func } : () -> () + "llvm.test_introduce_func"() { name ="f_void_i32_i32_variadic", type = !llvm.func } : () -> () llvm.return } llvm.func @ints() { // CHECK: declare i1 @return_i1() // CHECK: declare i1 @return_i1_round() - "llvm.test_introduce_func"() { name = "return_i1", type = !llvm2.i1 } : () -> () + "llvm.test_introduce_func"() { name = "return_i1", type = !llvm.i1 } : () -> () // CHECK: declare i8 @return_i8() // CHECK: declare i8 @return_i8_round() - "llvm.test_introduce_func"() { name = "return_i8", type = !llvm2.i8 } : () -> () + "llvm.test_introduce_func"() { name = "return_i8", type = !llvm.i8 } : () -> () // CHECK: declare i16 @return_i16() // CHECK: declare i16 @return_i16_round() - "llvm.test_introduce_func"() { name = "return_i16", type = !llvm2.i16 } : () -> () + "llvm.test_introduce_func"() { name = "return_i16", type = !llvm.i16 } : () -> () // CHECK: declare i32 @return_i32() // CHECK: declare i32 @return_i32_round() - "llvm.test_introduce_func"() { name = "return_i32", type = !llvm2.i32 } : () -> () + "llvm.test_introduce_func"() { name = "return_i32", type = !llvm.i32 } : () -> () // CHECK: declare i64 @return_i64() // CHECK: declare i64 @return_i64_round() - "llvm.test_introduce_func"() { name = "return_i64", type = !llvm2.i64 } : () -> () + "llvm.test_introduce_func"() { name = "return_i64", type = !llvm.i64 } : () -> () // CHECK: declare i57 @return_i57() // CHECK: declare i57 @return_i57_round() - "llvm.test_introduce_func"() { name = "return_i57", type = !llvm2.i57 } : () -> () + "llvm.test_introduce_func"() { name = "return_i57", type = !llvm.i57 } : () -> () // CHECK: declare i129 @return_i129() // CHECK: declare i129 @return_i129_round() - "llvm.test_introduce_func"() { name = "return_i129", type = !llvm2.i129 } : () -> () + "llvm.test_introduce_func"() { name = "return_i129", type = !llvm.i129 } : () -> () llvm.return } llvm.func @pointers() { // CHECK: declare i8* @return_pi8() // CHECK: declare i8* @return_pi8_round() - "llvm.test_introduce_func"() { name = "return_pi8", type = !llvm2.ptr } : () -> () + "llvm.test_introduce_func"() { name = "return_pi8", type = !llvm.ptr } : () -> () // CHECK: declare float* @return_pfloat() // CHECK: declare float* @return_pfloat_round() - "llvm.test_introduce_func"() { name = "return_pfloat", type = !llvm2.ptr } : () -> () + "llvm.test_introduce_func"() { name = "return_pfloat", type = !llvm.ptr } : () -> () // CHECK: declare i8** @return_ppi8() // CHECK: declare i8** @return_ppi8_round() - "llvm.test_introduce_func"() { name = "return_ppi8", type = !llvm2.ptr> } : () -> () + "llvm.test_introduce_func"() { 
name = "return_ppi8", type = !llvm.ptr> } : () -> () // CHECK: declare i8***** @return_pppppi8() // CHECK: declare i8***** @return_pppppi8_round() - "llvm.test_introduce_func"() { name = "return_pppppi8", type = !llvm2.ptr>>>> } : () -> () + "llvm.test_introduce_func"() { name = "return_pppppi8", type = !llvm.ptr>>>> } : () -> () // CHECK: declare i8* @return_pi8_0() // CHECK: declare i8* @return_pi8_0_round() - "llvm.test_introduce_func"() { name = "return_pi8_0", type = !llvm2.ptr } : () -> () + "llvm.test_introduce_func"() { name = "return_pi8_0", type = !llvm.ptr } : () -> () // CHECK: declare i8 addrspace(1)* @return_pi8_1() // CHECK: declare i8 addrspace(1)* @return_pi8_1_round() - "llvm.test_introduce_func"() { name = "return_pi8_1", type = !llvm2.ptr } : () -> () + "llvm.test_introduce_func"() { name = "return_pi8_1", type = !llvm.ptr } : () -> () // CHECK: declare i8 addrspace(42)* @return_pi8_42() // CHECK: declare i8 addrspace(42)* @return_pi8_42_round() - "llvm.test_introduce_func"() { name = "return_pi8_42", type = !llvm2.ptr } : () -> () + "llvm.test_introduce_func"() { name = "return_pi8_42", type = !llvm.ptr } : () -> () // CHECK: declare i8 addrspace(42)* addrspace(9)* @return_ppi8_42_9() // CHECK: declare i8 addrspace(42)* addrspace(9)* @return_ppi8_42_9_round() - "llvm.test_introduce_func"() { name = "return_ppi8_42_9", type = !llvm2.ptr, 9> } : () -> () + "llvm.test_introduce_func"() { name = "return_ppi8_42_9", type = !llvm.ptr, 9> } : () -> () llvm.return } llvm.func @vectors() { // CHECK: declare <4 x i32> @return_v4_i32() // CHECK: declare <4 x i32> @return_v4_i32_round() - "llvm.test_introduce_func"() { name = "return_v4_i32", type = !llvm2.vec<4 x i32> } : () -> () + "llvm.test_introduce_func"() { name = "return_v4_i32", type = !llvm.vec<4 x i32> } : () -> () // CHECK: declare <4 x float> @return_v4_float() // CHECK: declare <4 x float> @return_v4_float_round() - "llvm.test_introduce_func"() { name = "return_v4_float", type = !llvm2.vec<4 x float> } : () -> () + "llvm.test_introduce_func"() { name = "return_v4_float", type = !llvm.vec<4 x float> } : () -> () // CHECK: declare @return_vs_4_i32() // CHECK: declare @return_vs_4_i32_round() - "llvm.test_introduce_func"() { name = "return_vs_4_i32", type = !llvm2.vec } : () -> () + "llvm.test_introduce_func"() { name = "return_vs_4_i32", type = !llvm.vec } : () -> () // CHECK: declare @return_vs_8_half() // CHECK: declare @return_vs_8_half_round() - "llvm.test_introduce_func"() { name = "return_vs_8_half", type = !llvm2.vec } : () -> () + "llvm.test_introduce_func"() { name = "return_vs_8_half", type = !llvm.vec } : () -> () // CHECK: declare <4 x i8*> @return_v_4_pi8() // CHECK: declare <4 x i8*> @return_v_4_pi8_round() - "llvm.test_introduce_func"() { name = "return_v_4_pi8", type = !llvm2.vec<4 x ptr> } : () -> () + "llvm.test_introduce_func"() { name = "return_v_4_pi8", type = !llvm.vec<4 x ptr> } : () -> () llvm.return } llvm.func @arrays() { // CHECK: declare [10 x i32] @return_a10_i32() // CHECK: declare [10 x i32] @return_a10_i32_round() - "llvm.test_introduce_func"() { name = "return_a10_i32", type = !llvm2.array<10 x i32> } : () -> () + "llvm.test_introduce_func"() { name = "return_a10_i32", type = !llvm.array<10 x i32> } : () -> () // CHECK: declare [8 x float] @return_a8_float() // CHECK: declare [8 x float] @return_a8_float_round() - "llvm.test_introduce_func"() { name = "return_a8_float", type = !llvm2.array<8 x float> } : () -> () + "llvm.test_introduce_func"() { name = "return_a8_float", type = 
!llvm.array<8 x float> } : () -> () // CHECK: declare [10 x i32 addrspace(4)*] @return_a10_pi32_4() // CHECK: declare [10 x i32 addrspace(4)*] @return_a10_pi32_4_round() - "llvm.test_introduce_func"() { name = "return_a10_pi32_4", type = !llvm2.array<10 x ptr> } : () -> () + "llvm.test_introduce_func"() { name = "return_a10_pi32_4", type = !llvm.array<10 x ptr> } : () -> () // CHECK: declare [10 x [4 x float]] @return_a10_a4_float() // CHECK: declare [10 x [4 x float]] @return_a10_a4_float_round() - "llvm.test_introduce_func"() { name = "return_a10_a4_float", type = !llvm2.array<10 x array<4 x float>> } : () -> () + "llvm.test_introduce_func"() { name = "return_a10_a4_float", type = !llvm.array<10 x array<4 x float>> } : () -> () llvm.return } llvm.func @literal_structs() { // CHECK: declare {} @return_struct_empty() // CHECK: declare {} @return_struct_empty_round() - "llvm.test_introduce_func"() { name = "return_struct_empty", type = !llvm2.struct<()> } : () -> () + "llvm.test_introduce_func"() { name = "return_struct_empty", type = !llvm.struct<()> } : () -> () // CHECK: declare { i32 } @return_s_i32() // CHECK: declare { i32 } @return_s_i32_round() - "llvm.test_introduce_func"() { name = "return_s_i32", type = !llvm2.struct<(i32)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_i32", type = !llvm.struct<(i32)> } : () -> () // CHECK: declare { float, i32 } @return_s_float_i32() // CHECK: declare { float, i32 } @return_s_float_i32_round() - "llvm.test_introduce_func"() { name = "return_s_float_i32", type = !llvm2.struct<(float, i32)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_float_i32", type = !llvm.struct<(float, i32)> } : () -> () // CHECK: declare { { i32 } } @return_s_s_i32() // CHECK: declare { { i32 } } @return_s_s_i32_round() - "llvm.test_introduce_func"() { name = "return_s_s_i32", type = !llvm2.struct<(struct<(i32)>)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_s_i32", type = !llvm.struct<(struct<(i32)>)> } : () -> () // CHECK: declare { i32, { i32 }, float } @return_s_i32_s_i32_float() // CHECK: declare { i32, { i32 }, float } @return_s_i32_s_i32_float_round() - "llvm.test_introduce_func"() { name = "return_s_i32_s_i32_float", type = !llvm2.struct<(i32, struct<(i32)>, float)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_i32_s_i32_float", type = !llvm.struct<(i32, struct<(i32)>, float)> } : () -> () // CHECK: declare <{}> @return_sp_empty() // CHECK: declare <{}> @return_sp_empty_round() - "llvm.test_introduce_func"() { name = "return_sp_empty", type = !llvm2.struct } : () -> () + "llvm.test_introduce_func"() { name = "return_sp_empty", type = !llvm.struct } : () -> () // CHECK: declare <{ i32 }> @return_sp_i32() // CHECK: declare <{ i32 }> @return_sp_i32_round() - "llvm.test_introduce_func"() { name = "return_sp_i32", type = !llvm2.struct } : () -> () + "llvm.test_introduce_func"() { name = "return_sp_i32", type = !llvm.struct } : () -> () // CHECK: declare <{ float, i32 }> @return_sp_float_i32() // CHECK: declare <{ float, i32 }> @return_sp_float_i32_round() - "llvm.test_introduce_func"() { name = "return_sp_float_i32", type = !llvm2.struct } : () -> () + "llvm.test_introduce_func"() { name = "return_sp_float_i32", type = !llvm.struct } : () -> () // CHECK: declare <{ i32, { i32, i1 }, float }> @return_sp_i32_s_i31_1_float() // CHECK: declare <{ i32, { i32, i1 }, float }> @return_sp_i32_s_i31_1_float_round() - "llvm.test_introduce_func"() { name = "return_sp_i32_s_i31_1_float", type = !llvm2.struct, 
float)> } : () -> () + "llvm.test_introduce_func"() { name = "return_sp_i32_s_i31_1_float", type = !llvm.struct, float)> } : () -> () // CHECK: declare { <{ i32 }> } @return_s_sp_i32() // CHECK: declare { <{ i32 }> } @return_s_sp_i32_round() - "llvm.test_introduce_func"() { name = "return_s_sp_i32", type = !llvm2.struct<(struct)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_sp_i32", type = !llvm.struct<(struct)> } : () -> () // CHECK: declare <{ { i32 } }> @return_sp_s_i32() // CHECK: declare <{ { i32 } }> @return_sp_s_i32_round() - "llvm.test_introduce_func"() { name = "return_sp_s_i32", type = !llvm2.struct)> } : () -> () + "llvm.test_introduce_func"() { name = "return_sp_s_i32", type = !llvm.struct)> } : () -> () llvm.return } @@ -199,30 +199,30 @@ llvm.func @literal_structs() { llvm.func @identified_structs() { // CHECK: declare %empty - "llvm.test_introduce_func"() { name = "return_s_empty", type = !llvm2.struct<"empty", ()> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_empty", type = !llvm.struct<"empty", ()> } : () -> () // CHECK: declare %opaque - "llvm.test_introduce_func"() { name = "return_s_opaque", type = !llvm2.struct<"opaque", opaque> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_opaque", type = !llvm.struct<"opaque", opaque> } : () -> () // CHECK: declare %long - "llvm.test_introduce_func"() { name = "return_s_long", type = !llvm2.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_long", type = !llvm.struct<"long", (i32, struct<(i32, i1)>, float, ptr>)> } : () -> () // CHECK: declare %self-recursive - "llvm.test_introduce_func"() { name = "return_s_self_recurisve", type = !llvm2.struct<"self-recursive", (ptr>)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_self_recurisve", type = !llvm.struct<"self-recursive", (ptr>)> } : () -> () // CHECK: declare %unpacked - "llvm.test_introduce_func"() { name = "return_s_unpacked", type = !llvm2.struct<"unpacked", (i32)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_unpacked", type = !llvm.struct<"unpacked", (i32)> } : () -> () // CHECK: declare %packed - "llvm.test_introduce_func"() { name = "return_s_packed", type = !llvm2.struct<"packed", packed (i32)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_packed", type = !llvm.struct<"packed", packed (i32)> } : () -> () // CHECK: declare %"name with spaces and !^$@$#" - "llvm.test_introduce_func"() { name = "return_s_symbols", type = !llvm2.struct<"name with spaces and !^$@$#", packed (i32)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_symbols", type = !llvm.struct<"name with spaces and !^$@$#", packed (i32)> } : () -> () // CHECK: declare %mutually-a - "llvm.test_introduce_func"() { name = "return_s_mutually_a", type = !llvm2.struct<"mutually-a", (ptr, 3>)>>)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_mutually_a", type = !llvm.struct<"mutually-a", (ptr, 3>)>>)> } : () -> () // CHECK: declare %mutually-b - "llvm.test_introduce_func"() { name = "return_s_mutually_b", type = !llvm2.struct<"mutually-b", (ptr>)>, 3>)> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_mutually_b", type = !llvm.struct<"mutually-b", (ptr>)>, 3>)> } : () -> () // CHECK: declare %struct-of-arrays - "llvm.test_introduce_func"() { name = "return_s_struct_of_arrays", type = !llvm2.struct<"struct-of-arrays", (array<10 x i32>)> } : () -> () + "llvm.test_introduce_func"() { name = 
"return_s_struct_of_arrays", type = !llvm.struct<"struct-of-arrays", (array<10 x i32>)> } : () -> () // CHECK: declare [10 x %array-of-structs] - "llvm.test_introduce_func"() { name = "return_s_array_of_structs", type = !llvm2.array<10 x struct<"array-of-structs", (i32)>> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_array_of_structs", type = !llvm.array<10 x struct<"array-of-structs", (i32)>> } : () -> () // CHECK: declare %ptr-to-struct* - "llvm.test_introduce_func"() { name = "return_s_ptr_to_struct", type = !llvm2.ptr> } : () -> () + "llvm.test_introduce_func"() { name = "return_s_ptr_to_struct", type = !llvm.ptr> } : () -> () llvm.return } diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt index 36a18f79a8cbf..9008b86314be0 100644 --- a/mlir/test/lib/Dialect/CMakeLists.txt +++ b/mlir/test/lib/Dialect/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(Affine) -add_subdirectory(LLVMIR) add_subdirectory(SPIRV) add_subdirectory(Test) diff --git a/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt deleted file mode 100644 index 2a42bc6974850..0000000000000 --- a/mlir/test/lib/Dialect/LLVMIR/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ - -add_mlir_library(MLIRLLVMTypeTestDialect - LLVMTypeTestDialect.cpp - - EXCLUDE_FROM_LIBMLIR - - LINK_COMPONENTS - Core - - LINK_LIBS PUBLIC - MLIRDialect - MLIRIR - MLIRLLVMIR - ) diff --git a/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp b/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp deleted file mode 100644 index 873ed16169093..0000000000000 --- a/mlir/test/lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_ -#define DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_ - -#include "mlir/Dialect/LLVMIR/LLVMTypes.h" -#include "mlir/IR/Dialect.h" - -namespace mlir { -namespace LLVM { -namespace { -class LLVMDialectNewTypes : public Dialect { -public: - LLVMDialectNewTypes(MLIRContext *ctx) : Dialect(getDialectNamespace(), ctx) { - // clang-format off - // addTypes(); - // clang-format on - } - static StringRef getDialectNamespace() { return "llvm2"; } - - Type parseType(DialectAsmParser &parser) const override { - return detail::parseType(parser); - } - void printType(Type type, DialectAsmPrinter &printer) const override { - detail::printType(type.cast(), printer); - } -}; -} // namespace -} // namespace LLVM - -void registerLLVMTypeTestDialect() { - mlir::registerDialect(); -} -} // namespace mlir - -#endif // DIALECT_LLVMIR_LLVMTYPETESTDIALECT_H_ diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt index f52c5f41b22b6..483dcfec0c0ff 100644 --- a/mlir/tools/mlir-opt/CMakeLists.txt +++ b/mlir/tools/mlir-opt/CMakeLists.txt @@ -13,7 +13,6 @@ set(LLVM_LINK_COMPONENTS if(MLIR_INCLUDE_TESTS) set(test_libs MLIRAffineTransformsTestPasses - MLIRLLVMTypeTestDialect MLIRSPIRVTestPasses MLIRTestDialect MLIRTestIR diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 05fba34092cba..3be470d4e3de5 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -31,7 +31,6 @@ namespace mlir { // Defined in the test directory, no public header. 
void registerConvertToTargetEnvPass(); void registerInliner(); -void registerLLVMTypeTestDialect(); void registerMemRefBoundCheck(); void registerPassManagerTestPass(); void registerPatternsTestPass(); @@ -105,7 +104,6 @@ static cl::opt allowUnregisteredDialects( void registerTestPasses() { registerConvertToTargetEnvPass(); registerInliner(); - registerLLVMTypeTestDialect(); registerMemRefBoundCheck(); registerPassManagerTestPass(); registerPatternsTestPass(); diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt index 1e6cdfe0f3b14..99b98f9288b92 100644 --- a/mlir/tools/mlir-translate/CMakeLists.txt +++ b/mlir/tools/mlir-translate/CMakeLists.txt @@ -15,9 +15,6 @@ target_link_libraries(mlir-translate ${translation_libs} ${test_libs} MLIRIR - # TODO: remove after LLVM dialect transition is complete; translation uses a - # registration function defined in this library unconditionally. - MLIRLLVMTypeTestDialect MLIRParser MLIRPass MLIRSPIRV diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp index 70bf285112a4b..1f2ddca8c8565 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -49,7 +49,6 @@ static llvm::cl::opt verifyDiagnostics( namespace mlir { // Defined in the test directory, no public header. -void registerLLVMTypeTestDialect(); void registerTestLLVMTypeTranslation(); void registerTestRoundtripSPIRV(); void registerTestRoundtripDebugSPIRV(); @@ -63,7 +62,6 @@ static void registerTestTranslations() { int main(int argc, char **argv) { registerAllDialects(); - registerLLVMTypeTestDialect(); registerAllTranslations(); registerTestTranslations(); llvm::InitLLVM y(argc, argv); From 00b89f66f988e9ec6f366ed46a51ace39fac07c8 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:39:36 +0100 Subject: [PATCH 453/600] [clang][NFC] Remove spurious +x flag on DeclTemplate.cpp and DeclTemplate.h --- clang/include/clang/AST/DeclTemplate.h | 0 clang/lib/AST/DeclTemplate.cpp | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 clang/include/clang/AST/DeclTemplate.h mode change 100755 => 100644 clang/lib/AST/DeclTemplate.cpp diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h old mode 100755 new mode 100644 diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp old mode 100755 new mode 100644 From 98b4b4570542a255e9a81e4a349183402a2d478d Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:41:51 +0100 Subject: [PATCH 454/600] [clang][NFC] Add a test showcasing an unnamed template parameter in a diagnostic --- .../test/SemaCXX/cxx1z-class-template-argument-deduction.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp b/clang/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp index 2a3f312ebd8eb..e992c7c916f37 100644 --- a/clang/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp +++ b/clang/test/SemaCXX/cxx1z-class-template-argument-deduction.cpp @@ -172,6 +172,10 @@ namespace nondeducible { template X(float) -> X; // ok + + template struct UnnamedTemplateParam {}; + template // expected-note {{non-deducible template parameter (anonymous)}} + UnnamedTemplateParam() -> UnnamedTemplateParam; // expected-error {{deduction guide template contains a template parameter that cannot be deduced}} } namespace default_args_from_ctor { From 
bc29634b93acf2e55c82dd906f0d9af196c66ff3 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:42:48 +0100 Subject: [PATCH 455/600] [clang][NFC] Remove an old workaround for MSVC 2013 --- clang/include/clang/AST/DeclTemplate.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index e9c4879b41e89..4feb1d45251d5 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -204,10 +204,6 @@ class TemplateParameterList final bool OmitTemplateKW = false) const; void print(raw_ostream &Out, const ASTContext &Context, const PrintingPolicy &Policy, bool OmitTemplateKW = false) const; - -public: - // FIXME: workaround for MSVC 2013; remove when no longer needed - using FixedSizeStorageOwner = TrailingObjects::FixedSizeStorageOwner; }; /// Stores a list of template parameters and the associated From 6f2fa9d312fcea2448706a8e410c7bc1b6436ea7 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:43:39 +0100 Subject: [PATCH 456/600] [clang][NFC] Document NamedDecl::printName --- clang/include/clang/AST/Decl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 4dd5e14d36e18..c2511514fe726 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -265,6 +265,8 @@ class NamedDecl : public Decl { // FIXME: Deprecated, move clients to getName(). std::string getNameAsString() const { return Name.getAsString(); } + /// Pretty-print the unqualified name of this declaration. Can be overloaded + /// by derived classes to provide a more user-friendly name when appropriate. virtual void printName(raw_ostream &os) const; /// Get the actual, stored name of the declaration, which may be a special From 19701458d4691ee7ec59e5aa7217a479b0fb10e7 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:48:09 +0100 Subject: [PATCH 457/600] [clang][nearly-NFC] Remove some superfluous uses of NamedDecl::getNameAsString `OS << ND->getDeclName();` is equivalent to `OS << ND->getNameAsString();` without the extra temporary string. This is not quite an NFC since two uses of `getNameAsString` in a diagnostic are replaced, which results in the named entity being quoted with additional "'"s (i.e., 'var' instead of var).
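As a minimal standalone sketch of the difference (not part of the patch; `printDeclName` is a hypothetical helper, and `ND` is assumed to be a valid pointer):

    #include "clang/AST/Decl.h"
    #include "llvm/Support/raw_ostream.h"

    // Hypothetical helper illustrating the replacement.
    void printDeclName(const clang::NamedDecl *ND, llvm::raw_ostream &OS) {
      OS << ND->getNameAsString(); // builds and copies a temporary std::string
      OS << ND->getDeclName();     // streams the DeclarationName directly
    }

In the two diagnostic cases, streaming the `NamedDecl *` itself instead of a pre-rendered string is what adds the quotes mentioned above.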
--- .../find-all-symbols/FindAllSymbols.cpp | 2 +- clang-tools-extra/clang-move/HelperDeclRefGraph.cpp | 4 ++-- clang-tools-extra/clang-move/Move.cpp | 12 ++++++------ clang/lib/AST/ASTDiagnostic.cpp | 6 +++--- clang/lib/AST/Interp/Disasm.cpp | 4 ++-- clang/lib/AST/TextNodeDumper.cpp | 13 ++++++------- clang/lib/Frontend/FrontendAction.cpp | 2 +- clang/lib/Index/FileIndexRecord.cpp | 2 +- clang/lib/Sema/AnalysisBasedWarnings.cpp | 4 ++-- clang/lib/Sema/SemaChecking.cpp | 2 +- .../StaticAnalyzer/Checkers/CastValueChecker.cpp | 4 ++-- .../Checkers/FuchsiaHandleChecker.cpp | 2 +- clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 10 ++++++---- clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp | 2 +- .../RetainCountChecker/RetainCountDiagnostics.cpp | 2 +- .../StaticAnalyzer/Checkers/VirtualCallChecker.cpp | 4 ++-- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2 +- clang/lib/Tooling/Refactoring/ASTSelection.cpp | 2 +- clang/test/Index/error-on-deserialized.c | 2 +- clang/test/SemaCXX/warn-msvc-enum-bitfield.cpp | 4 ++-- 20 files changed, 43 insertions(+), 42 deletions(-) diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp index 7d540d83037b6..70d4d7cfdff34 100644 --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.cpp @@ -99,7 +99,7 @@ CreateSymbolInfo(const NamedDecl *ND, const SourceManager &SM, SourceLocation Loc = SM.getExpansionLoc(ND->getLocation()); if (!Loc.isValid()) { - llvm::errs() << "Declaration " << ND->getNameAsString() << "(" + llvm::errs() << "Declaration " << ND->getDeclName() << "(" << ND->getDeclKindName() << ") has invalid declaration location."; return llvm::None; diff --git a/clang-tools-extra/clang-move/HelperDeclRefGraph.cpp b/clang-tools-extra/clang-move/HelperDeclRefGraph.cpp index 271bd3d6ef200..a9b773353fe69 100644 --- a/clang-tools-extra/clang-move/HelperDeclRefGraph.cpp +++ b/clang-tools-extra/clang-move/HelperDeclRefGraph.cpp @@ -116,7 +116,7 @@ void HelperDeclRGBuilder::run( const auto *DC = Result.Nodes.getNodeAs("dc"); assert(DC); LLVM_DEBUG(llvm::dbgs() << "Find helper function usage: " - << FuncRef->getDecl()->getNameAsString() << " (" + << FuncRef->getDecl()->getDeclName() << " (" << FuncRef->getDecl() << ")\n"); RG->addEdge( getOutmostClassOrFunDecl(DC->getCanonicalDecl()), @@ -126,7 +126,7 @@ void HelperDeclRGBuilder::run( const auto *DC = Result.Nodes.getNodeAs("dc"); assert(DC); LLVM_DEBUG(llvm::dbgs() - << "Find helper class usage: " << UsedClass->getNameAsString() + << "Find helper class usage: " << UsedClass->getDeclName() << " (" << UsedClass << ")\n"); RG->addEdge(getOutmostClassOrFunDecl(DC->getCanonicalDecl()), UsedClass); } diff --git a/clang-tools-extra/clang-move/Move.cpp b/clang-tools-extra/clang-move/Move.cpp index 3f09f68a8046f..24f819ca4ca29 100644 --- a/clang-tools-extra/clang-move/Move.cpp +++ b/clang-tools-extra/clang-move/Move.cpp @@ -675,8 +675,8 @@ void ClangMoveTool::run(const ast_matchers::MatchFinder::MatchResult &Result) { Result.Nodes.getNodeAs("helper_decls")) { MovedDecls.push_back(ND); HelperDeclarations.push_back(ND); - LLVM_DEBUG(llvm::dbgs() << "Add helper : " << ND->getNameAsString() << " (" - << ND << ")\n"); + LLVM_DEBUG(llvm::dbgs() + << "Add helper : " << ND->getDeclName() << " (" << ND << ")\n"); } else if (const auto *UD = Result.Nodes.getNodeAs("using_decl")) { MovedDecls.push_back(UD); } @@ -735,12 
+735,12 @@ void ClangMoveTool::removeDeclsInOldFiles() { // We remove the helper declarations which are not used in the old.cc after // moving the given declarations. for (const auto *D : HelperDeclarations) { - LLVM_DEBUG(llvm::dbgs() << "Check helper is used: " - << D->getNameAsString() << " (" << D << ")\n"); + LLVM_DEBUG(llvm::dbgs() << "Check helper is used: " << D->getDeclName() + << " (" << D << ")\n"); if (!UsedDecls.count(HelperDeclRGBuilder::getOutmostClassOrFunDecl( D->getCanonicalDecl()))) { LLVM_DEBUG(llvm::dbgs() << "Helper removed in old.cc: " - << D->getNameAsString() << " (" << D << ")\n"); + << D->getDeclName() << " (" << D << ")\n"); RemovedDecls.push_back(D); } } @@ -820,7 +820,7 @@ void ClangMoveTool::moveDeclsToNewFiles() { D->getCanonicalDecl()))) continue; - LLVM_DEBUG(llvm::dbgs() << "Helper used in new.cc: " << D->getNameAsString() + LLVM_DEBUG(llvm::dbgs() << "Helper used in new.cc: " << D->getDeclName() << " " << D << "\n"); ActualNewCCDecls.push_back(D); } diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp index 05adf226bae37..99ce46e83123e 100644 --- a/clang/lib/AST/ASTDiagnostic.cpp +++ b/clang/lib/AST/ASTDiagnostic.cpp @@ -1560,11 +1560,11 @@ class TemplateDiff { if (!Tree.HasChildren()) { // If we're dealing with a template specialization with zero // arguments, there are no children; special-case this. - OS << FromTD->getNameAsString() << "<>"; + OS << FromTD->getDeclName() << "<>"; return; } - OS << FromTD->getNameAsString() << '<'; + OS << FromTD->getDeclName() << '<'; Tree.MoveToChild(); unsigned NumElideArgs = 0; bool AllArgsElided = true; @@ -1724,7 +1724,7 @@ class TemplateDiff { } if (Same) { - OS << "template " << FromTD->getNameAsString(); + OS << "template " << FromTD->getDeclName(); } else if (!PrintTree) { OS << (FromDefault ? 
"(default) template " : "template "); Bold(); diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp index 293fdd4b3256e..c1c18f832d4fa 100644 --- a/clang/lib/AST/Interp/Disasm.cpp +++ b/clang/lib/AST/Interp/Disasm.cpp @@ -26,10 +26,10 @@ LLVM_DUMP_METHOD void Function::dump() const { dump(llvm::errs()); } LLVM_DUMP_METHOD void Function::dump(llvm::raw_ostream &OS) const { if (F) { if (auto *Cons = dyn_cast(F)) { - const std::string &Name = Cons->getParent()->getNameAsString(); + DeclarationName Name = Cons->getParent()->getDeclName(); OS << Name << "::" << Name << ":\n"; } else { - OS << F->getNameAsString() << ":\n"; + OS << F->getDeclName() << ":\n"; } } else { OS << "<>\n"; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 3d47d5cb66d2e..4aae63982542e 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -661,7 +661,7 @@ void TextNodeDumper::dumpBareDeclRef(const Decl *D) { void TextNodeDumper::dumpName(const NamedDecl *ND) { if (ND->getDeclName()) { ColorScope Color(OS, ShowColors, DeclNameColor); - OS << ' ' << ND->getNameAsString(); + OS << ' ' << ND->getDeclName(); } } @@ -1600,9 +1600,8 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { if (MD->size_overridden_methods() != 0) { auto dumpOverride = [=](const CXXMethodDecl *D) { SplitQualType T_split = D->getType().split(); - OS << D << " " << D->getParent()->getName() - << "::" << D->getNameAsString() << " '" - << QualType::getAsString(T_split, PrintPolicy) << "'"; + OS << D << " " << D->getParent()->getName() << "::" << D->getDeclName() + << " '" << QualType::getAsString(T_split, PrintPolicy) << "'"; }; AddChild([=] { @@ -2032,7 +2031,7 @@ void TextNodeDumper::VisitUsingDecl(const UsingDecl *D) { OS << ' '; if (D->getQualifier()) D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy()); - OS << D->getNameAsString(); + OS << D->getDeclName(); } void TextNodeDumper::VisitUnresolvedUsingTypenameDecl( @@ -2040,7 +2039,7 @@ void TextNodeDumper::VisitUnresolvedUsingTypenameDecl( OS << ' '; if (D->getQualifier()) D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy()); - OS << D->getNameAsString(); + OS << D->getDeclName(); } void TextNodeDumper::VisitUnresolvedUsingValueDecl( @@ -2048,7 +2047,7 @@ void TextNodeDumper::VisitUnresolvedUsingValueDecl( OS << ' '; if (D->getQualifier()) D->getQualifier()->print(OS, D->getASTContext().getPrintingPolicy()); - OS << D->getNameAsString(); + OS << D->getDeclName(); dumpType(D->getType()); } diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 59a968b5c7099..92654dbe8a10a 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -121,7 +121,7 @@ class DeserializedDeclsChecker : public DelegatingDeserializationListener { = Ctx.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, "%0 was deserialized"); Ctx.getDiagnostics().Report(Ctx.getFullLoc(D->getLocation()), DiagID) - << ND->getNameAsString(); + << ND; } DelegatingDeserializationListener::DeclRead(ID, D); diff --git a/clang/lib/Index/FileIndexRecord.cpp b/clang/lib/Index/FileIndexRecord.cpp index 753bdf2ce21d7..df18a9aed8b79 100644 --- a/clang/lib/Index/FileIndexRecord.cpp +++ b/clang/lib/Index/FileIndexRecord.cpp @@ -52,7 +52,7 @@ void FileIndexRecord::print(llvm::raw_ostream &OS) const { << ':' << PLoc.getColumn(); if (auto ND = dyn_cast(D)) { - OS << ' ' << ND->getNameAsString(); + OS << ' ' << ND->getDeclName(); } OS 
<< '\n'; diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 3b73568938337..5cc215e08ea83 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -1849,8 +1849,8 @@ class ThreadSafetyReporter : public clang::threadSafety::ThreadSafetyHandler { << *PossibleMatch); if (Verbose && POK == POK_VarAccess) { PartialDiagnosticAt VNote(D->getLocation(), - S.PDiag(diag::note_guarded_by_declared_here) - << D->getNameAsString()); + S.PDiag(diag::note_guarded_by_declared_here) + << D->getDeclName()); Warnings.emplace_back(std::move(Warning), getNotes(Note, VNote)); } else Warnings.emplace_back(std::move(Warning), getNotes(Note)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7e73c51c7150a..5c2092f1447ad 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11027,7 +11027,7 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init, BitfieldEnumDecl->getNumPositiveBits() > 0 && BitfieldEnumDecl->getNumNegativeBits() == 0) { S.Diag(InitLoc, diag::warn_no_underlying_type_specified_for_enum_bitfield) - << BitfieldEnumDecl->getNameAsString(); + << BitfieldEnumDecl; } } diff --git a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp index 1ef70b650414e..56c6f8d02e0f6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CastValueChecker.cpp @@ -119,10 +119,10 @@ static const NoteTag *getNoteTag(CheckerContext &C, Out << "Assuming "; if (const auto *DRE = dyn_cast(Object)) { - Out << '\'' << DRE->getDecl()->getNameAsString() << '\''; + Out << '\'' << DRE->getDecl()->getDeclName() << '\''; } else if (const auto *ME = dyn_cast(Object)) { Out << (IsKnownCast ? "Field '" : "field '") - << ME->getMemberDecl()->getNameAsString() << '\''; + << ME->getMemberDecl()->getDeclName() << '\''; } else { Out << (IsKnownCast ? 
"The object" : "the object"); } diff --git a/clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp index fc35082705fa6..b2822e5307f3a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp @@ -324,7 +324,7 @@ void FuchsiaHandleChecker::checkPostCall(const CallEvent &Call, if (auto IsInteresting = PathBR->getInterestingnessKind(RetSym)) { std::string SBuf; llvm::raw_string_ostream OS(SBuf); - OS << "Function '" << FuncDecl->getNameAsString() + OS << "Function '" << FuncDecl->getDeclName() << "' returns an open handle"; return OS.str(); } else diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index d5b0a5b2220ff..fc6d15371a2f3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -3301,14 +3301,16 @@ PathDiagnosticPieceRef MallocBugVisitor::VisitNode(const ExplodedNode *N, OS << "reallocated by call to '"; const Stmt *S = RSCurr->getStmt(); if (const auto *MemCallE = dyn_cast(S)) { - OS << MemCallE->getMethodDecl()->getNameAsString(); + OS << MemCallE->getMethodDecl()->getDeclName(); } else if (const auto *OpCallE = dyn_cast(S)) { - OS << OpCallE->getDirectCallee()->getNameAsString(); + OS << OpCallE->getDirectCallee()->getDeclName(); } else if (const auto *CallE = dyn_cast(S)) { auto &CEMgr = BRC.getStateManager().getCallEventManager(); CallEventRef<> Call = CEMgr.getSimpleCall(CallE, state, CurrentLC); - const auto *D = dyn_cast_or_null(Call->getDecl()); - OS << (D ? D->getNameAsString() : "unknown"); + if (const auto *D = dyn_cast_or_null(Call->getDecl())) + OS << D->getDeclName(); + else + OS << "unknown"; } OS << "'"; StackHint = std::make_unique( diff --git a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp index 7f0519c695b0f..da3ce01d032be 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MoveChecker.cpp @@ -580,7 +580,7 @@ void MoveChecker::explainObject(llvm::raw_ostream &OS, const MemRegion *MR, if (const auto DR = dyn_cast_or_null(unwrapRValueReferenceIndirection(MR))) { const auto *RegionDecl = cast(DR->getDecl()); - OS << " '" << RegionDecl->getNameAsString() << "'"; + OS << " '" << RegionDecl->getDeclName() << "'"; } ObjectKind OK = classifyObject(MR, RD); diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index 854646a8779d7..1d903530201f8 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -439,7 +439,7 @@ annotateStartParameter(const ExplodedNode *N, SymbolRef Sym, std::string s; llvm::raw_string_ostream os(s); - os << "Parameter '" << PVD->getNameAsString() << "' starts at +"; + os << "Parameter '" << PVD->getDeclName() << "' starts at +"; if (CurrT->getCount() == 1) { os << "1, as it is marked as consuming"; } else { diff --git a/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp index f49ee5fa5ad37..1c589e3468c2d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp @@ -125,8 +125,8 @@ void 
VirtualCallChecker::checkPreCall(const CallEvent &Call, OS << "Call to "; if (IsPure) OS << "pure "; - OS << "virtual method '" << MD->getParent()->getNameAsString() - << "::" << MD->getNameAsString() << "' during "; + OS << "virtual method '" << MD->getParent()->getDeclName() + << "::" << MD->getDeclName() << "' during "; if (*ObState == ObjectState::CtorCalled) OS << "construction "; else diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 265dcd134213d..7888029399f1d 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -169,7 +169,7 @@ class ConstructedObjectKey { if (S) { S->printJson(Out, Helper, PP, /*AddQuotes=*/true); } else { - Out << '\"' << I->getAnyMember()->getNameAsString() << '\"'; + Out << '\"' << I->getAnyMember()->getDeclName() << '\"'; } } diff --git a/clang/lib/Tooling/Refactoring/ASTSelection.cpp b/clang/lib/Tooling/Refactoring/ASTSelection.cpp index af1eb491a20a2..9485c8bc04ad0 100644 --- a/clang/lib/Tooling/Refactoring/ASTSelection.cpp +++ b/clang/lib/Tooling/Refactoring/ASTSelection.cpp @@ -218,7 +218,7 @@ static void dump(const SelectedASTNode &Node, llvm::raw_ostream &OS, if (const Decl *D = Node.Node.get()) { OS << D->getDeclKindName() << "Decl"; if (const auto *ND = dyn_cast(D)) - OS << " \"" << ND->getNameAsString() << '"'; + OS << " \"" << ND->getDeclName() << '"'; } else if (const Stmt *S = Node.Node.get()) { OS << S->getStmtClassName(); } diff --git a/clang/test/Index/error-on-deserialized.c b/clang/test/Index/error-on-deserialized.c index bf0d59a058804..78b77e5cddaa4 100644 --- a/clang/test/Index/error-on-deserialized.c +++ b/clang/test/Index/error-on-deserialized.c @@ -10,4 +10,4 @@ // RUN: -Xclang -error-on-deserialized-decl=NestedVar1 2>&1 \ // RUN: | FileCheck %s -// CHECK: error: NestedVar1 was deserialized +// CHECK: error: 'NestedVar1' was deserialized diff --git a/clang/test/SemaCXX/warn-msvc-enum-bitfield.cpp b/clang/test/SemaCXX/warn-msvc-enum-bitfield.cpp index 99e1669018ab1..496240809694c 100644 --- a/clang/test/SemaCXX/warn-msvc-enum-bitfield.cpp +++ b/clang/test/SemaCXX/warn-msvc-enum-bitfield.cpp @@ -6,8 +6,8 @@ void test0() { enum F { F1, F2 }; struct { E e1 : 1; E e2; F f1 : 1; F f2; } s; - s.e1 = E1; // expected-warning {{enums in the Microsoft ABI are signed integers by default; consider giving the enum E an unsigned underlying type to make this code portable}} - s.f1 = F1; // expected-warning {{enums in the Microsoft ABI are signed integers by default; consider giving the enum F an unsigned underlying type to make this code portable}} + s.e1 = E1; // expected-warning {{enums in the Microsoft ABI are signed integers by default; consider giving the enum 'E' an unsigned underlying type to make this code portable}} + s.f1 = F1; // expected-warning {{enums in the Microsoft ABI are signed integers by default; consider giving the enum 'F' an unsigned underlying type to make this code portable}} s.e2 = E2; s.f2 = F2; From 94b43118e2203fed8ca0377ae762c08189aa6f3d Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 11:53:24 +0100 Subject: [PATCH 458/600] [clang][NFCI] Get rid of ConstantMatrixTypeBitfields to avoid increasing the size of every type. sizeof(ConstantMatrixTypeBitfields) > 8 which increases the size of every type. This was not detected because no corresponding static_assert for its size was added. 
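As a standalone sketch of why the missing assert matters (the 20-bit widths are taken from the removed class; the 27-bit anonymous field is a hypothetical stand-in for NumTypeBits, chosen only for illustration):

    // With 27 + 20 + 20 = 67 bits, the bitfields span three 32-bit
    // allocation units, so sizeof() is 12 and the guard fires at
    // compile time -- exactly the detection that was missing.
    struct ConstantMatrixTypeBitfieldsSketch {
      unsigned : 27;           // stand-in for the NumTypeBits prefix
      unsigned NumRows : 20;
      unsigned NumColumns : 20;
    };
    static_assert(sizeof(ConstantMatrixTypeBitfieldsSketch) <= 8,
                  "ConstantMatrixTypeBitfields is larger than 8 bytes!");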
To prevent this from occurring again, replace the various static_asserts for the size of each of the bit-field classes by a single static_assert for the size of Type. I have left ConstantMatrixType::MaxElementsPerDimension unchanged since the limit is exercised by multiple tests. --- clang/include/clang/AST/Type.h | 74 +++++++++------------------- clang/lib/AST/Type.cpp | 6 +-- 2 files changed, 20 insertions(+), 60 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 7d943ebc78c01..df9c926ce9023 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1680,19 +1680,6 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { uint32_t NumElements; }; - class ConstantMatrixTypeBitfields { - friend class ConstantMatrixType; - - unsigned : NumTypeBits; - - /// Number of rows and columns. Using 20 bits allows supporting very large - /// matrixes, while keeping 24 bits to accommodate NumTypeBits. - unsigned NumRows : 20; - unsigned NumColumns : 20; - - static constexpr uint32_t MaxElementsPerDimension = (1 << 20) - 1; - }; - class AttributedTypeBitfields { friend class AttributedType; @@ -1802,46 +1789,11 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { TypeWithKeywordBitfields TypeWithKeywordBits; ElaboratedTypeBitfields ElaboratedTypeBits; VectorTypeBitfields VectorTypeBits; - ConstantMatrixTypeBitfields ConstantMatrixTypeBits; SubstTemplateTypeParmPackTypeBitfields SubstTemplateTypeParmPackTypeBits; TemplateSpecializationTypeBitfields TemplateSpecializationTypeBits; DependentTemplateSpecializationTypeBitfields DependentTemplateSpecializationTypeBits; PackExpansionTypeBitfields PackExpansionTypeBits; - - static_assert(sizeof(TypeBitfields) <= 8, - "TypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(ArrayTypeBitfields) <= 8, - "ArrayTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(AttributedTypeBitfields) <= 8, - "AttributedTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(AutoTypeBitfields) <= 8, - "AutoTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(BuiltinTypeBitfields) <= 8, - "BuiltinTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(FunctionTypeBitfields) <= 8, - "FunctionTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(ObjCObjectTypeBitfields) <= 8, - "ObjCObjectTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(ReferenceTypeBitfields) <= 8, - "ReferenceTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(TypeWithKeywordBitfields) <= 8, - "TypeWithKeywordBitfields is larger than 8 bytes!"); - static_assert(sizeof(ElaboratedTypeBitfields) <= 8, - "ElaboratedTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(VectorTypeBitfields) <= 8, - "VectorTypeBitfields is larger than 8 bytes!"); - static_assert(sizeof(SubstTemplateTypeParmPackTypeBitfields) <= 8, - "SubstTemplateTypeParmPackTypeBitfields is larger" - " than 8 bytes!"); - static_assert(sizeof(TemplateSpecializationTypeBitfields) <= 8, - "TemplateSpecializationTypeBitfields is larger" - " than 8 bytes!"); - static_assert(sizeof(DependentTemplateSpecializationTypeBitfields) <= 8, - "DependentTemplateSpecializationTypeBitfields is larger" - " than 8 bytes!"); - static_assert(sizeof(PackExpansionTypeBitfields) <= 8, - "PackExpansionTypeBitfields is larger than 8 bytes"); }; private: @@ -1858,6 +1810,10 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { Type(TypeClass tc, QualType canon, TypeDependence Dependence) :
ExtQualsTypeCommonBase(this, canon.isNull() ? QualType(this_(), 0) : canon) { + static_assert(sizeof(*this) <= 8 + sizeof(ExtQualsTypeCommonBase), + "changing bitfields changed sizeof(Type)!"); + static_assert(alignof(decltype(*this)) % sizeof(void *) == 0, + "Insufficient alignment!"); TypeBits.TC = tc; TypeBits.Dependence = static_cast(Dependence); TypeBits.CacheValid = false; @@ -3469,8 +3425,15 @@ class ConstantMatrixType final : public MatrixType { friend class ASTContext; /// The element type of the matrix. + // FIXME: Appears to be unused? There is also MatrixType::ElementType... QualType ElementType; + /// Number of rows and columns. + unsigned NumRows; + unsigned NumColumns; + + static constexpr unsigned MaxElementsPerDimension = (1 << 20) - 1; + ConstantMatrixType(QualType MatrixElementType, unsigned NRows, unsigned NColumns, QualType CanonElementType); @@ -3479,25 +3442,24 @@ class ConstantMatrixType final : public MatrixType { public: /// Returns the number of rows in the matrix. - unsigned getNumRows() const { return ConstantMatrixTypeBits.NumRows; } + unsigned getNumRows() const { return NumRows; } /// Returns the number of columns in the matrix. - unsigned getNumColumns() const { return ConstantMatrixTypeBits.NumColumns; } + unsigned getNumColumns() const { return NumColumns; } /// Returns the number of elements required to embed the matrix into a vector. unsigned getNumElementsFlattened() const { - return ConstantMatrixTypeBits.NumRows * ConstantMatrixTypeBits.NumColumns; + return getNumRows() * getNumColumns(); } /// Returns true if \p NumElements is a valid matrix dimension. - static bool isDimensionValid(uint64_t NumElements) { - return NumElements > 0 && - NumElements <= ConstantMatrixTypeBitfields::MaxElementsPerDimension; + static constexpr bool isDimensionValid(size_t NumElements) { + return NumElements > 0 && NumElements <= MaxElementsPerDimension; } /// Returns the maximum number of elements per dimension. - static unsigned getMaxElementsPerDimension() { - return ConstantMatrixTypeBitfields::MaxElementsPerDimension; + static constexpr unsigned getMaxElementsPerDimension() { + return MaxElementsPerDimension; } void Profile(llvm::FoldingSetNodeID &ID) { diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index d40ba4c648c4c..b94e12d65d7fa 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -308,10 +308,8 @@ ConstantMatrixType::ConstantMatrixType(QualType matrixType, unsigned nRows, ConstantMatrixType::ConstantMatrixType(TypeClass tc, QualType matrixType, unsigned nRows, unsigned nColumns, QualType canonType) - : MatrixType(tc, matrixType, canonType) { - ConstantMatrixTypeBits.NumRows = nRows; - ConstantMatrixTypeBits.NumColumns = nColumns; -} + : MatrixType(tc, matrixType, canonType), NumRows(nRows), + NumColumns(nColumns) {} DependentSizedMatrixType::DependentSizedMatrixType( const ASTContext &CTX, QualType ElementType, QualType CanonicalType, From f7a039de7af7b83105f3e0345d65dceda1a0e0d4 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Wed, 5 Aug 2020 12:10:16 +0100 Subject: [PATCH 459/600] [clang][NFC] DeclPrinter: use NamedDecl::getDeclName instead of NamedDecl::printName to print the name of enumerations, namespaces and template parameters. NamedDecl::printName will print the pretty-printed name of the entity, which is not what we want here (we should print "enum { e };" instead of "enum (unnamed enum at input.cc:1:5) { e };"). 
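The pattern applied throughout the diff below, as a standalone sketch (`printEnumHeader` is a hypothetical helper, not the actual DeclPrinter code):

    #include "clang/AST/Decl.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the name only when the entity actually has one:
    // DeclarationName converts to false for unnamed declarations, so an
    // unnamed enum prints as "enum { ... }" with no placeholder name.
    void printEnumHeader(const clang::EnumDecl *D, llvm::raw_ostream &Out) {
      Out << "enum";
      if (D->getDeclName())
        Out << ' ' << D->getDeclName();
    }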
For now only DecompositionDecl and MDGuidDecl have an overloaded printName so this does not result in any functional change, but this change is needed since I will be adding overloads to better handle unnamed entities in diagnostics. --- clang/lib/AST/DeclPrinter.cpp | 28 +++-- clang/unittests/AST/DeclPrinterTest.cpp | 130 +++++++++++++++++++++--- 2 files changed, 138 insertions(+), 20 deletions(-) diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 2e48b2b46c4da..ca64f8f6cfbed 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -528,7 +528,8 @@ void DeclPrinter::VisitEnumDecl(EnumDecl *D) { prettyPrintAttributes(D); - Out << ' ' << *D; + if (D->getDeclName()) + Out << ' ' << D->getDeclName(); if (D->isFixed()) Out << " : " << D->getIntegerType().stream(Policy); @@ -933,7 +934,12 @@ void DeclPrinter::VisitStaticAssertDecl(StaticAssertDecl *D) { void DeclPrinter::VisitNamespaceDecl(NamespaceDecl *D) { if (D->isInline()) Out << "inline "; - Out << "namespace " << *D << " {\n"; + + Out << "namespace "; + if (D->getDeclName()) + Out << D->getDeclName() << ' '; + Out << "{\n"; + VisitDeclContext(D); Indent() << "}"; } @@ -1091,10 +1097,15 @@ void DeclPrinter::VisitTemplateDecl(const TemplateDecl *D) { if (const TemplateTemplateParmDecl *TTP = dyn_cast(D)) { - Out << "class "; + Out << "class"; + if (TTP->isParameterPack()) - Out << "..."; - Out << D->getName(); + Out << " ..."; + else if (TTP->getDeclName()) + Out << ' '; + + if (TTP->getDeclName()) + Out << TTP->getDeclName(); } else if (auto *TD = D->getTemplatedDecl()) Visit(TD); else if (const auto *Concept = dyn_cast(D)) { @@ -1216,7 +1227,7 @@ void DeclPrinter::PrintObjCTypeParams(ObjCTypeParamList *Params) { break; } - Out << Param->getDeclName().getAsString(); + Out << Param->getDeclName(); if (Param->hasExplicitBound()) { Out << " : " << Param->getUnderlyingType().getAsString(Policy); @@ -1695,10 +1706,11 @@ void DeclPrinter::VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *TTP) { if (TTP->isParameterPack()) Out << " ..."; - else if (!TTP->getName().empty()) + else if (TTP->getDeclName()) Out << ' '; - Out << *TTP; + if (TTP->getDeclName()) + Out << TTP->getDeclName(); if (TTP->hasDefaultArgument()) { Out << " = "; diff --git a/clang/unittests/AST/DeclPrinterTest.cpp b/clang/unittests/AST/DeclPrinterTest.cpp index 939c8b52c12c1..38e46a378b474 100644 --- a/clang/unittests/AST/DeclPrinterTest.cpp +++ b/clang/unittests/AST/DeclPrinterTest.cpp @@ -37,6 +37,7 @@ void PrintDecl(raw_ostream &Out, const ASTContext *Context, const Decl *D, PrintingPolicyModifier PolicyModifier) { PrintingPolicy Policy = Context->getPrintingPolicy(); Policy.TerseOutput = true; + Policy.Indentation = 0; if (PolicyModifier) PolicyModifier(Policy); D->print(Out, Policy, /*Indentation*/ 0, /*PrintInstantiation*/ false); @@ -162,14 +163,21 @@ ::testing::AssertionResult PrintedDeclCXX11nonMSCMatches( } ::testing::AssertionResult -PrintedDeclCXX1ZMatches(StringRef Code, const DeclarationMatcher &NodeMatch, - StringRef ExpectedPrinted) { - std::vector Args(1, "-std=c++1z"); - return PrintedDeclMatches(Code, - Args, - NodeMatch, - ExpectedPrinted, - "input.cc"); +PrintedDeclCXX17Matches(StringRef Code, const DeclarationMatcher &NodeMatch, + StringRef ExpectedPrinted, + PrintingPolicyModifier PolicyModifier = nullptr) { + std::vector Args(1, "-std=c++17"); + return PrintedDeclMatches(Code, Args, NodeMatch, ExpectedPrinted, "input.cc", + PolicyModifier); +} + +::testing::AssertionResult 
+PrintedDeclC11Matches(StringRef Code, const DeclarationMatcher &NodeMatch, + StringRef ExpectedPrinted, + PrintingPolicyModifier PolicyModifier = nullptr) { + std::vector Args(1, "-std=c11"); + return PrintedDeclMatches(Code, Args, NodeMatch, ExpectedPrinted, "input.c", + PolicyModifier); } ::testing::AssertionResult @@ -250,6 +258,72 @@ TEST(DeclPrinter, TestNamespaceAlias2) { // Should be: with semicolon } +TEST(DeclPrinter, TestNamespaceUnnamed) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "namespace { int X; }", + namespaceDecl(has(varDecl(hasName("X")))).bind("id"), + "namespace {\nint X;\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestNamespaceUsingDirective) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "namespace X { namespace A {} }" + "using namespace X::A;", + usingDirectiveDecl().bind("id"), "using namespace X::A", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestEnumDecl1) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "enum A { a0, a1, a2 };", enumDecl(hasName("A")).bind("id"), + "enum A {\na0,\na1,\na2\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestEnumDecl2) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "enum A { a0 = -1, a1, a2 = 1 };", enumDecl(hasName("A")).bind("id"), + "enum A {\na0 = -1,\na1,\na2 = 1\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestEnumDecl3) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "enum { a0, a1, a2 };", + enumDecl(has(enumConstantDecl(hasName("a0")))).bind("id"), + "enum {\na0,\na1,\na2\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestEnumDecl4) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "enum class A { a0, a1, a2 };", enumDecl(hasName("A")).bind("id"), + "enum class A : int {\na0,\na1,\na2\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestRecordDecl1) { + ASSERT_TRUE(PrintedDeclC11Matches( + "struct A { int a; };", recordDecl(hasName("A")).bind("id"), + "struct A {\nint a;\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestRecordDecl2) { + ASSERT_TRUE(PrintedDeclC11Matches( + "struct A { struct { int i; }; };", recordDecl(hasName("A")).bind("id"), + "struct A {\nstruct {\nint i;\n};\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + +TEST(DeclPrinter, TestRecordDecl3) { + ASSERT_TRUE(PrintedDeclC11Matches( + "union { int A; } u;", + recordDecl(has(fieldDecl(hasName("A")))).bind("id"), "union {\nint A;\n}", + [](PrintingPolicy &Policy) { Policy.TerseOutput = false; })); +} + TEST(DeclPrinter, TestCXXRecordDecl1) { ASSERT_TRUE(PrintedDeclCXX98Matches( "class A { int a; };", @@ -1119,6 +1193,39 @@ TEST(DeclPrinter, TestFunctionTemplateDecl6) { "template void A(U t)")); } +TEST(DeclPrinter, TestUnnamedTemplateParameters) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "template class> void A();", + functionTemplateDecl(hasName("A")).bind("id"), + "template class> void A()")); +} + +TEST(DeclPrinter, TestUnnamedTemplateParametersPacks) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "template class ...> void A();", + functionTemplateDecl(hasName("A")).bind("id"), + "template class ...> void A()")); +} + +TEST(DeclPrinter, TestNamedTemplateParametersPacks) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "template class ...Z> void A();", + functionTemplateDecl(hasName("A")).bind("id"), + "template class ...Z> void A()")); +} + 
+TEST(DeclPrinter, TestTemplateTemplateParameterWrittenWithTypename) { + ASSERT_TRUE(PrintedDeclCXX17Matches( + "template