From 1192f5a362a2014c701dbf69806cd4a220d98af9 Mon Sep 17 00:00:00 2001
From: Martin Erhart
Date: Wed, 20 Sep 2023 10:09:19 +0000
Subject: [PATCH] [mlir][gpu][bufferization] Implement BufferDeallocationOpInterface for gpu.terminator

This is necessary to support deallocation of IR with gpu.launch operations
because it does not implement the RegionBranchOpInterface. Implementing the
interface would require it to support regions with unstructured control flow
and produced arguments/results.
---
 .../IR/BufferDeallocationOpInterface.h        | 17 ++++++++
 .../BufferDeallocationOpInterfaceImpl.h       | 22 ++++++++++
 mlir/include/mlir/InitAllDialects.h           |  2 +
 .../IR/BufferDeallocationOpInterface.cpp      | 41 +++++++++++++++++++
 .../OwnershipBasedBufferDeallocation.cpp      | 35 +++-------------
 mlir/lib/Dialect/GPU/CMakeLists.txt           |  2 +
 .../BufferDeallocationOpInterfaceImpl.cpp     | 37 +++++++++++++++++
 .../BufferDeallocationOpInterfaceImpl.cpp     | 27 ++----------
 .../bufferization-buffer-deallocation.mlir    | 18 ++++++++
 .../llvm-project-overlay/mlir/BUILD.bazel     |  1 +
 10 files changed, 149 insertions(+), 53 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h
 create mode 100644 mlir/lib/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.cpp
 create mode 100644 mlir/test/Dialect/GPU/bufferization-buffer-deallocation.mlir

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
index 7ac4592de7875..752a4a2c6f42a 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
@@ -205,6 +205,23 @@ class DeallocationState {
   Liveness liveness;
 };
 
+namespace deallocation_impl {
+/// Insert a `bufferization.dealloc` operation right before `op` which has to be
+/// a terminator without any successors. Note that it is not required to have
+/// the ReturnLike trait attached. The MemRef values in the `operands` argument
+/// will be added to the list of retained values and their updated ownership
+/// values will be appended to the `updatedOperandOwnerships` list. `op` is not
+/// modified in any way. Returns failure if at least one of the MemRefs to
+/// deallocate does not have 'Unique' ownership (likely as a result of an
+/// incorrect implementation of the `process` or
+/// `materializeUniqueOwnershipForMemref` interface method) or the original
+/// `op`.
+FailureOr<Operation *>
+insertDeallocOpForReturnLike(DeallocationState &state, Operation *op,
+                             ValueRange operands,
+                             SmallVectorImpl<Value> &updatedOperandOwnerships);
+} // namespace deallocation_impl
+
 } // namespace bufferization
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h b/mlir/include/mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h
new file mode 100644
index 0000000000000..16cf96980de13
--- /dev/null
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h
@@ -0,0 +1,22 @@
+//===- BufferDeallocationOpInterfaceImpl.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H
+#define MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H
+
+namespace mlir {
+
+class DialectRegistry;
+
+namespace gpu {
+void registerBufferDeallocationOpInterfaceExternalModels(
+    DialectRegistry &registry);
+} // namespace gpu
+} // namespace mlir
+
+#endif // MLIR_DIALECT_GPU_TRANSFORMS_BUFFERDEALLOCATIONOPINTERFACEIMPL_H
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 5b2b1ed24d517..8a085d91cedff 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -36,6 +36,7 @@
 #include "mlir/Dialect/EmitC/IR/EmitC.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
 #include "mlir/Dialect/IRDL/IR/IRDL.h"
 #include "mlir/Dialect/Index/IR/IndexDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -144,6 +145,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
   builtin::registerCastOpInterfaceExternalModels(registry);
   cf::registerBufferizableOpInterfaceExternalModels(registry);
   cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
+  gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
   linalg::registerBufferizableOpInterfaceExternalModels(registry);
   linalg::registerTilingInterfaceExternalModels(registry);
   linalg::registerValueBoundsOpInterfaceExternalModels(registry);
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
index 407d75e2426e9..8d21446f1eb77 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
@@ -272,3 +272,44 @@ bool ValueComparator::operator()(const Value &lhs, const Value &rhs) const {
   assert(lhsRegion && "this should only happen if lhs == rhs");
   return false;
 }
+
+//===----------------------------------------------------------------------===//
+// Implementation utilities
+//===----------------------------------------------------------------------===//
+
+FailureOr<Operation *> deallocation_impl::insertDeallocOpForReturnLike(
+    DeallocationState &state, Operation *op, ValueRange operands,
+    SmallVectorImpl<Value> &updatedOperandOwnerships) {
+  assert(op->hasTrait<OpTrait::IsTerminator>() && "must be a terminator");
+  assert(!op->hasSuccessors() && "must not have any successors");
+  // Collect the values to deallocate and retain and use them to create the
+  // dealloc operation.
+  OpBuilder builder(op);
+  Block *block = op->getBlock();
+  SmallVector<Value> memrefs, conditions, toRetain;
+  if (failed(state.getMemrefsAndConditionsToDeallocate(
+          builder, op->getLoc(), block, memrefs, conditions)))
+    return failure();
+
+  state.getMemrefsToRetain(block, /*toBlock=*/nullptr, operands, toRetain);
+  if (memrefs.empty() && toRetain.empty())
+    return op;
+
+  auto deallocOp = builder.create<bufferization::DeallocOp>(
+      op->getLoc(), memrefs, conditions, toRetain);
+
+  // We want to replace the current ownership of the retained values with the
+  // result values of the dealloc operation as they are always unique.
+  state.resetOwnerships(deallocOp.getRetained(), block);
+  for (auto [retained, ownership] :
+       llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions()))
+    state.updateOwnership(retained, ownership, block);
+
+  unsigned numMemrefOperands = llvm::count_if(operands, isMemref);
+  auto newOperandOwnerships =
+      deallocOp.getUpdatedConditions().take_front(numMemrefOperands);
+  updatedOperandOwnerships.append(newOperandOwnerships.begin(),
+                                  newOperandOwnerships.end());
+
+  return op;
+}
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
index 09d3083582808..94a26f3aff5e0 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
@@ -47,10 +47,6 @@ static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
 
 static bool isMemref(Value v) { return v.getType().isa<BaseMemRefType>(); }
 
-static bool isMemrefOperand(OpOperand &operand) {
-  return isMemref(operand.get());
-}
-
 //===----------------------------------------------------------------------===//
 // Backedges analysis
 //===----------------------------------------------------------------------===//
@@ -917,35 +913,16 @@ BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
   MutableOperandRange operands =
       op.getMutableSuccessorOperands(RegionBranchPoint::parent());
 
-  // Collect the values to deallocate and retain and use them to create the
-  // dealloc operation.
-  Block *block = op->getBlock();
-  SmallVector<Value> memrefs, conditions, toRetain;
-  if (failed(state.getMemrefsAndConditionsToDeallocate(
-          builder, op.getLoc(), block, memrefs, conditions)))
-    return failure();
-
-  state.getMemrefsToRetain(block, nullptr, OperandRange(operands), toRetain);
-  if (memrefs.empty() && toRetain.empty())
-    return op.getOperation();
-
-  auto deallocOp = builder.create<bufferization::DeallocOp>(
-      op.getLoc(), memrefs, conditions, toRetain);
-
-  // We want to replace the current ownership of the retained values with the
-  // result values of the dealloc operation as they are always unique.
-  state.resetOwnerships(deallocOp.getRetained(), block);
-  for (auto [retained, ownership] :
-       llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions()))
-    state.updateOwnership(retained, ownership, block);
+  SmallVector<Value> updatedOwnerships;
+  auto result = deallocation_impl::insertDeallocOpForReturnLike(
+      state, op, OperandRange(operands), updatedOwnerships);
+  if (failed(result) || !*result)
+    return result;
 
   // Add an additional operand for every MemRef for the ownership indicator.
   if (!funcWithoutDynamicOwnership) {
-    unsigned numMemRefs = llvm::count_if(operands, isMemrefOperand);
     SmallVector<Value> newOperands{OperandRange(operands)};
-    auto ownershipValues =
-        deallocOp.getUpdatedConditions().take_front(numMemRefs);
-    newOperands.append(ownershipValues.begin(), ownershipValues.end());
+    newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
     operands.assign(newOperands);
   }
 
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 6244132c073a4..324d5c1366722 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -48,6 +48,7 @@ add_mlir_dialect_library(MLIRGPUDialect
 add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/AllReduceLowering.cpp
   Transforms/AsyncRegionRewriter.cpp
+  Transforms/BufferDeallocationOpInterfaceImpl.cpp
   Transforms/DecomposeMemrefs.cpp
   Transforms/GlobalIdRewriter.cpp
   Transforms/KernelOutlining.cpp
@@ -79,6 +80,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
   MLIRAffineUtils
   MLIRArithDialect
   MLIRAsyncDialect
+  MLIRBufferizationDialect
   MLIRBuiltinToLLVMIRTranslation
   MLIRDataLayoutInterfaces
   MLIRExecutionEngineUtils
diff --git a/mlir/lib/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.cpp
new file mode 100644
index 0000000000000..6ccc0a26426c1
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -0,0 +1,37 @@
+//===- BufferDeallocationOpInterfaceImpl.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+
+using namespace mlir;
+using namespace mlir::bufferization;
+
+namespace {
+///
+struct GPUTerminatorOpInterface
+    : public BufferDeallocationOpInterface::ExternalModel<
+          GPUTerminatorOpInterface, gpu::TerminatorOp> {
+  FailureOr<Operation *> process(Operation *op, DeallocationState &state,
+                                 const DeallocationOptions &options) const {
+    SmallVector<Value> updatedOperandOwnerships;
+    return deallocation_impl::insertDeallocOpForReturnLike(
+        state, op, {}, updatedOperandOwnerships);
+  }
+};
+
+} // namespace
+
+void mlir::gpu::registerBufferDeallocationOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) {
+    gpu::TerminatorOp::attachInterface<GPUTerminatorOpInterface>(*ctx);
+  });
+}
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
index 88cb3e9b09714..4ded8ba55013d 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -47,33 +47,12 @@ struct InParallelOpInterface
   FailureOr<Operation *> process(Operation *op, DeallocationState &state,
                                  const DeallocationOptions &options) const {
     auto inParallelOp = cast<scf::InParallelOp>(op);
-    OpBuilder builder(op);
     if (!inParallelOp.getBody()->empty())
       return op->emitError("only supported when nested region is empty");
 
-    // Collect the values to deallocate and retain and use them to create the
-    // dealloc operation.
-    Block *block = op->getBlock();
-    SmallVector<Value> memrefs, conditions, toRetain;
-    if (failed(state.getMemrefsAndConditionsToDeallocate(
-            builder, op->getLoc(), block, memrefs, conditions)))
-      return failure();
-
-    state.getMemrefsToRetain(block, /*toBlock=*/nullptr, {}, toRetain);
-    if (memrefs.empty() && toRetain.empty())
-      return op;
-
-    auto deallocOp = builder.create<bufferization::DeallocOp>(
-        op->getLoc(), memrefs, conditions, toRetain);
-
-    // We want to replace the current ownership of the retained values with the
-    // result values of the dealloc operation as they are always unique.
-    state.resetOwnerships(deallocOp.getRetained(), block);
-    for (auto [retained, ownership] :
-         llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions()))
-      state.updateOwnership(retained, ownership, block);
-
-    return op;
+    SmallVector<Value> updatedOperandOwnership;
+    return deallocation_impl::insertDeallocOpForReturnLike(
+        state, op, {}, updatedOperandOwnership);
   }
 };
 
diff --git a/mlir/test/Dialect/GPU/bufferization-buffer-deallocation.mlir b/mlir/test/Dialect/GPU/bufferization-buffer-deallocation.mlir
new file mode 100644
index 0000000000000..25349967e61d3
--- /dev/null
+++ b/mlir/test/Dialect/GPU/bufferization-buffer-deallocation.mlir
@@ -0,0 +1,18 @@
+// RUN: mlir-opt %s -buffer-deallocation-pipeline --allow-unregistered-dialect | FileCheck %s
+
+func.func @gpu_launch() {
+  %c1 = arith.constant 1 : index
+  gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
+    threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
+    %alloc = memref.alloc() : memref<2xf32>
+    "test.memref_user"(%alloc) : (memref<2xf32>) -> ()
+    gpu.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: func @gpu_launch
+// CHECK: gpu.launch
+// CHECK: [[ALLOC:%.+]] = memref.alloc(
+// CHECK: memref.dealloc [[ALLOC]]
+// CHECK: gpu.terminator
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 7ae9b6173ec72..3c167abbd5e9a 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4927,6 +4927,7 @@ cc_library(
         ":ArithDialect",
         ":AsmParser",
         ":AsyncDialect",
+        ":BufferizationDialect",
         ":ControlFlowDialect",
         ":DLTIDialect",
         ":DialectUtils",