[mlir][ArmSVE] Add -arm-sve-legalize-vector-storage pass

MacDue · MacDue · commit 2ba4626eafcc · 2023-10-24T09:11:24.000Z
This patch adds a pass that ensures that loads, stores, and allocations
of SVE vector types will be legal in the LLVM backend. It does this at
the memref level, so this pass must be applied before lowering all the
way to LLVM.

This pass currently fixes two issues.

It is only legal to load/store predicate types equal to (or greater
than) a full predicate register, which in MLIR is `vector&lt;[16]xi1&gt;`.
Smaller predicate types (`vector&lt;[1|2|4|8]xi1&gt;`) must be converted
to/from a full predicate type (referred to as a `svbool`) before and
after storing and loading respectively. This pass does this by widening
allocations and inserting conversion intrinsics.

For example:

```mlir
%alloca = memref.alloca() : memref&lt;vector&lt;[4]xi1&gt;&gt;
%mask = vector.constant_mask [4] : vector&lt;[4]xi1&gt;
memref.store %mask, %alloca[] : memref&lt;vector&lt;[4]xi1&gt;&gt;
%reload = memref.load %alloca[] : memref&lt;vector&lt;[4]xi1&gt;&gt;
```
Becomes:
```mlir
%alloca = memref.alloca() {alignment = 1 : i64} : memref&lt;vector&lt;[16]xi1&gt;&gt;
%mask = vector.constant_mask [4] : vector&lt;[4]xi1&gt;
%svbool = arm_sve.convert_to_svbool %mask : vector&lt;[4]xi1&gt;
memref.store %svbool, %alloca[] : memref&lt;vector&lt;[16]xi1&gt;&gt;
%reload_svbool = memref.load %alloca[] : memref&lt;vector&lt;[16]xi1&gt;&gt;
%reload = arm_sve.convert_from_svbool %reload_svbool : vector&lt;[4]xi1&gt;
```

The storage for SVE vector types only needs to have an alignment that
matches the element type (for example 4 byte alignment for `f32`s).
However, the LLVM backend currently defaults to aligning to `base size`
x `element size` bytes. For non-legal vector types like
`vector&lt;[8]xf32&gt;` this results in 8 x 4 = 32-byte alignment, but the
backend only supports up to 16-byte alignment for SVE vectors on the
stack. Explicitly setting a smaller alignment prevents this issue.
diff --git a/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(IR)
+add_subdirectory(Transforms)
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name ArmSVE)
+add_public_tablegen_target(MLIRArmSVEPassIncGen)
+
+add_mlir_doc(Passes ArmSVEPasses ./ -gen-pass-doc)
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.h
@@ -0,0 +1,33 @@
+//===- Passes.h - Pass Entrypoints ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
+#define MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
+
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir::arm_sve {
+
+#define GEN_PASS_DECL
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h.inc"
+
+/// Pass to legalize the types of mask stores.
+std::unique_ptr<Pass> createLegalizeVectorStoragePass();
+
+//===----------------------------------------------------------------------===//
+// Registration
+//===----------------------------------------------------------------------===//
+
+/// Generate the code for registering passes.
+#define GEN_PASS_REGISTRATION
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h.inc"
+
+} // namespace mlir::arm_sve
+
+#endif // MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_H
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td b/mlir/include/mlir/Dialect/ArmSVE/Transforms/Passes.td
@@ -0,0 +1,67 @@
+//===-- Passes.td - ArmSVE pass definition file ------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD
+#define MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD
+
+include "mlir/Pass/PassBase.td"
+
+def LegalizeVectorStorage
+    : Pass<"arm-sve-legalize-vector-storage", "mlir::func::FuncOp"> {
+  let summary = "Ensures stores of SVE vector types will be legal";
+  let description = [{
+    This pass ensures that loads, stores, and allocations of SVE vector types
+    will be legal in the LLVM backend. It does this at the memref level, so this
+    pass must be applied before lowering all the way to LLVM.
+
+    This pass currently fixes two issues.
+
+    ## Loading and storing predicate types
+
+    It is only legal to load/store predicate types equal to (or greater than) a
+    full predicate register, which in MLIR is `vector<[16]xi1>`. Smaller
+    predicate types (`vector<[1|2|4|8]xi1>`) must be converted to/from a full
+    predicate type (referred to as a `svbool`) before and after storing and
+    loading respectively. This pass does this by widening allocations and
+    inserting conversion intrinsics.
+
+    For example:
+
+    ```mlir
+    %alloca = memref.alloca() : memref<vector<[4]xi1>>
+    %mask = vector.constant_mask [4] : vector<[4]xi1>
+    memref.store %mask, %alloca[] : memref<vector<[4]xi1>>
+    %reload = memref.load %alloca[] : memref<vector<[4]xi1>>
+    ```
+    Becomes:
+    ```mlir
+    %alloca = memref.alloca() {alignment = 1 : i64} : memref<vector<[16]xi1>>
+    %mask = vector.constant_mask [4] : vector<[4]xi1>
+    %svbool = arm_sve.convert_to_svbool %mask : vector<[4]xi1>
+    memref.store %svbool, %alloca[] : memref<vector<[16]xi1>>
+    %reload_svbool = memref.load %alloca[] : memref<vector<[16]xi1>>
+    %reload = arm_sve.convert_from_svbool %reload_svbool : vector<[4]xi1>
+    ```
+
+    ## Relax alignments for SVE vector allocas
+
+    The storage for SVE vector types only needs to have an alignment that
+    matches the element type (for example 4 byte alignment for `f32`s). However,
+    the LLVM backend currently defaults to aligning to `base size` x
+    `element size` bytes. For non-legal vector types like `vector<[8]xf32>` this
+    results in 8 x 4 = 32-byte alignment, but the backend only supports up to
+    16-byte alignment for SVE vectors on the stack. Explicitly setting a smaller
+    alignment prevents this issue.
+  }];
+  let constructor = "mlir::arm_sve::createLegalizeVectorStoragePass()";
+  let dependentDialects = ["func::FuncDialect",
+    "memref::MemRefDialect", "vector::VectorDialect",
+    "arm_sve::ArmSVEDialect"];
+}
+
+#endif // MLIR_DIALECT_ARMSVE_TRANSFORMS_PASSES_TD
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Arith/Transforms/Passes.h"
 #include "mlir/Dialect/ArmSME/Transforms/Passes.h"
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h"
 #include "mlir/Dialect/Async/Passes.h"
 #include "mlir/Dialect/Bufferization/Pipelines/Passes.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
@@ -82,6 +83,7 @@ inline void registerAllPasses() {
   transform::registerTransformPasses();
   vector::registerVectorPasses();
   arm_sme::registerArmSMEPasses();
+  arm_sve::registerArmSVEPasses();
 
   // Dialect pipelines
   bufferization::registerBufferizationPipelines();
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -1,8 +1,10 @@
 add_mlir_dialect_library(MLIRArmSVETransforms
   LegalizeForLLVMExport.cpp
+  LegalizeVectorStorage.cpp
 
   DEPENDS
   MLIRArmSVEConversionsIncGen
+  MLIRArmSVEPassIncGen
 
   LINK_LIBS PUBLIC
   MLIRArmSVEDialect
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeVectorStorage.cpp
diff --git a/mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir b/mlir/test/Dialect/ArmSVE/legalize-vector-storage.mlir
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/arrays-of-scalable-vectors.mlir

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`add_subdirectory(IR)`
	`2`	`+add_subdirectory(Transforms)`