diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir index 621baef82319f..31c4775696b31 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir @@ -1,6 +1,8 @@ // RUN: mlir-opt %s -test-vector-transfer-flatten-patterns -split-input-file | FileCheck %s // RUN: mlir-opt %s -test-vector-transfer-flatten-patterns=target-vector-bitwidth=128 -split-input-file | FileCheck %s --check-prefix=CHECK-128B +// TODO: Align naming and format with e.g. vector-transfer-permutation-lowering.mlir + ///---------------------------------------------------------------------------------------- /// vector.transfer_read /// [Pattern: FlattenContiguousRowMajorTransferReadPattern] diff --git a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir index 3ca430a92cf97..15000d706adfc 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir @@ -1,10 +1,5 @@ // RUN: mlir-opt %s --transform-interpreter --split-input-file | FileCheck %s -// TODO: Align naming with e.g. vector-transfer-flatten.mlir -// TODO: Replace %arg0 with %vec -// TODO: Replace index args as %idx -// TODO: Align argument definition in CHECKS with function body. - ///---------------------------------------------------------------------------------------- /// vector.transfer_write -> vector.transpose + vector.transfer_write /// [Pattern: TransferWritePermutationLowering] @@ -17,18 +12,18 @@ /// _is_ a minor identity // CHECK-LABEL: func.func @xfer_write_transposing_permutation_map -// CHECK-SAME: %[[ARG_0:.*]]: vector<4x8xi16>, +// CHECK-SAME: %[[VEC:.*]]: vector<4x8xi16>, // CHECK-SAME: %[[MEM:.*]]: memref<2x2x8x4xi16>) { -// CHECK: %[[TR:.*]] = vector.transpose %[[ARG_0]], [1, 0] : vector<4x8xi16> to vector<8x4xi16> +// CHECK: %[[TR:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<4x8xi16> to vector<8x4xi16> // CHECK: vector.transfer_write // CHECK-NOT: permutation_map // CHECK-SAME: %[[TR]], %[[MEM]]{{.*}} {in_bounds = [true, true]} : vector<8x4xi16>, memref<2x2x8x4xi16> func.func @xfer_write_transposing_permutation_map( - %arg0: vector<4x8xi16>, + %vec: vector<4x8xi16>, %mem: memref<2x2x8x4xi16>) { %c0 = arith.constant 0 : index - vector.transfer_write %arg0, %mem[%c0, %c0, %c0, %c0] { + vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] { in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)> } : vector<4x8xi16>, memref<2x2x8x4xi16> @@ -38,10 +33,10 @@ func.func @xfer_write_transposing_permutation_map( // Even with out-of-bounds, it is safe to apply this pattern // CHECK-LABEL: func.func @xfer_write_transposing_permutation_map_out_of_bounds -// CHECK-SAME: %[[ARG_0:.*]]: vector<4x8xi16>, +// CHECK-SAME: %[[VEC:.*]]: vector<4x8xi16>, // CHECK-SAME: %[[MEM:.*]]: memref<2x2x?x?xi16>) { // CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[TR:.*]] = vector.transpose %[[ARG_0]], [1, 0] : vector<4x8xi16> to vector<8x4xi16> +// CHECK: %[[TR:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<4x8xi16> to vector<8x4xi16> // Expect the in_bounds attribute to be preserved. Since we don't print it when // all flags are "false", it should not appear in the output. // CHECK-NOT: in_bounds @@ -49,11 +44,11 @@ func.func @xfer_write_transposing_permutation_map( // CHECK-NOT: permutation_map // CHECK-SAME: %[[TR]], %[[MEM]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] : vector<8x4xi16>, memref<2x2x?x?xi16> func.func @xfer_write_transposing_permutation_map_out_of_bounds( - %arg0: vector<4x8xi16>, + %vec: vector<4x8xi16>, %mem: memref<2x2x?x?xi16>) { %c0 = arith.constant 0 : index - vector.transfer_write %arg0, %mem[%c0, %c0, %c0, %c0] { + vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] { in_bounds = [false, false], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)> } : vector<4x8xi16>, memref<2x2x?x?xi16> @@ -62,20 +57,20 @@ func.func @xfer_write_transposing_permutation_map_out_of_bounds( } // CHECK-LABEL: func.func @xfer_write_transposing_permutation_map_with_mask_scalable -// CHECK-SAME: %[[ARG_0:.*]]: vector<4x[8]xi16>, +// CHECK-SAME: %[[VEC:.*]]: vector<4x[8]xi16>, // CHECK-SAME: %[[MEM:.*]]: memref<2x2x?x4xi16>, // CHECK-SAME: %[[MASK:.*]]: vector<[8]x4xi1>) { -// CHECK: %[[TR:.*]] = vector.transpose %[[ARG_0]], [1, 0] : vector<4x[8]xi16> to vector<[8]x4xi16> +// CHECK: %[[TR:.*]] = vector.transpose %[[VEC]], [1, 0] : vector<4x[8]xi16> to vector<[8]x4xi16> // CHECK: vector.transfer_write // CHECK-NOT: permutation_map // CHECK-SAME: %[[TR]], %[[MEM]]{{.*}}, %[[MASK]] {in_bounds = [true, true]} : vector<[8]x4xi16>, memref<2x2x?x4xi16> func.func @xfer_write_transposing_permutation_map_with_mask_scalable( - %arg0: vector<4x[8]xi16>, + %vec: vector<4x[8]xi16>, %mem: memref<2x2x?x4xi16>, %mask: vector<[8]x4xi1>) { %c0 = arith.constant 0 : index - vector.transfer_write %arg0, %mem[%c0, %c0, %c0, %c0], %mask { + vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0], %mask { in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)> } : vector<4x[8]xi16>, memref<2x2x?x4xi16> @@ -87,13 +82,13 @@ func.func @xfer_write_transposing_permutation_map_with_mask_scalable( // CHECK-LABEL: func.func @xfer_write_transposing_permutation_map_masked // CHECK-NOT: vector.transpose func.func @xfer_write_transposing_permutation_map_masked( - %arg0: vector<4x8xi16>, + %vec: vector<4x8xi16>, %mem: memref<2x2x8x4xi16>, %mask: vector<8x4xi1>) { %c0 = arith.constant 0 : index vector.mask %mask { - vector.transfer_write %arg0, %mem[%c0, %c0, %c0, %c0] { + vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0] { in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d3, d2)> } : vector<4x8xi16>, memref<2x2x8x4xi16> @@ -122,13 +117,14 @@ func.func @xfer_write_transposing_permutation_map_masked( // CHECK: vector.transfer_write %[[TR]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]] {in_bounds = [false, true]} : vector<7x1xf32>, memref func.func @xfer_write_non_transposing_permutation_map( %mem : memref, - %arg0 : vector<7xf32>, + %vec : vector<7xf32>, %idx_1 : index, %idx_2 : index) { - vector.transfer_write %arg0, %mem[%idx_1, %idx_2] - {permutation_map = affine_map<(d0, d1) -> (d0)>} - : vector<7xf32>, memref + vector.transfer_write %vec, %mem[%idx_1, %idx_2] { + permutation_map = affine_map<(d0, d1) -> (d0)> + } : vector<7xf32>, memref + return } @@ -145,78 +141,111 @@ func.func @xfer_write_non_transposing_permutation_map( // CHECK: vector.transfer_write %[[TR_VEC]], %[[MEM]]{{\[}}%[[IDX_1]], %[[IDX_2]]], %[[TR_MASK]] {in_bounds = [false, true]} : vector<7x1xf32>, memref func.func @xfer_write_non_transposing_permutation_map_with_mask_out_of_bounds( %mem : memref, - %arg0 : vector<7xf32>, + %vec : vector<7xf32>, %idx_1 : index, %idx_2 : index, %mask : vector<7xi1>) { - vector.transfer_write %arg0, %mem[%idx_1, %idx_2], %mask - {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [false]} - : vector<7xf32>, memref + vector.transfer_write %vec, %mem[%idx_1, %idx_2], %mask { + permutation_map = affine_map<(d0, d1) -> (d0)>, + in_bounds = [false] + } : vector<7xf32>, memref + return } // CHECK: func.func @permutation_with_mask_xfer_write_scalable( -// CHECK-SAME: %[[ARG_0:.*]]: vector<4x[8]xi16>, -// CHECK-SAME: %[[ARG_1:.*]]: memref<1x4x?x1xi16>, +// CHECK-SAME: %[[VEC:.*]]: vector<4x[8]xi16>, +// CHECK-SAME: %[[MEM:.*]]: memref<1x4x?x1xi16>, // CHECK-SAME: %[[MASK:.*]]: vector<4x[8]xi1>) { // CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[BCAST_1:.*]] = vector.broadcast %[[ARG_0]] : vector<4x[8]xi16> to vector<1x4x[8]xi16> -// CHECK: %[[BCAST_2:.*]] = vector.broadcast %[[MASK]] : vector<4x[8]xi1> to vector<1x4x[8]xi1> -// CHECK: %[[TRANSPOSE_1:.*]] = vector.transpose %[[BCAST_2]], [1, 2, 0] : vector<1x4x[8]xi1> to vector<4x[8]x1xi1> -// CHECK: %[[TRANSPOSE_2:.*]] = vector.transpose %[[BCAST_1]], [1, 2, 0] : vector<1x4x[8]xi16> to vector<4x[8]x1xi16> -// CHECK: vector.transfer_write %[[TRANSPOSE_2]], %[[ARG_1]]{{.*}}, %[[TRANSPOSE_1]] {in_bounds = [true, true, true]} : vector<4x[8]x1xi16>, memref<1x4x?x1xi16> -func.func @permutation_with_mask_xfer_write_scalable(%arg0: vector<4x[8]xi16>, %mem: memref<1x4x?x1xi16>, %mask: vector<4x[8]xi1>){ - %c0 = arith.constant 0 : index - vector.transfer_write %arg0, %mem[%c0, %c0, %c0, %c0], %mask {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)> -} : vector<4x[8]xi16>, memref<1x4x?x1xi16> - - return +// CHECK: %[[BC_1:.*]] = vector.broadcast %[[VEC]] : vector<4x[8]xi16> to vector<1x4x[8]xi16> +// CHECK: %[[BC_2:.*]] = vector.broadcast %[[MASK]] : vector<4x[8]xi1> to vector<1x4x[8]xi1> +// CHECK: %[[TRANSPOSE_1:.*]] = vector.transpose %[[BC_2]], [1, 2, 0] : vector<1x4x[8]xi1> to vector<4x[8]x1xi1> +// CHECK: %[[TRANSPOSE_2:.*]] = vector.transpose %[[BC_1]], [1, 2, 0] : vector<1x4x[8]xi16> to vector<4x[8]x1xi16> +// CHECK: vector.transfer_write %[[TRANSPOSE_2]], %[[MEM]]{{.*}}, %[[TRANSPOSE_1]] {in_bounds = [true, true, true]} : vector<4x[8]x1xi16>, memref<1x4x?x1xi16> +func.func @permutation_with_mask_xfer_write_scalable( + %vec: vector<4x[8]xi16>, + %mem: memref<1x4x?x1xi16>, + %mask: vector<4x[8]xi1>){ + + %c0 = arith.constant 0 : index + vector.transfer_write %vec, %mem[%c0, %c0, %c0, %c0], %mask { + in_bounds = [true, true], + permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)> + } : vector<4x[8]xi16>, memref<1x4x?x1xi16> + + return } // transfer_write in MaskOp case not supported. // CHECK-LABEL: func @masked_permutation_xfer_write_fixed_width -// CHECK-SAME: %[[ARG_0:.*]]: tensor, -// CHECK-SAME: %[[ARG_1:.*]]: vector<16xf32>, -// CHECK-SAME: %[[IDX:.*]]: index, -// CHECK-SAME: %[[MASK:.*]]: vector<16xi1> +// CHECK-SAME: %[[DEST:.*]]: tensor, +// CHECK-SAME: %[[VEC:.*]]: vector<16xf32>, +// CHECK-SAME: %[[IDX:.*]]: index, +// CHECK-SAME: %[[MASK:.*]]: vector<16xi1> // CHECK-NOT: vector.transpose -// CHECK: %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[ARG_1]], %[[ARG_0]]{{.*}} vector<16xf32>, tensor } : vector<16xi1> -> tensor -func.func @masked_permutation_xfer_write_fixed_width(%t: tensor, %val: vector<16xf32>, %idx: index, %mask: vector<16xi1>) -> tensor { - %r = vector.mask %mask { vector.transfer_write %val, %t[%idx, %idx] {permutation_map = affine_map<(d0, d1) -> (d0)>} : vector<16xf32>, tensor } : vector<16xi1> -> tensor - return %r : tensor +// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[VEC]], %[[DEST]]{{.*}} vector<16xf32>, tensor } : vector<16xi1> -> tensor +func.func @masked_permutation_xfer_write_fixed_width( + %dest: tensor, + %vec: vector<16xf32>, + %idx: index, + %mask: vector<16xi1>) -> tensor { + + %res = vector.mask %mask { + vector.transfer_write %vec, %dest[%idx, %idx] { + permutation_map = affine_map<(d0, d1) -> (d0)> + } : vector<16xf32>, tensor + } : vector<16xi1> -> tensor + + return %res : tensor } // CHECK-LABEL: func.func @masked_permutation_xfer_write_scalable( -// CHECK-SAME: %[[ARG_0:.*]]: vector<4x[8]xi16>, -// CHECK-SAME: %[[ARG_1:.*]]: tensor, -// CHECK-SAME: %[[MASK:.*]]: vector<4x[8]xi1>) -// CHECK-SAME: -> tensor { +// CHECK-SAME: %[[VEC:.*]]: vector<4x[8]xi16>, +// CHECK-SAME: %[[DEST:.*]]: tensor, +// CHECK-SAME: %[[MASK:.*]]: vector<4x[8]xi1>) +// CHECK-SAME: -> tensor { // CHECK-NOT: vector.transpose -// CHECK: %[[R:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[ARG_0]], %[[ARG_1]]{{.*}} : vector<4x[8]xi16>, tensor } : vector<4x[8]xi1> -> tensor -func.func @masked_permutation_xfer_write_scalable(%arg0: vector<4x[8]xi16>, %t: tensor, %mask: vector<4x[8]xi1>) -> tensor { - %c0 = arith.constant 0 : index - %r = vector.mask %mask { vector.transfer_write %arg0, %t[%c0, %c0, %c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)> -} : vector<4x[8]xi16>, tensor } : vector<4x[8]xi1> -> tensor +// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[VEC]], %[[DEST]]{{.*}} : vector<4x[8]xi16>, tensor } : vector<4x[8]xi1> -> tensor +func.func @masked_permutation_xfer_write_scalable( + %vec: vector<4x[8]xi16>, + %dest: tensor, + %mask: vector<4x[8]xi1>) -> tensor { + + %c0 = arith.constant 0 : index + %res = vector.mask %mask { + vector.transfer_write %vec, %dest[%c0, %c0, %c0, %c0] { + in_bounds = [true, true], + permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)> + } : vector<4x[8]xi16>, tensor + } : vector<4x[8]xi1> -> tensor - return %r : tensor + return %res : tensor } // transfer_write in MaskOp case not supported. // CHECK-LABEL: func @masked_non_permutation_xfer_write_fixed_width -// CHECK-SAME: %[[ARG0:.*]]: tensor -// CHECK-SAME: %[[ARG1:.*]]: vector<14x8x16xf32> -// CHECK-SAME: %[[IDX:.*]]: index) -> tensor +// CHECK-SAME: %[[DEST:.*]]: tensor +// CHECK-SAME: %[[VEC:.*]]: vector<14x8x16xf32> +// CHECK-SAME: %[[IDX:.*]]: index) -> tensor // CHECK-NOT: vector.broadcast -// CHECK: %[[masked1:.*]] = vector.mask %0 { vector.transfer_write %[[ARG1]], %[[ARG0]]{{.*}} : vector<14x8x16xf32>, tensor } : vector<14x8x16xi1> -> tensor +// CHECK: vector.mask %0 { vector.transfer_write %[[VEC]], %[[DEST]]{{.*}} : vector<14x8x16xf32>, tensor } : vector<14x8x16xi1> -> tensor func.func @masked_non_permutation_xfer_write_fixed_width( - %arg0 : tensor, - %v1 : vector<14x8x16xf32>, %dim : index) -> tensor { + %dest : tensor, + %vec : vector<14x8x16xf32>, + %dim : index) -> tensor { + %c0 = arith.constant 0 : index %mask = vector.create_mask %dim, %dim, %dim : vector<14x8x16xi1> - %0 = vector.mask %mask { vector.transfer_write %v1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>} : vector<14x8x16xf32>, tensor } : vector<14x8x16xi1> -> tensor - - return %0 : tensor + %res = vector.mask %mask { + vector.transfer_write %vec, %dest[%c0, %c0, %c0, %c0] { + in_bounds = [false, false, true], + permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> + } : vector<14x8x16xf32>, tensor + } : vector<14x8x16xi1> -> tensor + + return %res : tensor } ///---------------------------------------------------------------------------------------- @@ -229,80 +258,105 @@ func.func @masked_non_permutation_xfer_write_fixed_width( /// vector.transpose op // CHECK-LABEL: func.func @permutation_with_mask_xfer_read_fixed_width( -// CHECK-SAME: %[[ARG_0:.*]]: memref, +// CHECK-SAME: %[[MEM:.*]]: memref, // CHECK-SAME: %[[IDX_1:.*]]: index, // CHECK-SAME: %[[IDX_2:.*]]: index) -> vector<8x4x2xf32> { // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[PASS_THROUGH:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[MASK:.*]] = vector.create_mask %[[IDX_2]], %[[IDX_1]] : vector<2x4xi1> -// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref, vector<2x4xf32> +// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref, vector<2x4xf32> // CHECK: %[[BCAST:.*]] = vector.broadcast %[[T_READ]] : vector<2x4xf32> to vector<8x2x4xf32> // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[BCAST]], [0, 2, 1] : vector<8x2x4xf32> to vector<8x4x2xf32> // CHECK: return %[[TRANSPOSE]] : vector<8x4x2xf32> -func.func @permutation_with_mask_xfer_read_fixed_width(%mem: memref, %dim_1: index, %dim_2: index) -> (vector<8x4x2xf32>) { +func.func @permutation_with_mask_xfer_read_fixed_width( + %mem: memref, + %dim_1: index, + %dim_2: index) -> (vector<8x4x2xf32>) { %c0 = arith.constant 0 : index %cst_0 = arith.constant 0.000000e+00 : f32 %mask = vector.create_mask %dim_2, %dim_1 : vector<2x4xi1> - %1 = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask - {in_bounds = [true, true, true], permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>} - : memref, vector<8x4x2xf32> - return %1 : vector<8x4x2xf32> + %res = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask { + in_bounds = [true, true, true], + permutation_map = affine_map<(d0, d1) -> (0, d1, d0)> + } : memref, vector<8x4x2xf32> + + return %res : vector<8x4x2xf32> } // CHECK-LABEL: func.func @permutation_with_mask_xfer_read_scalable( -// CHECK-SAME: %[[ARG_0:.*]]: memref, +// CHECK-SAME: %[[MEM:.*]]: memref, // CHECK-SAME: %[[IDX_1:.*]]: index, // CHECK-SAME: %[[IDX_2:.*]]: index) -> vector<8x[4]x2xf32> { // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[PASS_THROUGH:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[MASK:.*]] = vector.create_mask %[[IDX_2]], %[[IDX_1]] : vector<2x[4]xi1> -// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref, vector<2x[4]xf32> +// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]]{{\[}}%[[C0]], %[[C0]]], %[[PASS_THROUGH]], %[[MASK]] {in_bounds = [true, true]} : memref, vector<2x[4]xf32> // CHECK: %[[BCAST:.*]] = vector.broadcast %[[T_READ]] : vector<2x[4]xf32> to vector<8x2x[4]xf32> // CHECK: %[[TRANSPOSE:.*]] = vector.transpose %[[BCAST]], [0, 2, 1] : vector<8x2x[4]xf32> to vector<8x[4]x2xf32> // CHECK: return %[[TRANSPOSE]] : vector<8x[4]x2xf32> -func.func @permutation_with_mask_xfer_read_scalable(%mem: memref, %dim_1: index, %dim_2: index) -> (vector<8x[4]x2xf32>) { +func.func @permutation_with_mask_xfer_read_scalable( + %mem: memref, + %dim_1: index, + %dim_2: index) -> (vector<8x[4]x2xf32>) { %c0 = arith.constant 0 : index %cst_0 = arith.constant 0.000000e+00 : f32 %mask = vector.create_mask %dim_2, %dim_1 : vector<2x[4]xi1> - %1 = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask - {in_bounds = [true, true, true], permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>} - : memref, vector<8x[4]x2xf32> - return %1 : vector<8x[4]x2xf32> + %res = vector.transfer_read %mem[%c0, %c0], %cst_0, %mask { + in_bounds = [true, true, true], + permutation_map = affine_map<(d0, d1) -> (0, d1, d0)> + } : memref, vector<8x[4]x2xf32> + + return %res : vector<8x[4]x2xf32> } // transfer_read in MaskOp case not supported. // CHECK-LABEL: func @masked_permutation_xfer_read_fixed_width -// CHECK-SAME: %[[ARG_0:.*]]: tensor, -// CHECK-SAME: %[[ARG_1:.*]]: vector<4x1xi1> +// CHECK-SAME: %[[DEST:.*]]: tensor, +// CHECK-SAME: %[[MASK:.*]]: vector<4x1xi1> // CHECK-NOT: vector.transpose -// CHECK: vector.mask %[[ARG_1]] { vector.transfer_read %[[ARG_0]]{{.*}}: tensor, vector<1x4x4xf32> } : vector<4x1xi1> -> vector<1x4x4xf32> -func.func @masked_permutation_xfer_read_fixed_width(%arg0: tensor, %mask : vector<4x1xi1>) { +// CHECK: vector.mask %[[MASK]] { vector.transfer_read %[[DEST]]{{.*}}: tensor, vector<1x4x4xf32> } : vector<4x1xi1> -> vector<1x4x4xf32> +func.func @masked_permutation_xfer_read_fixed_width( + %dest: tensor, + %mask : vector<4x1xi1>) { + %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index - %3 = vector.mask %mask { vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [false, true, false], permutation_map = affine_map<(d0, d1) -> (d1, 0, d0)>} : tensor, vector<1x4x4xf32> } : vector<4x1xi1> -> vector<1x4x4xf32> - call @test.some_use(%3) : (vector<1x4x4xf32>) -> () + %3 = vector.mask %mask { + vector.transfer_read %dest[%c0, %c0], %cst { + in_bounds = [false, true, false], + permutation_map = affine_map<(d0, d1) -> (d1, 0, d0)> + } : tensor, vector<1x4x4xf32> + } : vector<4x1xi1> -> vector<1x4x4xf32> + + "test.some_use"(%3) : (vector<1x4x4xf32>) -> () + return } -func.func private @test.some_use(vector<1x4x4xf32>) // CHECK-LABEL: func.func @masked_permutation_xfer_read_scalable( -// CHECK-SAME: %[[ARG_0:.*]]: tensor, -// CHECK-SAME: %[[MASK:.*]]: vector<2x[4]xi1>) -> vector<8x[4]x2xf32> { +// CHECK-SAME: %[[DEST:.*]]: tensor, +// CHECK-SAME: %[[MASK:.*]]: vector<2x[4]xi1>) -> vector<8x[4]x2xf32> { // CHECK-NOT: vector.transpose -// CHECK: %[[T_READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]]{{.*}} : tensor, vector<8x[4]x2xf32> } : vector<2x[4]xi1> -> vector<8x[4]x2xf32> -func.func @masked_permutation_xfer_read_scalable(%t: tensor, %mask : vector<2x[4]xi1>) -> vector<8x[4]x2xf32> { +// CHECK: %[[T_READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[DEST]]{{.*}} : tensor, vector<8x[4]x2xf32> } : vector<2x[4]xi1> -> vector<8x[4]x2xf32> +func.func @masked_permutation_xfer_read_scalable( + %dest: tensor, + %mask : vector<2x[4]xi1>) -> vector<8x[4]x2xf32> { %c0 = arith.constant 0 : index %cst_0 = arith.constant 0.000000e+00 : f32 - %1 = vector.mask %mask { vector.transfer_read %t[%c0, %c0], %cst_0 - {in_bounds = [true, true, true], permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>} - : tensor, vector<8x[4]x2xf32> } :vector<2x[4]xi1> -> vector<8x[4]x2xf32> - return %1 : vector<8x[4]x2xf32> + %res = vector.mask %mask { + vector.transfer_read %dest[%c0, %c0], %cst_0 { + in_bounds = [true, true, true], + permutation_map = affine_map<(d0, d1) -> (0, d1, d0)> + } : tensor, vector<8x[4]x2xf32> + } :vector<2x[4]xi1> -> vector<8x[4]x2xf32> + + return %res : vector<8x[4]x2xf32> } module attributes {transform.with_named_sequence} { @@ -325,33 +379,46 @@ module attributes {transform.with_named_sequence} { // CHECK: #[[MAP:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, 0, d3)> // CHECK: func.func @transfer_read_reduce_rank_scalable( -// CHECK-SAME: %[[ARG_0:.*]]: memref) -> vector<8x[4]x2x3xf32> { +// CHECK-SAME: %[[MEM:.*]]: memref) -> vector<8x[4]x2x3xf32> { // CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[TFR:.*]] = vector.transfer_read %arg0[%[[C0]], %[[C0]], %[[C0]], %[[C0]]]{{.*}} permutation_map = #[[MAP]]} : memref, vector<[4]x2x3xf32> -// CHECK: %[[BC:.*]] = vector.broadcast %[[TFR]] : vector<[4]x2x3xf32> to vector<8x[4]x2x3xf32> +// CHECK: %[[T_READ:.*]] = vector.transfer_read %[[MEM]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]]{{.*}} permutation_map = #[[MAP]]} : memref, vector<[4]x2x3xf32> +// CHECK: %[[BC:.*]] = vector.broadcast %[[T_READ]] : vector<[4]x2x3xf32> to vector<8x[4]x2x3xf32> // CHECK: return %[[BC]] : vector<8x[4]x2x3xf32> -func.func @transfer_read_reduce_rank_scalable(%mem: memref) -> vector<8x[4]x2x3xf32> { +func.func @transfer_read_reduce_rank_scalable( + %mem: memref) -> vector<8x[4]x2x3xf32> { + %c0 = arith.constant 0 : index %cst_0 = arith.constant 0.000000e+00 : f32 - %1 = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 - {in_bounds = [true, true, true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>} - : memref, vector<8x[4]x2x3xf32> - return %1 : vector<8x[4]x2x3xf32> + + %res = vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 { + in_bounds = [true, true, true, true], + permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)> + } : memref, vector<8x[4]x2x3xf32> + + return %res : vector<8x[4]x2x3xf32> } // Masked case not supported. // CHECK-LABEL: func.func @masked_transfer_read_reduce_rank( -// CHECK-SAME: %[[ARG_0:.*]]: memref, -// CHECK-SAME: %[[DIM:.*]]: index) -> vector<8x[4]x2x3xf32> { +// CHECK-SAME: %[[MEM:.*]]: memref, +// CHECK-SAME: %[[DIM:.*]]: index) -> vector<8x[4]x2x3xf32> { // CHECK-NOT: vector.broadcast -// CHECK: %[[MASK:.*]] = vector.mask %0 { vector.transfer_read %arg0{{.*}} : memref, vector<8x[4]x2x3xf32> } : vector<[4]x3xi1> -> vector<8x[4]x2x3xf32> -func.func @masked_transfer_read_reduce_rank(%mem: memref, %dim: index) -> vector<8x[4]x2x3xf32> { +// CHECK: %[[MASK:.*]] = vector.mask %0 { vector.transfer_read %[[MEM]]{{.*}} : memref, vector<8x[4]x2x3xf32> } : vector<[4]x3xi1> -> vector<8x[4]x2x3xf32> +func.func @masked_transfer_read_reduce_rank( + %mem: memref, + %dim: index) -> vector<8x[4]x2x3xf32> { + %c0 = arith.constant 0 : index %cst_0 = arith.constant 0.000000e+00 : f32 %mask = vector.create_mask %dim, %dim: vector<[4]x3xi1> - %res = vector.mask %mask { vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 - {in_bounds = [true, true, true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>} - : memref, vector<8x[4]x2x3xf32> } : vector<[4]x3xi1> -> vector<8x[4]x2x3xf32> + + %res = vector.mask %mask { + vector.transfer_read %mem[%c0, %c0, %c0, %c0], %cst_0 { + in_bounds = [true, true, true, true], + permutation_map = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)> + } : memref, vector<8x[4]x2x3xf32> + } : vector<[4]x3xi1> -> vector<8x[4]x2x3xf32> + return %res : vector<8x[4]x2x3xf32> }