From aaf9924097a7a18187e357eeb1bea760f0a4461e Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:48:08 -0700 Subject: [PATCH 1/3] [ET-VK] Adding boolean parameters to add_copy_offset_node to specify index calculation function in copy op's shader. Pull Request resolved: https://github.com/pytorch/executorch/pull/9343 This diff adds two new boolean flags, `calc_out_pos_using_src_chnl` and `calc_in_pos_using_dst_chnl` to add_copy_offset_node, which can be used to specify an indexing function in the shader. ghstack-source-id: 272554190 @exported-using-ghexport Differential Revision: [D71343588](https://our.internmc.facebook.com/intern/diff/D71343588/) --- .../runtime/graph/ops/glsl/copy_offset.glsl | 20 +++++++++++++------ .../vulkan/runtime/graph/ops/impl/Cat.cpp | 2 +- .../vulkan/runtime/graph/ops/impl/Copy.cpp | 13 +++++++++--- backends/vulkan/runtime/graph/ops/impl/Copy.h | 17 +++++++++++++++- .../vulkan/runtime/graph/ops/impl/Repeat.cpp | 9 +++++---- .../vulkan/runtime/graph/ops/impl/Split.cpp | 9 ++++++--- 6 files changed, 52 insertions(+), 18 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl index a23822765a3..178814a90c3 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_offset.glsl @@ -35,6 +35,8 @@ const lowp ivec4 out_axis_map = unhash_axis_map(out_layout); ${layout_declare_spec_const(C, "int", "in_layout", "DEFAULT_LAYOUT")} const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); +${layout_declare_spec_const(C, "int", "batch_index_function", "0")} + void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); @@ -42,14 +44,20 @@ void main() { return; } - const ivec3 in_pos = pos + src_offset.xyz; + ivec3 in_pos = pos + src_offset.xyz; ivec3 out_pos = pos + dst_offset.xyz; - - // If source channel size is specified compose 
output z based on channel and batch index if (src_offset.w > 0) { - const int channel_index = in_pos.z % src_offset.w; - const int batch_index = in_pos.z / src_offset.w; - out_pos.z = channel_index + dst_offset.z + batch_index * dst_offset.w; + if (batch_index_function == 1) { + // batch index is calculated using source channel size + const int channel_index = pos.z % src_offset.w; + const int batch_index = pos.z / src_offset.w; + out_pos.z = channel_index + dst_offset.z + batch_index * dst_offset.w; + } else if (batch_index_function == 2) { + // batch index is calculated using destination channel size + const int channel_index = pos.z % dst_offset.w; + const int batch_index = pos.z / dst_offset.w; + in_pos.z = channel_index + src_offset.z + batch_index * src_offset.w; + } } write_texel_lpos( diff --git a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp index 5f172454121..25a0ff9a7f5 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp @@ -80,7 +80,7 @@ void add_cat_default_node( // concatenating channels src_offset[3] = is_concat_channel ? in_channel_size : 0; add_copy_offset_node( - graph, input_ref, range, src_offset, dst_offset, out); + graph, input_ref, range, src_offset, dst_offset, out, true, false); dst_offset[dim_xyz_index] += is_concat_channel ? 
in_channel_size : range[dim_xyz_index]; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 4b09fbe8619..2ecc7400d3e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -25,7 +25,9 @@ void add_copy_offset_node( const ivec3& range, const ivec4& src_offset, const ivec4& dst_offset, - const ValueRef out) { + const ValueRef out, + bool calc_out_pos_using_src_chnl, + bool calc_in_pos_using_dst_chnl) { vTensorPtr t_in = graph.get_tensor(in); vTensorPtr t_out = graph.get_tensor(out); @@ -49,7 +51,11 @@ void add_copy_offset_node( // Parameter buffers {}, // Specialization Constants - {graph.hashed_layout_of(out), graph.hashed_layout_of(in)}, + {graph.hashed_layout_of(out), + graph.hashed_layout_of(in), + (calc_out_pos_using_src_chnl ? 1 + : calc_in_pos_using_dst_chnl ? 2 + : 0)}, nullptr, {}, { @@ -256,7 +262,8 @@ void add_copy_offset_node( ivec4 src_offset = {src[0], src[1], src[2], 0}; ivec4 dst_offset = {dst[0], dst[1], dst[2], 0}; - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out, false, false); } void copy_offset(ComputeGraph& graph, const std::vector& args) { diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.h b/backends/vulkan/runtime/graph/ops/impl/Copy.h index d4b4c0dcc03..e9388345afa 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.h +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.h @@ -22,13 +22,28 @@ namespace vkcompute { // It is possible to have input and output to point to the same image // object. But when the source range and destination range overlap, the behavior // is undefined. 
+//
+// boolean flags calc_out_pos_using_src_chnl and calc_in_pos_using_dst_chnl
+// can be used to specify an indexing function in the shader
+// If calc_out_pos_using_src_chnl is set to true, channel and batch index will
+// be calculated based on source channel size and will be used to determine
+// destination texel position.
+//
+// If calc_in_pos_using_dst_chnl is set to true, channel and batch index will
+// be calculated based on destination channel size and will be used to
+// determine source texel position.
+//
+// If both are true, calc_out_pos_using_src_chnl is picked. If both are false,
+// no index calculation happens.
 void add_copy_offset_node(
     ComputeGraph& graph,
     const ValueRef in,
     const utils::ivec3& range,
     const utils::ivec4& src_offset,
     const utils::ivec4& dst_offset,
-    const ValueRef out);
+    const ValueRef out,
+    bool calc_out_pos_using_src_chnl,
+    bool calc_in_pos_using_dst_chnl);
 
 // add_copy_packed_dim_offset_node behaves similar to add_copy_node, except that
 // its used when copying packed dimension, if tensor is width or height packed.
diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp index 49daabdcb76..3f4ed4f1090 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp @@ -151,7 +151,8 @@ void add_repeat_node( utils::ivec4 src_offset{0, 0, 0, 0}; utils::ivec4 dst_offset{0, 0, 0, 0}; - add_copy_offset_node(graph, in, running_range, src_offset, dst_offset, out); + add_copy_offset_node( + graph, in, running_range, src_offset, dst_offset, out, false, false); } else { add_repeat_channel_node(graph, in, channel_repeat, out, running_range); @@ -166,7 +167,7 @@ void add_repeat_node( utils::ivec4 dst_offset{i * dim_at(in_sizes), 0, 0, 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[0] = running_range[0] * width_repeat; @@ -180,7 +181,7 @@ void add_repeat_node( utils::ivec4 dst_offset = {0, i * dim_at(in_sizes), 0, 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[1] = running_range[1] * height_repeat; @@ -194,7 +195,7 @@ void add_repeat_node( utils::ivec4 dst_offset = {0, 0, i * running_range[2], 0}; add_copy_offset_node( - graph, out, running_range, src_offset, dst_offset, out); + graph, out, running_range, src_offset, dst_offset, out, true, false); } running_range[2] = running_range[2] * batch_repeat; diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp index ca585f1fb6d..b74317b078e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp @@ -51,7 +51,8 @@ void add_split_with_sizes_default_node( // output tensor's size matches with the split_size. 
vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[0] += range[0]; } @@ -62,7 +63,8 @@ void add_split_with_sizes_default_node( for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[1] += range[1]; } @@ -73,7 +75,8 @@ void add_split_with_sizes_default_node( for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); + add_copy_offset_node( + graph, in, range, src_offset, dst_offset, out_ref, false, true); src_offset[2] += range[2]; } From 22b480e1e0a2bd47bb1512ef78da8924517e6f48 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:48:10 -0700 Subject: [PATCH 2/3] [ET-VK] Adding source_offset processing to copy_packed_dim_offset function. Pull Request resolved: https://github.com/pytorch/executorch/pull/9344 This diff change `copy_packed_dim_offset` function and associated shader to handle the source_offset parameter. This change will help enable all tensor packing for slice op. 
ghstack-source-id: 272554186 @exported-using-ghexport Differential Revision: [D71349217](https://our.internmc.facebook.com/intern/diff/D71349217/) --- .../ops/glsl/copy_packed_dim_offset.glsl | 48 ++++++++++++++++--- .../vulkan/runtime/graph/ops/impl/Copy.cpp | 28 +++++++++-- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/copy_packed_dim_offset.glsl b/backends/vulkan/runtime/graph/ops/glsl/copy_packed_dim_offset.glsl index 02ea6405b4a..e0f09f0be43 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/copy_packed_dim_offset.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/copy_packed_dim_offset.glsl @@ -44,15 +44,49 @@ void main() { return; } - // Starting offset to write at within a texel - const int out_lane_offset = dst_offset[packed_dim] & 0x3; - const bool has_lane_offset = out_lane_offset != 0; - // Position in input tensor - const ivec3 in_pos = pos + src_offset.xyz; + ivec3 in_pos = pos + src_offset.xyz; + in_pos[packed_dim] = pos[packed_dim] + (src_offset[packed_dim] >> 2); // Read input value mapping to this output texel - const VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map); + VEC4_T in_value = load_texel_lpos(t_in, in_pos, in_axis_map); + + // Starting offset to read from a texel + const int src_lane_offset = src_offset[packed_dim] & 0x3; + const bool has_src_lane_offset = src_lane_offset != 0; + + // If input lane offset is non zero i.e packed texel is composed from multiple sources + if (has_src_lane_offset) { + // Boundary values will come from next input texel in the packed dim. 
+ ivec3 next_in_pos = in_pos; + next_in_pos[packed_dim] = in_pos[packed_dim] + 1; + VEC4_T next_value = load_texel_lpos(t_in, next_in_pos, in_axis_map); + + // Keep input values from the end of current input pixel based on src_lane_offset + // offset 1 means the first lane of current input texel is not a part of the output texel + // offset 2 means first 2 lanes are not and so on + if (src_lane_offset == 1) { + in_value.xyz = in_value.yzw; + } else if (src_lane_offset == 2) { + in_value.xy = in_value.zw; + } else { + in_value.x = in_value.w; + } + // Copy next texel's values towards the end of input texel, based on lane offset + // offset 1 means the first lane from next texel is part of the input texel + // offset 2 means first 2 lanes from next texel is part of the input texel and so on + if (src_lane_offset == 1) { + in_value.w = next_value.x; + } else if (src_lane_offset == 2) { + in_value.zw = next_value.xy; + } else { + in_value.yzw = next_value.xyz; + } + } + + // Starting offset to write at within a texel + const int out_lane_offset = dst_offset[packed_dim] & 0x3; + const bool has_dst_lane_offset = out_lane_offset != 0; ivec3 out_pos = pos + dst_offset.xyz; out_pos[packed_dim] = pos[packed_dim] + (dst_offset[packed_dim] >> 2); @@ -60,7 +94,7 @@ void main() { VEC4_T out_value; // If lane offset is non zero i.e packed texel is composed from multiple sources - if (has_lane_offset) { + if (has_dst_lane_offset) { // When position in packed dim is > 0 if (pos[packed_dim] > 0) { // Boundary values will come from previous input texel in the packed dim. 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 2ecc7400d3e..5756d3a9052 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -92,19 +92,37 @@ void add_copy_packed_dim_offset_node( ivec4 final_range = { range[0], range[1], range[2], dim_at(t_in->sizes(), kBatch4D)}; ivec3 global_wg_size = t_out->logical_limits(); + // The starting offset in a texel where this tensor will start copying from + const auto src_lane_offset = src_offset[packed_dim] & 0x3; // The starting offset in a texel where this tensor will start copying to const auto dst_lane_offset = dst_offset[packed_dim] & 0x3; + + // The total packed texels this tensor will be copied from + // The first texel of tensor data in packed dimension will be copied from + // remaining lanes from current source Hence (4 - src_lane_offset) is added + // to tensor size in packed dimension + const auto src_packed_size = utils::div_up_4( + (4 - src_lane_offset) + + dim_at(t_out->sizes(), normalize_to_dim_index(*t_out, packed_dim))); + // The total packed texels this tensor will be copied to - // The first texel of tensor data in packed dimension will be copied to remain - // lanes from previous write Hence (4 - dst_lane_offset) is added to tensor - // size in packed dimension + // The first texel of tensor data in packed dimension will be copied to + // remaining lanes from previous write Hence (4 - dst_lane_offset) is added to + // tensor size in packed dimension const auto dst_packed_size = utils::div_up_4( (4 - dst_lane_offset) + dim_at(t_in->sizes(), normalize_to_dim_index(*t_in, packed_dim))); - // If the starting offset is not 0, and the total packed texels is greater + // If the starting src offset is not 0, and the total packed texels is greater // than the source texel range - if (dst_lane_offset != 0 && dst_packed_size > final_range[packed_dim]) { + const bool 
has_additional_src_work = + src_lane_offset != 0 && src_packed_size > final_range[packed_dim]; + // If the starting dst offset is not 0, and the total packed texels is greater + // than the source texel range + const bool has_additional_dst_work = + dst_lane_offset != 0 && dst_packed_size > final_range[packed_dim]; + + if (has_additional_src_work || has_additional_dst_work) { global_wg_size[packed_dim]++; // Increase the global work group size in // packed dimension final_range[packed_dim]++; // Increase the range in packed dimension From 6b58d463d57be9c5f14dcd9004a4d56de77307d2 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Wed, 19 Mar 2025 15:48:11 -0700 Subject: [PATCH 3/3] [ET-VK] Adding all tensor packing support to split op. Pull Request resolved: https://github.com/pytorch/executorch/pull/9345 This diff updates Executorch Vulkan backend's `split` operation to support width, height and channel packed tensors. It also updates the op_registry.py file to indicate `split` operation supports all packing and adds new test cases to the cases.py file to test the operation. 
ghstack-source-id: 272554188 @exported-using-ghexport Differential Revision: [D71345589](https://our.internmc.facebook.com/intern/diff/D71345589/) --- backends/vulkan/op_registry.py | 4 +- .../vulkan/runtime/graph/ops/impl/Split.cpp | 90 +++++++++---------- backends/vulkan/test/op_tests/cases.py | 13 +++ 3 files changed, 58 insertions(+), 49 deletions(-) diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index f2b80c2e544..5aa805dc1b3 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -528,8 +528,6 @@ def register_view_op(features: OpFeatures): exir_ops.edge.aten.index_select.default, exir_ops.edge.aten.select_copy.int, # Tensor combination - exir_ops.edge.aten.split_with_sizes_copy.default, - exir_ops.edge.aten.split.Tensor, exir_ops.edge.aten.repeat.default, # Tensor creation exir_ops.edge.aten.arange.start_step, @@ -563,6 +561,8 @@ def register_ported_op(features: OpFeatures): exir_ops.edge.aten.permute_copy.default, # Tensor combination exir_ops.edge.aten.cat.default, + exir_ops.edge.aten.split_with_sizes_copy.default, + exir_ops.edge.aten.split.Tensor, ] ) def register_ported_op_all_packed_dims(features: OpFeatures): diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp index b74317b078e..8002dadc538 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp @@ -25,8 +25,6 @@ void add_split_with_sizes_default_node( ValueRef out_list_ref) { vTensorPtr t_in = graph.get_tensor(in); - VK_CHECK_COND(check_packed_dim_is(*t_in, WHCN::kChannelsDim)); - ValueListPtr out_list = graph.get_value_list(out_list_ref); DimIndex dim_index = normalize_to_dim_index(*t_in, dim); @@ -38,62 +36,60 @@ void add_split_with_sizes_default_node( ValueRef out_ref = (*out_list)[split_idx]; vTensorPtr t_out = graph.get_tensor(out_ref); - VK_CHECK_COND(check_packed_dim_is(*t_out, WHCN::kChannelsDim)); 
VK_CHECK_COND(dim_at(*t_out, dim_index) == split_size); } - if (dim_index == kWidth4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + const auto packed_dim = t_in->packed_dim(); + const auto packed_dim_index = static_cast(kWidth4D - packed_dim); - for (ValueRef out_ref : *out_list) { - // Doesn't need to use split_size since we have already verified that the - // output tensor's size matches with the split_size. - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node( - graph, in, range, src_offset, dst_offset, out_ref, false, true); + // Index of dimension to be concatenated in (w, h, c * b) coordinate system + const auto dim_xyz_index = std::min(2, -dim_index - 1); - src_offset[0] += range[0]; - } - } else if (dim_index == kHeight4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); + utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); - add_copy_offset_node( - graph, in, range, src_offset, dst_offset, out_ref, false, true); + const bool is_splitting_channel = (dim_index == kChannel4D); - src_offset[1] += range[1]; - } - } else if (dim_index == kBatch4D) { - utils::ivec4 src_offset = utils::make_ivec4({0, 0, 0, 0}, false); - utils::ivec4 dst_offset = utils::make_ivec4({0, 0, 0, 0}, false); + // if splitting channels + if (is_splitting_channel) { + // set source offset w as channel size of the input tensor + src_offset[3] = dim_at(t_in->sizes(), kChannel4D); + } - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - utils::ivec3 range = t_out->logical_limits(); + for (ValueRef 
out_ref : *out_list) { + // Doesn't need to use split_size since we have already verified that the + // output tensor's size matches with the split_size. + vTensorPtr t_out = graph.get_tensor(out_ref); + const auto out_channel_size = dim_at(t_out->sizes(), kChannel4D); + utils::ivec3 range = t_out->logical_limits(); + + if (dim_index == packed_dim_index) { + // if splitting channels, use add_copy_channel_offset_node function as + // add_copy_packed_dim_offset_node does not support channel packing + if (is_splitting_channel) { + add_copy_channel_offset_node( + graph, in, out_channel_size, src_offset[2], dst_offset[2], out_ref); + src_offset[dim_xyz_index] += out_channel_size; + } else { + // dst_offset[3] is not used now but will be used in the future when + // add_copy_packed_dim_offset_node will support channel packing + // + // set destination offset w as channel size of the output tensor if + // splitting channel + dst_offset[3] = is_splitting_channel ? out_channel_size : 0; + add_copy_packed_dim_offset_node( + graph, in, range, src_offset, dst_offset, out_ref); + src_offset[dim_xyz_index] += dim_at(t_out->sizes(), packed_dim_index); + } + } else { + // set destination offset w as channel size of the output tensor if + // splitting channels + dst_offset[3] = is_splitting_channel ? out_channel_size : 0; add_copy_offset_node( graph, in, range, src_offset, dst_offset, out_ref, false, true); - - src_offset[2] += range[2]; - } - } else if (dim_index == kChannel4D) { - int32_t src_offset = 0; - int32_t dst_offset = 0; - - for (ValueRef out_ref : *out_list) { - vTensorPtr t_out = graph.get_tensor(out_ref); - int32_t range = dim_at(t_out->sizes()); - add_copy_channel_offset_node( - graph, in, range, src_offset, dst_offset, out_ref); - src_offset += range; + src_offset[dim_xyz_index] += + is_splitting_channel ? 
out_channel_size : range[dim_xyz_index]; } - - } else { - VK_THROW("not ipmlemented"); } } diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py index e4f7ac15434..41d8edf1f25 100644 --- a/backends/vulkan/test/op_tests/cases.py +++ b/backends/vulkan/test/op_tests/cases.py @@ -922,14 +922,20 @@ def get_split_with_sizes_inputs(): Test = namedtuple("VkSliceTest", ["self", "sizes", "dim"]) test_cases = [ # Split on Width + Test(self=(S1, 7, 10, 11), sizes=[1, 3, 2, 5], dim=3), Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=3), + Test(self=(7, 10, 11), sizes=[1, 3, 2, 5], dim=2), Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=2), + Test(self=(7, 10, 11), sizes=[3, 8], dim=2), Test(self=(7, 10, 10), sizes=[1, 9], dim=2), Test(self=(10, 10), sizes=[1, 9], dim=1), Test(self=(10,), sizes=[1, 9], dim=0), # Split on Height + Test(self=(S1, 7, 11, 10), sizes=[1, 3, 2, 5], dim=2), Test(self=(S1, 7, 10, 10), sizes=[1, 2, 3, 4], dim=2), + Test(self=(7, 11, 10), sizes=[1, 3, 2, 5], dim=1), Test(self=(7, 10, 10), sizes=[1, 2, 3, 4], dim=1), + Test(self=(7, 11, 11), sizes=[3, 8], dim=1), Test(self=(7, 10, 10), sizes=[10], dim=1), Test(self=(7, 6, 10), sizes=[1, 1, 1, 1, 1, 1], dim=1), Test(self=(10, 10), sizes=[1, 2, 3, 4], dim=0), @@ -937,8 +943,11 @@ def get_split_with_sizes_inputs(): Test(self=(10, 7, 10, 10), sizes=[3, 6, 1], dim=0), Test(self=(10, 7, 10, 10), sizes=[10], dim=0), # Split on Channel + Test(self=(7, 13, 4, 8), sizes=[3, 5, 2, 3], dim=1), Test(self=(7, 13, 4, 8), sizes=[3, 6, 1, 3], dim=1), + Test(self=(7, 13, 4, 8), sizes=[3, 2, 2, 5, 1], dim=1), Test(self=(7, 13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=1), + Test(self=(13, 4, 8), sizes=[3, 5, 2, 1, 2], dim=0), Test(self=(13, 4, 8), sizes=[3, 3, 3, 3, 1], dim=0), Test(self=(13, 4, 8), sizes=[2, 9, 2], dim=0), Test(self=(13, 4, 8), sizes=[13], dim=0), @@ -946,6 +955,8 @@ def get_split_with_sizes_inputs(): test_suite = VkTestSuite([tuple(tc) for tc in test_cases]) 
test_suite.layouts = [ + "utils::kWidthPacked", + "utils::kHeightPacked", "utils::kChannelsPacked", ] test_suite.data_gen = "make_seq_tensor" @@ -997,6 +1008,8 @@ def get_split_tensor_inputs(): ) test_suite.layouts = [ + "utils::kWidthPacked", + "utils::kHeightPacked", "utils::kChannelsPacked", ] test_suite.data_gen = "make_seq_tensor"