From 8dda88e9550bb9e7eefa448d88a9498fcc90c69f Mon Sep 17 00:00:00 2001
From: Mengwei Liu
Date: Wed, 4 Jun 2025 16:09:28 -0700
Subject: [PATCH] Add index check for embedding kernel (#11375)

Summary: index should always be smaller than weight.size(0). Adding this check in `op_embedding`.

This is to avoid wild-addr-read error:

```
AddressSanitizer:DEADLYSIGNAL
=================================================================
==3544359==ERROR: AddressSanitizer: SEGV on unknown address 0x7fce2364bc00 (pc 0x000002d225a0 bp 0x7ffffc792a40 sp 0x7ffffc792990 T0)
==3544359==The signal is caused by a READ memory access.
SCARINESS: 20 (wild-addr-read)
    #0 0x2d225a0 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175
    #1 0x2d22367 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #2 0x2d2223d in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #3 0x2d21d37 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #4 0x2d21bca in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const::'lambda'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #5 0x2d20f8f in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const::'lambda0'()::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #6 0x2d20e13 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&)::$_0::operator()() const xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #7 0x2d20d06 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:303
    #8 0x2d226b7 in torch::executor::native::quantized_embedding_byte_dtype_out(executorch::runtime::KernelRuntimeContext&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, long, long, executorch::runtime::etensor::Tensor const&, std::optional, executorch::runtime::etensor::Tensor&) xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:329
    #9 0x2d09bef in torch::executor::function::(anonymous namespace)::$_7::operator()(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) const buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:322
    #10 0x2d09a70 in torch::executor::function::(anonymous namespace)::$_7::__invoke(executorch::runtime::KernelRuntimeContext&, executorch::runtime::EValue**) buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/executorch/kernels/quantized/__generated_lib_combined__/out/RegisterCodegenUnboxedKernelsEverything.cpp:297
    #11 0x27d769b in executorch::runtime::Method::execute_instruction() xplat/executorch/runtime/executor/method.cpp:1306
    #12 0x27d8c55 in executorch::runtime::Method::execute() xplat/executorch/runtime/executor/method.cpp:1550
    #13 0x27b1e25 in executorch::extension::Module::execute(std::__cxx11::basic_string, std::allocator> const&, std::vector> const&) xplat/executorch/extension/module/module.cpp:261
    #14 0x27afe43 in executorch::extension::Module::forward(std::vector> const&) xplat/executorch/extension/module/module.h:340
    #15 0x27e0519 in executorch::extension::llm::LlmBackboneRunner::run(std::shared_ptr const&, std::shared_ptr const&, std::shared_ptr const&) xplat/executorch/examples/models/fb/llama4/runner/llm_backbone_runner.cpp:58
    #16 0x27a35c9 in executorch::extension::llm::Llama4Runner::prefill_tokens(std::shared_ptr const&, std::shared_ptr const&, std::shared_ptr const&) xplat/executorch/examples/models/fb/llama4/runner/llama4_runner.cpp:133
    #17 0x885774 in main (/data/users/larryliu/fbsource/buck-out/v2/gen/fbsource/ff19a7e6cb17a7b1/xplat/cria/benchmark/llama4/__generation_main__/generation_main+0x885774)
    #18 0x7fce2122c656 in __libc_start_call_main /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/nptl/libc_start_call_main.h:58:16
    #19 0x7fce2122c717 in __libc_start_main@GLIBC_2.2.5 /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../csu/libc-start.c:409:3
    #20 0x884c20 in _start /home/engshare/third-party2/glibc/2.34/src/glibc-2.34/csu/../sysdeps/x86_64/start.S:116

AddressSanitizer can not provide additional info.
AddressSanitizer: SEGV xplat/executorch/kernels/quantized/cpu/op_embedding.cpp:175 in void torch::executor::native::(anonymous namespace)::embedding_byte_per_channel(executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor const&, std::optional const&, executorch::runtime::etensor::Tensor const&, executorch::runtime::etensor::Tensor&) ==3544359==ABORTING ``` Test Plan: Imported from GitHub, without a `Test Plan:` line. Rollback Plan: Reviewed By: Gasoonjia Differential Revision: D75982682 Pulled By: larryliu0820 --- kernels/quantized/cpu/op_embedding.cpp | 16 +++++++++ kernels/quantized/test/op_embedding_test.cpp | 35 ++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/kernels/quantized/cpu/op_embedding.cpp b/kernels/quantized/cpu/op_embedding.cpp index b297d91870a..c43755ed3da 100644 --- a/kernels/quantized/cpu/op_embedding.cpp +++ b/kernels/quantized/cpu/op_embedding.cpp @@ -153,6 +153,22 @@ void embedding_byte_per_channel( for (int i = 0; i < indices.numel(); i++) { int64_t index = indices_ptr[i]; + + // Check if index is out of bounds for both weight and weight_scales + ET_CHECK_MSG( + index >= 0 && index < weight.size(0), + "Index out of bounds for weight: index %" PRId64 + " must be in range [0, %zd)", + index, + weight.size(0)); + + ET_CHECK_MSG( + index >= 0 && index < weight_scales.size(0), + "Index out of bounds for weight_scales: index %" PRId64 + " must be in range [0, %zd)", + index, + weight_scales.size(0)); + // If using groupwise embedding int32_t qparams_index = index * num_groups_per_channel; CTYPE_PARAMS zp = 0.0; diff --git a/kernels/quantized/test/op_embedding_test.cpp b/kernels/quantized/test/op_embedding_test.cpp index 6c949bd6e69..68359f5e45b 100644 --- a/kernels/quantized/test/op_embedding_test.cpp +++ b/kernels/quantized/test/op_embedding_test.cpp @@ -373,3 +373,38 @@ TEST(OpQuantizedEmbeddingTest, TestGroupWiseQuantizedEmbeddingDeath5) { out), ""); } + +TEST(OpQuantizedEmbeddingTest, 
TestOutOfBoundsIndex) { + et_pal_init(); + TensorFactory tf; + TensorFactory tf_l; + + int64_t quant_min = 0; + int64_t quant_max = 255; + + // Create a weight tensor with 3 rows + TensorFactory tfo; + Tensor qweight = + tfo.make({3, 4}, {8, 10, 12, 14, 10, 12, 12, 14, 8, 9, 10, 12}); + + // Create weight_scales with the same number of rows + Tensor weight_scales = tf.make({3, 1}, {0.5, 1.0, 1.5}); + Tensor weight_zero_points = tf.make({3, 1}, {1, 5, 7}); + + // Create indices with an out-of-bounds index (3, which is >= weight.size(0)) + Tensor indices = tf_l.make({2}, {1, 3}); + + Tensor out = tf.zeros({2, 4}); + + // Expect death when accessing an out-of-bounds index + ET_EXPECT_DEATH( + quantized_embedding_byte_out( + qweight, + weight_scales, + weight_zero_points, + quant_min, + quant_max, + indices, + out), + "Index out of bounds for weight"); +}