
Commit 4766b68

tugsbayasgalan authored and pytorchmergebot committed
Add out variants for softmax and log_softmax
Pull Request resolved: #75833 Approved by: https://github.com/ngimel
1 parent 6b6d09c commit 4766b68

File tree

3 files changed (+88, -4 lines)

aten/src/ATen/native/SoftMax.cpp

Lines changed: 74 additions & 0 deletions
@@ -438,6 +438,43 @@ Tensor softmax(const Tensor& input_, const int64_t dim_, c10::optional<ScalarType> dtype) {
   return result;
 }
 
+Tensor& softmax_out(
+    const Tensor& input_,
+    const int64_t dim_,
+    c10::optional<ScalarType> dtype,
+    Tensor& output_) {
+  Tensor output_temp;
+  if (input_.is_cuda() && input_.scalar_type() == ScalarType::Half &&
+      dtype == ScalarType::Float) {
+    if (!output_.is_contiguous()) {
+      auto options =
+          TensorOptions().dtype(output_.dtype()).device(output_.device());
+      output_temp = at::empty(output_.sizes(), options);
+      at::_softmax_out(output_temp, input_, dim_, true);
+    } else {
+      at::_softmax_out(output_, input_, dim_, true);
+    }
+  } else {
+    Tensor converted =
+        dtype.has_value() ? input_.toType(dtype.value()) : input_;
+    if (!output_.is_contiguous()) {
+      auto options =
+          TensorOptions().dtype(output_.dtype()).device(output_.device());
+      output_temp = at::empty(output_.sizes(), options);
+      at::_softmax_out(output_temp, converted, dim_, false);
+    } else {
+      at::_softmax_out(output_, converted, dim_, false);
+    }
+  }
+
+  if (!output_.is_contiguous()) {
+    output_.resize_(output_temp.sizes());
+    output_.copy_(output_temp);
+  }
+
+  return output_;
+}
+
 // special_softmax, alias for softmax
 Tensor special_softmax(const Tensor& input_, const int64_t dim_, c10::optional<ScalarType> dtype) {
   return at::softmax(input_, dim_, dtype);
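The composite above distinguishes two layouts for the destination: a contiguous out tensor is handed directly to at::_softmax_out, while a non-contiguous one is computed into a contiguous temporary and copied back at the end. A minimal Python sketch of what the new overload enables at the torch level; the shapes and tensor names here are illustrative, not taken from the PR:

import torch

x = torch.randn(4, 6)

# Contiguous destination: filled in place by the underlying _softmax kernel.
out = torch.empty(4, 6)
torch.softmax(x, dim=1, out=out)

# Non-contiguous destination (a transposed view): the composite computes into a
# contiguous temporary and copies the result back into the destination.
out_t = torch.empty(6, 4).t()
assert not out_t.is_contiguous()
torch.softmax(x, dim=1, out=out_t)

torch.testing.assert_close(out_t, torch.softmax(x, dim=1))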
@@ -466,6 +503,43 @@ Tensor log_softmax(const Tensor& input_, const int64_t dim_, c10::optional<ScalarType> dtype) {
   return result;
 }
 
+Tensor& log_softmax_out(
+    const Tensor& input_,
+    const int64_t dim_,
+    c10::optional<ScalarType> dtype,
+    Tensor& output_) {
+  Tensor output_temp;
+  if (input_.is_cuda() && input_.scalar_type() == ScalarType::Half &&
+      dtype == ScalarType::Float) {
+    if (!output_.is_contiguous()) {
+      auto options =
+          TensorOptions().dtype(output_.dtype()).device(output_.device());
+      output_temp = at::empty(output_.sizes(), options);
+      at::_log_softmax_out(output_temp, input_, dim_, true);
+    } else {
+      at::_log_softmax_out(output_, input_, dim_, true);
+    }
+  } else {
+    Tensor converted =
+        dtype.has_value() ? input_.toType(dtype.value()) : input_;
+    if (!output_.is_contiguous()) {
+      auto options =
+          TensorOptions().dtype(output_.dtype()).device(output_.device());
+      output_temp = at::empty(output_.sizes(), options);
+      at::_log_softmax_out(output_temp, converted, dim_, false);
+    } else {
+      at::_log_softmax_out(output_, converted, dim_, false);
+    }
+  }
+
+  if (!output_.is_contiguous()) {
+    output_.resize_(output_temp.sizes());
+    output_.copy_(output_temp);
+  }
+
+  return output_;
+}
+
 Tensor special_log_softmax(const Tensor& input, const int64_t dim, c10::optional<ScalarType> dtype) {
   return at::log_softmax(input, dim, dtype);
 }
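log_softmax_out mirrors softmax_out, including the fast path for a Half input on CUDA with dtype=Float, where the kernel is invoked with half_to_float=true so the conversion happens inside _log_softmax instead of materializing a Float copy of the input; in every other case the input is converted up front via toType. A small sketch of the dtype plus out combination on CPU, again with illustrative names and shapes:

import torch

x = torch.randn(5, 7)
out = torch.empty(5, 7, dtype=torch.float64)
# The composite converts x to float64 and writes the result into out.
torch.log_softmax(x, dim=1, dtype=torch.float64, out=out)
torch.testing.assert_close(out, torch.log_softmax(x, dim=1, dtype=torch.float64))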

aten/src/ATen/native/native_functions.yaml

Lines changed: 10 additions & 0 deletions
@@ -2821,6 +2821,11 @@
 - func: log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
 
+- func: log_softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: log_softmax_out
+
 - func: log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
 
@@ -4131,6 +4136,11 @@
 - func: softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor
   variants: function, method
 
+- func: softmax.int_out(Tensor self, int dim, ScalarType? dtype=None, *, Tensor(a!) out) -> Tensor(a!)
+  variants: function
+  dispatch:
+    CompositeExplicitAutograd: softmax_out
+
 - func: softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
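Each entry above registers a new .int_out overload with a keyword-only out argument and dispatches it to the corresponding composite in SoftMax.cpp under CompositeExplicitAutograd, so the single C++ implementation backs every backend. The new schemas should also be reachable directly through the dispatcher; the overload names below come from the yaml entries, and the call itself is a sketch rather than something exercised in this PR:

import torch

x = torch.randn(3, 4)
out = torch.empty(3, 4)
# Dispatcher-level calls to the newly registered out overloads (assumed calling convention).
torch.ops.aten.softmax.int_out(x, 1, None, out=out)
torch.ops.aten.log_softmax.int_out(x, 1, None, out=out)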

torch/testing/_internal/common_methods_invocations.py

Lines changed: 4 additions & 4 deletions
@@ -11275,7 +11275,7 @@ def generate_std_var_kwargs(t: torch.Tensor, **kwargs):
            assert_jit_shape_analysis=True,
            assert_autodiffed=True,
            supports_forward_ad=True,
-           supports_out=False),
+           supports_out=True),
     OpInfo('softmax',
            aliases=('special.softmax', 'nn.functional.softmax',),
            variant_test_name="with_dtype",
@@ -11284,7 +11284,7 @@ def generate_std_var_kwargs(t: torch.Tensor, **kwargs):
            sample_inputs_func=partial(sample_inputs_softmax_variant, with_dtype=True),
            assert_autodiffed=True,
            supports_forward_ad=True,
-           supports_out=False),
+           supports_out=True),
     # `softmin` supports different dtypes based on whether `dtype` argument,
     # is passed or not. Hence two OpInfo entries, one with dtype and other without.
     # https://github.com/pytorch/pytorch/issues/68752
@@ -15445,7 +15445,7 @@ def generate_std_var_kwargs(t: torch.Tensor, **kwargs):
     OpInfo(
         'log_softmax',
         aliases=('special.log_softmax', 'nn.functional.log_softmax'),
-        supports_out=False,
+        supports_out=True,
         dtypes=floating_types_and(torch.bfloat16),
         dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16),
         sample_inputs_func=sample_inputs_softmax_variant,
@@ -15455,7 +15455,7 @@ def generate_std_var_kwargs(t: torch.Tensor, **kwargs):
         'log_softmax',
         variant_test_name='dtype',
         aliases=('special.log_softmax', 'nn.functional.log_softmax'),
-        supports_out=False,
+        supports_out=True,
         dtypes=all_types_and_complex_and(torch.bool, torch.float16, torch.bfloat16),
         sample_inputs_func=partial(sample_inputs_softmax_variant, with_dtype=True),
         supports_forward_ad=True,
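Flipping supports_out to True lets the generic out= checks in the OpInfo-based test suite run against these entries instead of being skipped. Roughly the property those checks exercise, as a simplified sketch rather than the actual test logic:

import torch

x = torch.randn(3, 5)
out = torch.empty(0)
# out is resized to the result shape and filled in place.
torch.log_softmax(x, dim=1, out=out)
torch.testing.assert_close(out, torch.log_softmax(x, dim=1))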
