From 77428d5456855aa3f7dfd08524b79cd6160eb928 Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Fri, 23 May 2025 08:21:27 -0400
Subject: [PATCH 1/5] Refactor SIMD concrete operations

Split these out into separate files for Mask, Integer, Float, since each has a somewhat different set of operations.
---
 stdlib/public/core/CMakeLists.txt             |  14 +-
 stdlib/public/core/GroupInfo.json             |   7 +-
 .../core/SIMDConcreteOperations.swift.gyb     | 371 ------------------
 .../SIMDFloatConcreteOperations.swift.gyb     |  90 +++++
 .../SIMDIntegerConcreteOperations.swift.gyb   | 117 ++++++
 .../core/SIMDMaskConcreteOperations.swift.gyb | 209 ++++++++++
 utils/SwiftIntTypes.py                        |  10 +
 7 files changed, 443 insertions(+), 375 deletions(-)
 delete mode 100644 stdlib/public/core/SIMDConcreteOperations.swift.gyb
 create mode 100644 stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
 create mode 100644 stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
 create mode 100644 stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb

diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt
index 3fe6ca201d234..3dd29731a6355 100644
--- a/stdlib/public/core/CMakeLists.txt
+++ b/stdlib/public/core/CMakeLists.txt
@@ -268,11 +268,21 @@ split_embedded_sources(
   )
 
 if(SWIFT_STDLIB_ENABLE_VECTOR_TYPES)
+  split_embedded_sources(
+    OUT_LIST_EMBEDDED SWIFTLIB_EMBEDDED_VECTOR_GYB_SOURCES
+    OUT_LIST_NORMAL SWIFTLIB_VECTOR_GYB_SOURCES
+    
+    EMBEDDED SIMDIntegerConcreteOperations.swift.gyb
+    EMBEDDED SIMDFloatConcreteOperations.swift.gyb
+    EMBEDDED SIMDMaskConcreteOperations.swift.gyb
+    EMBEDDED SIMDVectorTypes.swift.gyb
+  )
+
   list(APPEND SWIFTLIB_SOURCES SIMDVector.swift)
-  list(APPEND SWIFTLIB_GYB_SOURCES SIMDConcreteOperations.swift.gyb SIMDVectorTypes.swift.gyb)
+  list(APPEND SWIFTLIB_GYB_SOURCES ${SWIFTLIB_VECTOR_GYB_SOURCES})
 
   list(APPEND SWIFTLIB_EMBEDDED_SOURCES SIMDVector.swift)
-  list(APPEND SWIFTLIB_EMBEDDED_GYB_SOURCES SIMDConcreteOperations.swift.gyb SIMDVectorTypes.swift.gyb)
+  list(APPEND SWIFTLIB_EMBEDDED_GYB_SOURCES ${SWIFTLIB_EMBEDDED_VECTOR_GYB_SOURCES})
 endif()
 
 # Freestanding and Linux/Android builds both have failures to resolve.
diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json
index ab9f72243d030..e2487a585d0a9 100644
--- a/stdlib/public/core/GroupInfo.json
+++ b/stdlib/public/core/GroupInfo.json
@@ -182,9 +182,12 @@
       "FloatingPointTypes.swift",
       "FloatingPointRandom.swift"],
     "Vector": [
-      "SIMDConcreteOperations.swift",
+      "SIMDIntegerConcreteOperations.swift",
+      "SIMDFloatConcreteOperations.swift",
+      "SIMDMaskConcreteOperations.swift",
       "SIMDVector.swift",
-      "SIMDVectorTypes.swift"]}
+      "SIMDVectorTypes.swift"
+    ]}
   ],
   "Optional": [
     "Optional.swift"
diff --git a/stdlib/public/core/SIMDConcreteOperations.swift.gyb b/stdlib/public/core/SIMDConcreteOperations.swift.gyb
deleted file mode 100644
index bcaefd7c550c8..0000000000000
--- a/stdlib/public/core/SIMDConcreteOperations.swift.gyb
+++ /dev/null
@@ -1,371 +0,0 @@
-//===--- SIMDConcreteOperations.swift -------------------------*- swift -*-===//
-//
-// This source file is part of the Swift.org open source project
-//
-// Copyright (c) 2021 Apple Inc. and the Swift project authors
-// Licensed under Apache License v2.0 with Runtime Library Exception
-//
-// See https://swift.org/LICENSE.txt for license information
-// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
-//
-//===----------------------------------------------------------------------===//
-
-%{
-from SwiftIntTypes import all_integer_types
-word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
-storagescalarCounts = [2,4,8,16,32,64]
-vectorscalarCounts = storagescalarCounts + [3]
-}%
-
-%for int in all_integer_types(word_bits):
-% Scalar = int.stdlib_name
-% for n in vectorscalarCounts:
-%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
-%  storageN = 4 if n == 3 else n
-%  s = "s" if int.is_signed else "u"
-%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
-%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
-%  if int.is_signed:
-extension SIMDMask where Storage == ${Vector} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Vector}(_builtin)
-  }
-  
-  @_alwaysEmitIntoClient
-  internal static var allTrue: Self {
-    let zero = ${Vector}()
-    return zero .== zero
-  }
-  
-  /// A vector mask that is the pointwise logical negation of the input.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = !a[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static prefix func .!(a: Self) -> Self {
-    a .^ .allTrue
-  }
-    
-  /// A vector mask that is the pointwise logical conjunction of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] && b[i]
-  /// }
-  /// ```
-  ///
-  /// Note that unlike the scalar `&&` operator, the SIMD `.&` operator
-  /// always fully evaluates both arguments.
-  @_alwaysEmitIntoClient
-  public static func .&(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.and_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise logical conjunction of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] && b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .&=(a: inout Self, b: Self) {
-    a = a .& b
-  }
-      
-  /// A vector mask that is the pointwise exclusive or of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .^(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.xor_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise exclusive or of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .^=(a: inout Self, b: Self) {
-    a = a .^ b
-  }
-      
-  /// A vector mask that is the pointwise logical disjunction of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] || b[i]
-  /// }
-  /// ```
-  ///
-  /// Note that unlike the scalar `||` operator, the SIMD `.|` operator
-  /// always fully evaluates both arguments.
-  @_alwaysEmitIntoClient
-  public static func .|(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.or_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise logical disjunction of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] || b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .|=(a: inout Self, b: Self) {
-    a = a .| b
-  }
-    
-  /// A vector mask with the result of a pointwise equality comparison.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] == b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> Self {
-    .!(a .^ b)
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> Self {
-    a .^ b
-  }
-    
-  /// Replaces elements of this vector with elements of `other` in the lanes
-  /// where `mask` is `true`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in indices {
-  ///   if mask[i] { self[i] = other[i] }
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public mutating func replace(with other: Self, where mask: Self) {
-    self = replacing(with: other, where: mask)
-  }
-    
-  /// Returns a copy of this vector, with elements replaced by elements of
-  /// `other` in the lanes where `mask` is `true`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = Self()
-  /// for i in indices {
-  ///   result[i] = mask[i] ? other[i] : self[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public func replacing(with other: Self, where mask: Self) -> Self {
-    (self .& .!mask) .| (other .& mask)
-  }
-}
-
-%  end
-extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
-  }
-    
-  /// A vector mask with the result of a pointwise equality comparison.
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_eq_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_ne_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}lt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}le_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}gt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}ge_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-    
-  /// The wrapping sum of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &+(a: Self, b: Self) -> Self {
-    Self(Builtin.add_${Builtin}(a._storage._value, b._storage._value))
-  }
-    
-  /// The wrapping difference of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &-(a: Self, b: Self) -> Self {
-    Self(Builtin.sub_${Builtin}(a._storage._value, b._storage._value))
-  }
-    
-  /// The pointwise wrapping product of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &*(a: Self, b: Self) -> Self {
-    Self(Builtin.mul_${Builtin}(a._storage._value, b._storage._value))
-  }
-        
-  /// Updates the left hand side with the wrapping sum of the two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &+=(a: inout Self, b: Self) { a = a &+ b }
-    
-  /// Updates the left hand side with the wrapping difference of the two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &-=(a: inout Self, b: Self) { a = a &- b }
-    
-  /// Updates the left hand side with the pointwise wrapping product of two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &*=(a: inout Self, b: Self) { a = a &* b }
-}
-
-% end
-%end
-
-%for (Scalar, bits) in [('Float16',16), ('Float',32), ('Double',64)]:
-% for n in vectorscalarCounts:
-%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
-%  storageN = 4 if n == 3 else n
-%  Builtin = "Vec" + str(storageN) + "xFPIEEE" + str(bits)
-%  VecPre = "Vec" + str(storageN) + "x"
-%  MaskExt = "Builtin.sext_" + VecPre + "Int1_" + VecPre + "Int" + str(bits)
-%  if bits == 16:
-#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
-@available(SwiftStdlib 5.3, *)
-%  end
-extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
-  }
-  
-  /// A vector mask with the result of a pointwise equality comparison.
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-}
-%  if bits == 16:
-#endif
-%  end
-
-% end
-%end
diff --git a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
new file mode 100644
index 0000000000000..9108b542ac4e2
--- /dev/null
+++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
@@ -0,0 +1,90 @@
+//===--- SIMDFloatConcreteOperations.swift --------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_integer_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for (Scalar, bits) in [('Float16',16), ('Float',32), ('Double',64)]:
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  Builtin = "Vec" + str(storageN) + "xFPIEEE" + str(bits)
+%  VecPre = "Vec" + str(storageN) + "x"
+%  MaskExt = "Builtin.sext_" + VecPre + "Int1_" + VecPre + "Int" + str(bits)
+%  if bits == 16:
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
+@available(SwiftStdlib 5.3, *)
+%  end
+extension SIMD${n} where Scalar == ${Scalar} {
+  @_alwaysEmitIntoClient
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
+  }
+  
+  /// A vector mask with the result of a pointwise equality comparison.
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+}
+%  if bits == 16:
+#endif
+%  end
+
+% end
+%end
diff --git a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
new file mode 100644
index 0000000000000..01ab5ab57d546
--- /dev/null
+++ b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
@@ -0,0 +1,117 @@
+//===--- SIMDIntegerConcreteOperations.swift ------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_integer_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for int in all_integer_types(word_bits):
+% Scalar = int.stdlib_name
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  s = "s" if int.is_signed else "u"
+%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
+%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
+extension SIMD${n} where Scalar == ${Scalar} {
+  @_alwaysEmitIntoClient
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
+  }
+    
+  /// A vector mask with the result of a pointwise equality comparison.
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_eq_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_ne_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}lt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}le_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}gt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}ge_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+    
+  /// The wrapping sum of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &+(a: Self, b: Self) -> Self {
+    Self(Builtin.add_${Builtin}(a._storage._value, b._storage._value))
+  }
+    
+  /// The wrapping difference of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &-(a: Self, b: Self) -> Self {
+    Self(Builtin.sub_${Builtin}(a._storage._value, b._storage._value))
+  }
+    
+  /// The pointwise wrapping product of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &*(a: Self, b: Self) -> Self {
+    Self(Builtin.mul_${Builtin}(a._storage._value, b._storage._value))
+  }
+        
+  /// Updates the left hand side with the wrapping sum of the two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &+=(a: inout Self, b: Self) { a = a &+ b }
+    
+  /// Updates the left hand side with the wrapping difference of the two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &-=(a: inout Self, b: Self) { a = a &- b }
+    
+  /// Updates the left hand side with the pointwise wrapping product of two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &*=(a: inout Self, b: Self) { a = a &* b }
+}
+
+% end
+%end
diff --git a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
new file mode 100644
index 0000000000000..a93bf7f34e36e
--- /dev/null
+++ b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
@@ -0,0 +1,209 @@
+//===--- SIMDMaskConcreteOperations.swift ---------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_signed_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for int in all_signed_types(word_bits):
+% Scalar = int.stdlib_name
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  s = "s"
+%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
+%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
+extension SIMDMask where Storage == ${Vector} {
+  @_alwaysEmitIntoClient
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Vector}(_builtin)
+  }
+  
+  @_alwaysEmitIntoClient
+  internal static var allTrue: Self {
+    let zero = ${Vector}()
+    return zero .== zero
+  }
+  
+  /// A vector mask that is the pointwise logical negation of the input.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = !a[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static prefix func .!(a: Self) -> Self {
+    a .^ .allTrue
+  }
+    
+  /// A vector mask that is the pointwise logical conjunction of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] && b[i]
+  /// }
+  /// ```
+  ///
+  /// Note that unlike the scalar `&&` operator, the SIMD `.&` operator
+  /// always fully evaluates both arguments.
+  @_alwaysEmitIntoClient
+  public static func .&(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.and_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise logical conjunction of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] && b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .&=(a: inout Self, b: Self) {
+    a = a .& b
+  }
+      
+  /// A vector mask that is the pointwise exclusive or of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .^(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.xor_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise exclusive or of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .^=(a: inout Self, b: Self) {
+    a = a .^ b
+  }
+      
+  /// A vector mask that is the pointwise logical disjunction of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] || b[i]
+  /// }
+  /// ```
+  ///
+  /// Note that unlike the scalar `||` operator, the SIMD `.|` operator
+  /// always fully evaluates both arguments.
+  @_alwaysEmitIntoClient
+  public static func .|(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.or_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise logical disjunction of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] || b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .|=(a: inout Self, b: Self) {
+    a = a .| b
+  }
+    
+  /// A vector mask with the result of a pointwise equality comparison.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] == b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> Self {
+    .!(a .^ b)
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> Self {
+    a .^ b
+  }
+    
+  /// Replaces elements of this vector with elements of `other` in the lanes
+  /// where `mask` is `true`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in indices {
+  ///   if mask[i] { self[i] = other[i] }
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public mutating func replace(with other: Self, where mask: Self) {
+    self = replacing(with: other, where: mask)
+  }
+    
+  /// Returns a copy of this vector, with elements replaced by elements of
+  /// `other` in the lanes where `mask` is `true`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = Self()
+  /// for i in indices {
+  ///   result[i] = mask[i] ? other[i] : self[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public func replacing(with other: Self, where mask: Self) -> Self {
+    (self .& .!mask) .| (other .& mask)
+  }
+}
+
+% end
+%end
diff --git a/utils/SwiftIntTypes.py b/utils/SwiftIntTypes.py
index fec4f489710a7..431e2f147f237 100644
--- a/utils/SwiftIntTypes.py
+++ b/utils/SwiftIntTypes.py
@@ -72,6 +72,16 @@ def all_integer_types(word_bits):
             is_word=True, bits=word_bits,
             is_signed=is_signed)
 
+def all_signed_types(word_bits):
+    for bitwidth in _all_integer_type_bitwidths:
+        yield SwiftIntegerType(
+            is_word=False, bits=bitwidth,
+            is_signed=True)
+    
+    yield SwiftIntegerType(
+        is_word=True, bits=word_bits,
+        is_signed=True)
+
 # 'truncatingBitPattern' initializer is defined if the conversion is truncating
 # on any platform that Swift supports.
 

From 37d2a2f9a26345f1892afe58471a13c2124d803d Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Sat, 24 May 2025 20:53:21 -0400
Subject: [PATCH 2/5] Optimize repeating: initializers on concrete SIMD types

Previously these were not transparent, and so were not generally inlined in debug builds. Inlining them is basically always a win, because LLVM knows how to optimize them to single instructions + some debugging bookkeeping (which we should also figure out how to eliminate). Add FileCheck tests that ensure that we get optimal -O codegen and near-optimal -Onone codegen.
---
 .../SIMDFloatConcreteOperations.swift.gyb     | 29 ++++++-
 .../SIMDIntegerConcreteOperations.swift.gyb   | 31 ++++++-
 .../core/SIMDMaskConcreteOperations.swift.gyb | 20 ++++-
 test/stdlib/SIMDFloatInitializers.swift.gyb   | 81 ++++++++++++++++++
 test/stdlib/SIMDMaskInitializers.swift.gyb    | 84 +++++++++++++++++++
 test/stdlib/SIMDSignedInitializers.swift.gyb  | 84 +++++++++++++++++++
 .../stdlib/SIMDUnsignedInitializers.swift.gyb | 83 ++++++++++++++++++
 7 files changed, 408 insertions(+), 4 deletions(-)
 create mode 100644 test/stdlib/SIMDFloatInitializers.swift.gyb
 create mode 100644 test/stdlib/SIMDMaskInitializers.swift.gyb
 create mode 100644 test/stdlib/SIMDSignedInitializers.swift.gyb
 create mode 100644 test/stdlib/SIMDUnsignedInitializers.swift.gyb

diff --git a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
index 9108b542ac4e2..71d2e91ba08ec 100644
--- a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
@@ -29,11 +29,38 @@ vectorscalarCounts = storagescalarCounts + [3]
 @available(SwiftStdlib 5.3, *)
 %  end
 extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
+  @_alwaysEmitIntoClient @_transparent
   internal init(_ _builtin: Builtin.${Builtin}) {
     _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
   }
   
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: ${Scalar}) {
+    let asVector = Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
+      Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
+    )
+    let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
+      asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
+    )
+%if n != 3:
+    self.init(repeated)
+%else:
+    self.init(Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
+      repeated, Builtin.zeroInitializer(), Int32(3)._value
+    ))
+%end
+  }
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMD${n//2}<${Scalar}>,
+    highHalf: SIMD${n//2}<${Scalar}>
+  ) {
+    self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
+  }
+  
+%  end
   /// A vector mask with the result of a pointwise equality comparison.
   @_alwaysEmitIntoClient
   public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
diff --git a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
index 01ab5ab57d546..1c8ac2c162283 100644
--- a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
@@ -26,11 +26,38 @@ vectorscalarCounts = storagescalarCounts + [3]
 %  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
 %  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
 extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
+  @_alwaysEmitIntoClient @_transparent
   internal init(_ _builtin: Builtin.${Builtin}) {
     _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
   }
-    
+  
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: ${Scalar}) {
+    let asVector = Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
+      Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
+    )
+    let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
+      asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
+    )
+%   if n != 3:
+    self.init(repeated)
+%   else:
+    self.init(Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
+      repeated, Builtin.zeroInitializer(), Int32(3)._value
+    ))
+%   end
+  }
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMD${n//2}<${Scalar}>,
+    highHalf: SIMD${n//2}<${Scalar}>
+  ) {
+    self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
+  }
+  
+%  end
   /// A vector mask with the result of a pointwise equality comparison.
   @_alwaysEmitIntoClient
   public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
diff --git a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
index a93bf7f34e36e..aca6ef52d24bd 100644
--- a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
@@ -26,11 +26,29 @@ vectorscalarCounts = storagescalarCounts + [3]
 %  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
 %  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
 extension SIMDMask where Storage == ${Vector} {
-  @_alwaysEmitIntoClient
+  @_alwaysEmitIntoClient @_transparent
   internal init(_ _builtin: Builtin.${Builtin}) {
     _storage = ${Vector}(_builtin)
   }
   
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: Bool) {
+    _storage = ${Vector}(repeating: scalar ? -1 : 0)
+  }
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMDMask<SIMD${n//2}<${Scalar}>>,
+    highHalf: SIMDMask<SIMD${n//2}<${Scalar}>>
+  ) {
+    _storage = ${Vector}(
+      lowHalf: lowHalf._storage,
+      highHalf: highHalf._storage
+    )
+  }
+  
+%  end
   @_alwaysEmitIntoClient
   internal static var allTrue: Self {
     let zero = ${Vector}()
diff --git a/test/stdlib/SIMDFloatInitializers.swift.gyb b/test/stdlib/SIMDFloatInitializers.swift.gyb
new file mode 100644
index 0000000000000..decd27e3b2048
--- /dev/null
+++ b/test/stdlib/SIMDFloatInitializers.swift.gyb
@@ -0,0 +1,81 @@
+//===--- SIMDFloatInitializers.swift.gyb -------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDFloatInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [16,32,64]:
+% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+%   if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+%   end
+func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
+  SIMD${n}(repeating: scalar)
+}
+%   if bits == 16:
+#endif
+// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
+%   else:
+// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
+%   end
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+
+%  end
+% end
+%end
+
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK-arm64: _$s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+func concat8x16(_ a: SIMD8<Float16>, _ b: SIMD8<Float16>) -> SIMD16<Float16> {
+  SIMD16(lowHalf: a, highHalf: b)
+}
+// CHECK-arm64: _$s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+#endif
+
+func concat2x32(_ a: SIMD2<Float>, _ b: SIMD2<Float>) -> SIMD4<Float> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+
+func concat4x32(_ a: SIMD4<Float>, _ b: SIMD4<Float>) -> SIMD8<Float> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ a: SIMD2<Double>, _ b: SIMD2<Double>) -> SIMD4<Double> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
+// CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDMaskInitializers.swift.gyb b/test/stdlib/SIMDMaskInitializers.swift.gyb
new file mode 100644
index 0000000000000..b92e1059bf7dd
--- /dev/null
+++ b/test/stdlib/SIMDMaskInitializers.swift.gyb
@@ -0,0 +1,84 @@
+//===--- SIMDMaskInitializers.swift.gyb -------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDMaskInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S -O | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  SIMDMask(repeating: scalar)
+}
+// CHECK: repeating${n}_mask${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: sbfx [[REG:[wx][0-9]]], {{[wx]}}0, #0, #1
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, [[REG]]
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+
+%  end
+% end
+%end
+
+func concat8x8(_ a: SIMDMask<SIMD8<Int8>>, _ b: SIMDMask<SIMD8<Int8>>) -> SIMDMask<SIMD16<Int8>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ a: SIMDMask<SIMD16<Int8>>, _ b: SIMDMask<SIMD16<Int8>>) -> SIMDMask<SIMD32<Int8>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ a: SIMDMask<SIMD4<Int16>>, _ b: SIMDMask<SIMD4<Int16>>) -> SIMDMask<SIMD8<Int16>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ a: SIMDMask<SIMD8<Int16>>, _ b: SIMDMask<SIMD8<Int16>>) -> SIMDMask<SIMD16<Int16>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ a: SIMDMask<SIMD2<Int32>>, _ b: SIMDMask<SIMD2<Int32>>) -> SIMDMask<SIMD4<Int32>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ a: SIMDMask<SIMD4<Int32>>, _ b: SIMDMask<SIMD4<Int32>>) -> SIMDMask<SIMD8<Int32>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ a: SIMDMask<SIMD2<Int64>>, _ b: SIMDMask<SIMD2<Int64>>) -> SIMDMask<SIMD4<Int64>> {
+  SIMDMask(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDSignedInitializers.swift.gyb b/test/stdlib/SIMDSignedInitializers.swift.gyb
new file mode 100644
index 0000000000000..4acaf1399ec62
--- /dev/null
+++ b/test/stdlib/SIMDSignedInitializers.swift.gyb
@@ -0,0 +1,84 @@
+//===--- SIMDSignedInitializers.swift.gyb ---------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDSignedInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S -O | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+
+func repeating${n}_int${bits}(_ scalar: Int${bits}) -> SIMD${n}<Int${bits}> {
+  SIMD${n}(repeating: scalar)
+}
+// CHECK: repeating${n}_int${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+%  end
+% end
+%end
+
+func concat8x8(_ a: SIMD8<Int8>, _ b: SIMD8<Int8>) -> SIMD16<Int8> {
+  SIMD16(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers9concat8x8ys6SIMD16Vys4Int8VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ a: SIMD16<Int8>, _ b: SIMD16<Int8>) -> SIMD32<Int8> {
+  SIMD32(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat16x8ys6SIMD32Vys4Int8VGs6SIMD16VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ a: SIMD4<Int16>, _ b: SIMD4<Int16>) -> SIMD8<Int16> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat4x16ys5SIMD8Vys5Int16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ a: SIMD8<Int16>, _ b: SIMD8<Int16>) -> SIMD16<Int16> {
+  SIMD16(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat8x16ys6SIMD16Vys5Int16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ a: SIMD2<Int32>, _ b: SIMD2<Int32>) -> SIMD4<Int32> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat2x32ys5SIMD4Vys5Int32VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ a: SIMD4<Int32>, _ b: SIMD4<Int32>) -> SIMD8<Int32> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat4x32ys5SIMD8Vys5Int32VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ a: SIMD2<Int64>, _ b: SIMD2<Int64>) -> SIMD4<Int64> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s22SIMDSignedInitializers10concat2x64ys5SIMD4Vys5Int64VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
diff --git a/test/stdlib/SIMDUnsignedInitializers.swift.gyb b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
new file mode 100644
index 0000000000000..5a502ff8aa266
--- /dev/null
+++ b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
@@ -0,0 +1,83 @@
+//===--- SIMDUnsignedInitializers.swift.gyb -------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDUnsignedInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S -O | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+
+func repeating${n}_uint${bits}(_ scalar: UInt${bits}) -> SIMD${n}<UInt${bits}> {
+  SIMD${n}(repeating: scalar)
+}
+// CHECK: repeating${n}_uint${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+%  end
+% end
+%end
+
+func concat8x8(_ a: SIMD8<UInt8>, _ b: SIMD8<UInt8>) -> SIMD16<UInt8> {
+  SIMD16(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers9concat8x8ys6SIMD16Vys5UInt8VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ a: SIMD16<UInt8>, _ b: SIMD16<UInt8>) -> SIMD32<UInt8> {
+  SIMD32(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat16x8ys6SIMD32Vys5UInt8VGs6SIMD16VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ a: SIMD4<UInt16>, _ b: SIMD4<UInt16>) -> SIMD8<UInt16> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat4x16ys5SIMD8Vys6UInt16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ a: SIMD8<UInt16>, _ b: SIMD8<UInt16>) -> SIMD16<UInt16> {
+  SIMD16(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat8x16ys6SIMD16Vys6UInt16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ a: SIMD2<UInt32>, _ b: SIMD2<UInt32>) -> SIMD4<UInt32> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat2x32ys5SIMD4Vys6UInt32VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ a: SIMD4<UInt32>, _ b: SIMD4<UInt32>) -> SIMD8<UInt32> {
+  SIMD8(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat4x32ys5SIMD8Vys6UInt32VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ a: SIMD2<UInt64>, _ b: SIMD2<UInt64>) -> SIMD4<UInt64> {
+  SIMD4(lowHalf: a, highHalf: b)
+}
+// CHECK: _$s24SIMDUnsignedInitializers10concat2x64ys5SIMD4Vys6UInt64VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret

From 7a006190652a4f28ec88f2601dcd1100441c6b37 Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Sun, 25 May 2025 16:03:31 -0400
Subject: [PATCH 3/5] Back out concrete SIMD floating-point inits for now.

I had hoped that https://github.com/swiftlang/swift/issues/54445 would unblock these, but it doesn't quite seem to be the complete story yet (or at least, these were still failing as implemented).
---
 stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb | 3 +++
 test/stdlib/SIMDFloatInitializers.swift.gyb              | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
index 71d2e91ba08ec..8c30a42b4fedf 100644
--- a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
@@ -34,6 +34,8 @@ extension SIMD${n} where Scalar == ${Scalar} {
     _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
   }
   
+  /* Breaks differentiation testing, commented out while we figure out
+     what to do about that.
   @_alwaysEmitIntoClient @_transparent
   public init(repeating scalar: ${Scalar}) {
     let asVector = Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
@@ -50,6 +52,7 @@ extension SIMD${n} where Scalar == ${Scalar} {
     ))
 %end
   }
+  */
   
 %  if n >= 4:
   @_alwaysEmitIntoClient @_transparent
diff --git a/test/stdlib/SIMDFloatInitializers.swift.gyb b/test/stdlib/SIMDFloatInitializers.swift.gyb
index decd27e3b2048..effaf7f61f55f 100644
--- a/test/stdlib/SIMDFloatInitializers.swift.gyb
+++ b/test/stdlib/SIMDFloatInitializers.swift.gyb
@@ -14,6 +14,10 @@
 // RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
 // RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
 
+// Disable this test for now because aEIC/transparent functions still are not
+// correctly differentiable, and so these inits are suppressed in the stdlib.
+// REQUIRES: differentiable-aEIC-transparent
+
 import Swift
 
 %for bits in [16,32,64]:

From 5e71b1d2e44801ac47ae10db7f753853a33b1fba Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Mon, 26 May 2025 10:36:54 -0400
Subject: [PATCH 4/5] Don't include leading _$ on symbol names for FileCheck

We drop the _ on Linux, so keep just the Swift mangling part.
---
 test/stdlib/SIMDFloatInitializers.swift.gyb    | 10 +++++-----
 test/stdlib/SIMDMaskInitializers.swift.gyb     | 14 +++++++-------
 test/stdlib/SIMDSignedInitializers.swift.gyb   | 14 +++++++-------
 test/stdlib/SIMDUnsignedInitializers.swift.gyb | 14 +++++++-------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/test/stdlib/SIMDFloatInitializers.swift.gyb b/test/stdlib/SIMDFloatInitializers.swift.gyb
index effaf7f61f55f..f728764d596d9 100644
--- a/test/stdlib/SIMDFloatInitializers.swift.gyb
+++ b/test/stdlib/SIMDFloatInitializers.swift.gyb
@@ -53,7 +53,7 @@ func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
 func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK-arm64: _$s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
+// CHECK-arm64: s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 
@@ -61,25 +61,25 @@ func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
 func concat8x16(_ a: SIMD8<Float16>, _ b: SIMD8<Float16>) -> SIMD16<Float16> {
   SIMD16(lowHalf: a, highHalf: b)
 }
-// CHECK-arm64: _$s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
+// CHECK-arm64: s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 #endif
 
 func concat2x32(_ a: SIMD2<Float>, _ b: SIMD2<Float>) -> SIMD4<Float> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
+// CHECK: s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 
 func concat4x32(_ a: SIMD4<Float>, _ b: SIMD4<Float>) -> SIMD8<Float> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
+// CHECK: s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x64(_ a: SIMD2<Double>, _ b: SIMD2<Double>) -> SIMD4<Double> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
+// CHECK: s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
 // CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDMaskInitializers.swift.gyb b/test/stdlib/SIMDMaskInitializers.swift.gyb
index b92e1059bf7dd..b972196382748 100644
--- a/test/stdlib/SIMDMaskInitializers.swift.gyb
+++ b/test/stdlib/SIMDMaskInitializers.swift.gyb
@@ -38,7 +38,7 @@ func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>>
 func concat8x8(_ a: SIMDMask<SIMD8<Int8>>, _ b: SIMDMask<SIMD8<Int8>>) -> SIMDMask<SIMD16<Int8>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -46,13 +46,13 @@ func concat8x8(_ a: SIMDMask<SIMD8<Int8>>, _ b: SIMDMask<SIMD8<Int8>>) -> SIMDMa
 func concat16x8(_ a: SIMDMask<SIMD16<Int8>>, _ b: SIMDMask<SIMD16<Int8>>) -> SIMDMask<SIMD32<Int8>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat4x16(_ a: SIMDMask<SIMD4<Int16>>, _ b: SIMDMask<SIMD4<Int16>>) -> SIMDMask<SIMD8<Int16>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -60,13 +60,13 @@ func concat4x16(_ a: SIMDMask<SIMD4<Int16>>, _ b: SIMDMask<SIMD4<Int16>>) -> SIM
 func concat8x16(_ a: SIMDMask<SIMD8<Int16>>, _ b: SIMDMask<SIMD8<Int16>>) -> SIMDMask<SIMD16<Int16>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x32(_ a: SIMDMask<SIMD2<Int32>>, _ b: SIMDMask<SIMD2<Int32>>) -> SIMDMask<SIMD4<Int32>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -74,11 +74,11 @@ func concat2x32(_ a: SIMDMask<SIMD2<Int32>>, _ b: SIMDMask<SIMD2<Int32>>) -> SIM
 func concat4x32(_ a: SIMDMask<SIMD4<Int32>>, _ b: SIMDMask<SIMD4<Int32>>) -> SIMDMask<SIMD8<Int32>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x64(_ a: SIMDMask<SIMD2<Int64>>, _ b: SIMDMask<SIMD2<Int64>>) -> SIMDMask<SIMD4<Int64>> {
   SIMDMask(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
+// CHECK: s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
 // CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDSignedInitializers.swift.gyb b/test/stdlib/SIMDSignedInitializers.swift.gyb
index 4acaf1399ec62..d6b16ca6a5c13 100644
--- a/test/stdlib/SIMDSignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDSignedInitializers.swift.gyb
@@ -37,7 +37,7 @@ func repeating${n}_int${bits}(_ scalar: Int${bits}) -> SIMD${n}<Int${bits}> {
 func concat8x8(_ a: SIMD8<Int8>, _ b: SIMD8<Int8>) -> SIMD16<Int8> {
   SIMD16(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers9concat8x8ys6SIMD16Vys4Int8VGs5SIMD8VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers9concat8x8ys6SIMD16Vys4Int8VGs5SIMD8VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -45,13 +45,13 @@ func concat8x8(_ a: SIMD8<Int8>, _ b: SIMD8<Int8>) -> SIMD16<Int8> {
 func concat16x8(_ a: SIMD16<Int8>, _ b: SIMD16<Int8>) -> SIMD32<Int8> {
   SIMD32(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat16x8ys6SIMD32Vys4Int8VGs6SIMD16VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat16x8ys6SIMD32Vys4Int8VGs6SIMD16VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat4x16(_ a: SIMD4<Int16>, _ b: SIMD4<Int16>) -> SIMD8<Int16> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat4x16ys5SIMD8Vys5Int16VGs5SIMD4VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat4x16ys5SIMD8Vys5Int16VGs5SIMD4VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -59,13 +59,13 @@ func concat4x16(_ a: SIMD4<Int16>, _ b: SIMD4<Int16>) -> SIMD8<Int16> {
 func concat8x16(_ a: SIMD8<Int16>, _ b: SIMD8<Int16>) -> SIMD16<Int16> {
   SIMD16(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat8x16ys6SIMD16Vys5Int16VGs5SIMD8VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat8x16ys6SIMD16Vys5Int16VGs5SIMD8VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x32(_ a: SIMD2<Int32>, _ b: SIMD2<Int32>) -> SIMD4<Int32> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat2x32ys5SIMD4Vys5Int32VGs5SIMD2VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat2x32ys5SIMD4Vys5Int32VGs5SIMD2VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -73,12 +73,12 @@ func concat2x32(_ a: SIMD2<Int32>, _ b: SIMD2<Int32>) -> SIMD4<Int32> {
 func concat4x32(_ a: SIMD4<Int32>, _ b: SIMD4<Int32>) -> SIMD8<Int32> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat4x32ys5SIMD8Vys5Int32VGs5SIMD4VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat4x32ys5SIMD8Vys5Int32VGs5SIMD4VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x64(_ a: SIMD2<Int64>, _ b: SIMD2<Int64>) -> SIMD4<Int64> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s22SIMDSignedInitializers10concat2x64ys5SIMD4Vys5Int64VGs5SIMD2VyAFG_AJtF:
+// CHECK: s22SIMDSignedInitializers10concat2x64ys5SIMD4Vys5Int64VGs5SIMD2VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
diff --git a/test/stdlib/SIMDUnsignedInitializers.swift.gyb b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
index 5a502ff8aa266..191b75d7d29fb 100644
--- a/test/stdlib/SIMDUnsignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
@@ -37,7 +37,7 @@ func repeating${n}_uint${bits}(_ scalar: UInt${bits}) -> SIMD${n}<UInt${bits}> {
 func concat8x8(_ a: SIMD8<UInt8>, _ b: SIMD8<UInt8>) -> SIMD16<UInt8> {
   SIMD16(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers9concat8x8ys6SIMD16Vys5UInt8VGs5SIMD8VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers9concat8x8ys6SIMD16Vys5UInt8VGs5SIMD8VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -45,13 +45,13 @@ func concat8x8(_ a: SIMD8<UInt8>, _ b: SIMD8<UInt8>) -> SIMD16<UInt8> {
 func concat16x8(_ a: SIMD16<UInt8>, _ b: SIMD16<UInt8>) -> SIMD32<UInt8> {
   SIMD32(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat16x8ys6SIMD32Vys5UInt8VGs6SIMD16VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat16x8ys6SIMD32Vys5UInt8VGs6SIMD16VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat4x16(_ a: SIMD4<UInt16>, _ b: SIMD4<UInt16>) -> SIMD8<UInt16> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat4x16ys5SIMD8Vys6UInt16VGs5SIMD4VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat4x16ys5SIMD8Vys6UInt16VGs5SIMD4VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -59,13 +59,13 @@ func concat4x16(_ a: SIMD4<UInt16>, _ b: SIMD4<UInt16>) -> SIMD8<UInt16> {
 func concat8x16(_ a: SIMD8<UInt16>, _ b: SIMD8<UInt16>) -> SIMD16<UInt16> {
   SIMD16(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat8x16ys6SIMD16Vys6UInt16VGs5SIMD8VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat8x16ys6SIMD16Vys6UInt16VGs5SIMD8VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x32(_ a: SIMD2<UInt32>, _ b: SIMD2<UInt32>) -> SIMD4<UInt32> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat2x32ys5SIMD4Vys6UInt32VGs5SIMD2VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat2x32ys5SIMD4Vys6UInt32VGs5SIMD2VyAFG_AJtF:
 // CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
 // CHECKO-arm64-NEXT: ret
 // CHECKO-x86_64: punpcklqdq
@@ -73,11 +73,11 @@ func concat2x32(_ a: SIMD2<UInt32>, _ b: SIMD2<UInt32>) -> SIMD4<UInt32> {
 func concat4x32(_ a: SIMD4<UInt32>, _ b: SIMD4<UInt32>) -> SIMD8<UInt32> {
   SIMD8(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat4x32ys5SIMD8Vys6UInt32VGs5SIMD4VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat4x32ys5SIMD8Vys6UInt32VGs5SIMD4VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret
 
 func concat2x64(_ a: SIMD2<UInt64>, _ b: SIMD2<UInt64>) -> SIMD4<UInt64> {
   SIMD4(lowHalf: a, highHalf: b)
 }
-// CHECK: _$s24SIMDUnsignedInitializers10concat2x64ys5SIMD4Vys6UInt64VGs5SIMD2VyAFG_AJtF:
+// CHECK: s24SIMDUnsignedInitializers10concat2x64ys5SIMD4Vys6UInt64VGs5SIMD2VyAFG_AJtF:
 // CHECKO-arm64-NEXT: ret

From 57433bd4db46ffc81435597802eb9ed86484789d Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Tue, 27 May 2025 08:41:56 -0400
Subject: [PATCH 5/5] Reflect new files into Runtimes/Core/core build.

---
 Runtimes/Core/core/CMakeLists.txt | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/Runtimes/Core/core/CMakeLists.txt b/Runtimes/Core/core/CMakeLists.txt
index ef45236a7ff79..6e5dd866812f5 100644
--- a/Runtimes/Core/core/CMakeLists.txt
+++ b/Runtimes/Core/core/CMakeLists.txt
@@ -264,8 +264,14 @@ if(SwiftCore_ENABLE_COMMANDLINE_SUPPORT)
 endif()
 
 if(SwiftCore_ENABLE_VECTOR_TYPES)
-  gyb_expand(SIMDConcreteOperations.swift.gyb
-    SIMDConcreteOperations.swift
+  gyb_expand(SIMDFloatConcreteOperations.swift.gyb
+    SIMDFloatConcreteOperations.swift
+    FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
+  gyb_expand(SIMDIntegerConcreteOperations.swift.gyb
+    SIMDIntegerConcreteOperations.swift
+    FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
+  gyb_expand(SIMDMaskConcreteOperations.swift.gyb
+    SIMDMaskConcreteOperations.swift
     FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
   gyb_expand(SIMDVectorTypes.swift.gyb
     SIMDVectorTypes.swift
@@ -273,7 +279,9 @@ if(SwiftCore_ENABLE_VECTOR_TYPES)
 
   target_sources(swiftCore PRIVATE
     SIMDVector.swift
-    "${CMAKE_CURRENT_BINARY_DIR}/SIMDConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDFloatConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDIntegerConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDMaskConcreteOperations.swift"
     "${CMAKE_CURRENT_BINARY_DIR}/SIMDVectorTypes.swift")
 endif()