From 78a3d93ffdebd9a384b3085ccde0aaf74062067b Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Fri, 26 Apr 2024 09:56:41 -0700
Subject: [PATCH 01/33] try setting MAX_JOBS=4 for oom in arm wheel

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index e0b34c24b..1761d2276 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -201,7 +201,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From 160daf376219476acede59aecdd5a719102de2fa Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Sun, 5 May 2024 00:55:03 -0700
Subject: [PATCH 02/33] change to desired_cuda

---
 aarch64_linux/aarch64_ci_build.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh
index 6d9a2f6b0..64632ff13 100644
--- a/aarch64_linux/aarch64_ci_build.sh
+++ b/aarch64_linux/aarch64_ci_build.sh
@@ -26,8 +26,8 @@ cd /
 git config --global --add safe.directory /pytorch
 pip install -r /pytorch/requirements.txt
 pip install auditwheel
-if [ -n "$GPU_ARCH_VERSION" ]; then
-    echo "BASE_CUDA_VERSION is set to: $GPU_ARCH_VERSION"
+if [ -n "$DESIRED_CUDA" ]; then
+    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
     python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
 else
     echo "BASE_CUDA_VERSION is not set."

From 78dd24d3faab9e154807b372e115cbb40984743d Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Sun, 5 May 2024 07:26:15 -0700
Subject: [PATCH 03/33] change desired_cuda check

---
 aarch64_linux/aarch64_ci_build.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh
index 64632ff13..5451df2b6 100644
--- a/aarch64_linux/aarch64_ci_build.sh
+++ b/aarch64_linux/aarch64_ci_build.sh
@@ -26,10 +26,10 @@ cd /
 git config --global --add safe.directory /pytorch
 pip install -r /pytorch/requirements.txt
 pip install auditwheel
-if [ -n "$DESIRED_CUDA" ]; then
+if [ "$DESIRED_CUDA" = "cpu" ]; then
+    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
+    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+else
     echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
     python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
-else
-    echo "BASE_CUDA_VERSION is not set."
-    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
 fi
\ No newline at end of file

From 6eed27215a8a48afd5b006ee38da7067edfb603a Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Mon, 6 May 2024 13:19:07 -0700
Subject: [PATCH 04/33] change path

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 1761d2276..b34c5cd25 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -202,7 +202,7 @@ def parse_arguments():
 
     print("Building PyTorch wheel")
     build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
-    os.system("python setup.py clean")
+    os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
     if override_package_version is not None:

From fa2a485af03cd86e81bd8102f3fb6ae537916537 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Tue, 7 May 2024 07:25:37 -0700
Subject: [PATCH 05/33] remove libopenblas file

---
 aarch64_linux/aarch64_wheel_ci_build.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index b34c5cd25..aa6aaabc4 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -122,7 +122,6 @@ def update_wheel(wheel_path) -> None:
         "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
         "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
         "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
-        "/opt/conda/envs/aarch64_env/lib/libopenblas.so.0",
         "/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
         "/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
         "/acl/build/libarm_compute.so",

From 19feff41ee5e58a62f4f09b59bb219341adb771a Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Tue, 7 May 2024 16:44:52 -0700
Subject: [PATCH 06/33] test only Hopper (arch 9.0) for quicker turnaround time

---
 aarch64_linux/aarch64_wheel_ci_build.py | 3 +--
 manywheel/build_cuda.sh                 | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index aa6aaabc4..0c653184c 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -122,7 +122,6 @@ def update_wheel(wheel_path) -> None:
         "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
         "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
         "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
-        "/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
         "/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
         "/acl/build/libarm_compute.so",
         "/acl/build/libarm_compute_graph.so",
@@ -200,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index 4fc1ed278..f10aa3bc3 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -60,7 +60,7 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
 TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
 case ${CUDA_VERSION} in
     12.4)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
+        TORCH_CUDA_ARCH_LIST="9.0"
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
     12.1)

From 3931c11781518a3340d033daf225f27b82d062eb Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Wed, 8 May 2024 19:17:41 -0700
Subject: [PATCH 07/33] add back max_jobs=4

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 0c653184c..8d0882098 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From f2f8250ef682b0efd4d8d7dc55131d9aad1c9477 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Wed, 8 May 2024 20:46:22 -0700
Subject: [PATCH 08/33] cherrypick #1808

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 manywheel/build_cuda.sh                 | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 8d0882098..0c653184c 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index f10aa3bc3..2837b3793 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -155,7 +155,7 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then
             "/usr/local/cuda/lib64/libcudart.so.12"
             "/usr/local/cuda/lib64/libnvToolsExt.so.1"
             "/usr/local/cuda/lib64/libnvrtc.so.12"
-            "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.1"
+            "/usr/local/cuda/lib64/libnvrtc-builtins.so"
         )
         DEPS_SONAME+=(
             "libcudnn_adv_infer.so.8"
@@ -170,7 +170,7 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then
             "libcudart.so.12"
             "libnvToolsExt.so.1"
             "libnvrtc.so.12"
-            "libnvrtc-builtins.so.12.1"
+            "libnvrtc-builtins.so"
         )
     else
         echo "Using nvidia libs from pypi."

From 71bc4f283cae3f13659cde8955469f7b2c49ee40 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Wed, 8 May 2024 21:30:42 -0700
Subject: [PATCH 09/33] need maxjobs=4

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 0c653184c..8d0882098 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From 5dcd9dd1dfb8bde8b6b8caf01126fd3b7eed0684 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Thu, 9 May 2024 08:16:42 -0700
Subject: [PATCH 10/33] fix path to copy wheel

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 8d0882098..2a9573883 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -138,7 +138,7 @@ def update_wheel(wheel_path) -> None:
     os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
     shutil.move(
         f"{folder}/cuda_wheel/{wheelname}",
-        f"/dist/{wheelname}",
+        f"{folder}/{wheelname}",
         copy_function=shutil.copy2,
     )
     os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")

From 3c9ff9813d903d4c76ff06894a0dd46c9a3a32c4 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Thu, 9 May 2024 08:18:16 -0700
Subject: [PATCH 11/33] fix path to rm

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 2a9573883..38fd58314 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -141,7 +141,7 @@ def update_wheel(wheel_path) -> None:
         f"{folder}/{wheelname}",
         copy_function=shutil.copy2,
     )
-    os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")
+    os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/")
 
 
 def complete_wheel(folder: str) -> str:

From 42ea49397617685fb026956cd37cfc00dff4c89f Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Thu, 9 May 2024 08:23:00 -0700
Subject: [PATCH 12/33] try set max jobs to 5 as 4 is too slow

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 38fd58314..876fbfe76 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=4 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From f670d5bfcf3f86ea7b5af179bdf5abfe33f0acf0 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Thu, 16 May 2024 08:48:30 -0700
Subject: [PATCH 13/33] build only CUDA arch 9.0 for aarch64

---
 manywheel/build_cuda.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index 2837b3793..6dc2e0f90 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -60,7 +60,11 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
 TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
 case ${CUDA_VERSION} in
     12.4)
-        TORCH_CUDA_ARCH_LIST="9.0"
+        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
+            TORCH_CUDA_ARCH_LIST="9.0"
+        else
+            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
+        fi
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
     12.1)

From 0eecef2c979dd9406dd76a770b722d5967101a72 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Fri, 17 May 2024 04:58:29 -0700
Subject: [PATCH 14/33] add libopenblas.so new location (from OpenBLAS)

---
 aarch64_linux/aarch64_wheel_ci_build.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 876fbfe76..2127e5d0c 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -123,6 +123,7 @@ def update_wheel(wheel_path) -> None:
         "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
         "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
         "/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
+        "/opt/OpenBLAS/lib/libopenblas.so.0",
         "/acl/build/libarm_compute.so",
         "/acl/build/libarm_compute_graph.so",
         "/acl/build/libarm_compute_core.so",

From 3841eaf931a2701d2b77768431e3519aa7805a92 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Fri, 17 May 2024 07:09:49 -0700
Subject: [PATCH 15/33] upgrade ACL version to 24.04 (1824)

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 aarch64_linux/build_aarch64_wheel.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 2127e5d0c..9a9ed88e3 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -78,7 +78,7 @@ def build_ArmComputeLibrary() -> None:
             "clone",
             "https://github.com/ARM-software/ComputeLibrary.git",
             "-b",
-            "v23.08",
+            "v24.04",
             "--depth",
             "1",
             "--shallow-submodules",
diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py
index 0ff286ad2..3956f0463 100755
--- a/aarch64_linux/build_aarch64_wheel.py
+++ b/aarch64_linux/build_aarch64_wheel.py
@@ -229,7 +229,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
     print('Building Arm Compute Library')
     acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
                               "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"])
-    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}")
+    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.04 {git_clone_flags}")
     host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
 
 

From 9f62e48810cae2a1f6a678054663fa716b1de552 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Fri, 17 May 2024 13:05:17 -0700
Subject: [PATCH 16/33] remove copy libarm_compute_core.so

---
 aarch64_linux/aarch64_wheel_ci_build.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 9a9ed88e3..1a10c8efc 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -126,7 +126,6 @@ def update_wheel(wheel_path) -> None:
         "/opt/OpenBLAS/lib/libopenblas.so.0",
         "/acl/build/libarm_compute.so",
         "/acl/build/libarm_compute_graph.so",
-        "/acl/build/libarm_compute_core.so",
     ]
     # Copy libraries to unzipped_folder/a/lib
     for lib_path in libs_to_copy:
@@ -200,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From 374e9e18fcc7a8cc82128ec593862af3d71456d0 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Fri, 17 May 2024 19:20:10 -0700
Subject: [PATCH 17/33] still need max_jobs=5 as 6 oom

---
 aarch64_linux/README.md                 | 2 +-
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aarch64_linux/README.md b/aarch64_linux/README.md
index 583ed4af9..4a3be5312 100644
--- a/aarch64_linux/README.md
+++ b/aarch64_linux/README.md
@@ -16,4 +16,4 @@ __NOTE:__ CI build is currently __EXPERMINTAL__
 This app allows a person to build using AWS EC3 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. Can be used in a codebuild CD or from a local system.
 
 ### Usage
-```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
+```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
\ No newline at end of file
diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 1a10c8efc..5297cf42d 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From c49a7570dac77553563243d9fab0ff08a2200de9 Mon Sep 17 00:00:00 2001
From: snadampal <87143774+snadampal@users.noreply.github.com>
Date: Thu, 2 May 2024 12:11:00 -0500
Subject: [PATCH 18/33] aarch64: cd: fix issue with invoking cpu wheel build
 option (#1791)

---
 aarch64_linux/aarch64_ci_build.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh
index 5451df2b6..dc9f25a1f 100644
--- a/aarch64_linux/aarch64_ci_build.sh
+++ b/aarch64_linux/aarch64_ci_build.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 set -eux -o pipefail
 
+GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
+
 SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
 source $SCRIPTPATH/aarch64_ci_setup.sh
 

From d31681eceb335c65022e19ac6e3db0cad0c93233 Mon Sep 17 00:00:00 2001
From: Aleksei Nikiforov
 <103434461+AlekseiNikiforovIBM@users.noreply.github.com>
Date: Fri, 3 May 2024 22:36:04 +0200
Subject: [PATCH 19/33] Update s390x builder (#1802)

* Disable automatic building of s390x docker image

* Update docker image and build scripts for s390x

* Switch devtoolset to 13

There is a not yet investigated build failure
caused by gcc 12, but it doesn't reproduce
with gcc 13.

* Adapt binaries check for s390x

* Switch to ubuntu:24.04 for s390x

* Update libgomp.so.1 path for s390x
---
 .github/workflows/build-manywheel-images.yml | 15 -----
 check_binary.sh                              |  6 +-
 manywheel/Dockerfile_s390x                   | 62 +++++++++++---------
 manywheel/build.sh                           |  2 +-
 manywheel/build_cpu.sh                       |  6 +-
 manywheel/build_scripts/build.sh             | 55 +++++++++++------
 manywheel/build_scripts/manylinux1-check.py  |  2 +-
 7 files changed, 82 insertions(+), 66 deletions(-)

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index cf7d82828..a599635f8 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -137,18 +137,3 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
-  build-docker-cpu-s390x:
-    runs-on: linux.s390x
-    env:
-      GPU_ARCH_TYPE: cpu-s390x
-    steps:
-      - name: Checkout PyTorch
-        uses: actions/checkout@v3
-      - name: Authenticate if WITH_PUSH
-        run: |
-          if [[ "${WITH_PUSH}" == true ]]; then
-            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
-          fi
-      - name: Build Docker Image
-        run: |
-          manywheel/build_docker.sh
diff --git a/check_binary.sh b/check_binary.sh
index 98a5267eb..be2b5252b 100755
--- a/check_binary.sh
+++ b/check_binary.sh
@@ -330,7 +330,7 @@ fi
 if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
   echo "Checking that MKL is available"
   build_and_run_example_cpp check-torch-mkl
-elif [[ "$(uname -m)" != "arm64" ]]; then
+elif [[ "$(uname -m)" != "arm64" && "$(uname -m)" != "s390x" ]]; then
   if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
     if [[ "$(uname -m)" == "aarch64" ]]; then
       echo "Checking that MKLDNN is available on aarch64"
@@ -354,7 +354,7 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
   echo "Checking that XNNPACK is available"
   build_and_run_example_cpp check-torch-xnnpack
 else
-  if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
+  if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]] && [[ "$(uname -m)" != "s390x"  ]]; then
     echo "Checking that XNNPACK is available"
     pushd /tmp
     python -c 'import torch.backends.xnnpack; exit(0 if torch.backends.xnnpack.enabled else 1)'
@@ -375,7 +375,7 @@ if [[ "$OSTYPE" == "msys" ]]; then
 fi
 
 # Test that CUDA builds are setup correctly
-if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* ]]; then
+if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* && "$(uname -m)" != "s390x" ]]; then
   if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
     build_and_run_example_cpp check-torch-cuda
   else
diff --git a/manywheel/Dockerfile_s390x b/manywheel/Dockerfile_s390x
index e30d0bea5..caa5d00bd 100644
--- a/manywheel/Dockerfile_s390x
+++ b/manywheel/Dockerfile_s390x
@@ -1,18 +1,15 @@
-FROM --platform=linux/s390x docker.io/redhat/ubi9 as base
+FROM --platform=linux/s390x docker.io/ubuntu:24.04 as base
 
-# earliest available version in ubi9
-ARG DEVTOOLSET_VERSION=12
-
-# Language variabes
-ENV LC_ALL=en_US.UTF-8
-ENV LANG=en_US.UTF-8
-ENV LANGUAGE=en_US.UTF-8
+# Language variables
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+ENV LANGUAGE=C.UTF-8
 
 # Installed needed OS packages. This is to support all
 # the binary builds (torch, vision, audio, text, data)
-RUN dnf -y install redhat-release
-RUN dnf -y update
-RUN dnf install -y --allowerasing \
+RUN apt update ; apt upgrade -y
+RUN apt install -y \
+  build-essential \
   autoconf \
   automake \
   bzip2 \
@@ -27,20 +24,19 @@ RUN dnf install -y --allowerasing \
   util-linux \
   wget \
   which \
-  xz \
+  xz-utils \
   less \
   zstd \
-  libgomp \
   cmake \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
-  gcc-toolset-${DEVTOOLSET_VERSION}-binutils
-
-# Ensure the expected gcc-toolset is used
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-
+  python3 \
+  python3-dev \
+  python3-setuptools \
+  python3-yaml \
+  python3-typing-extensions \
+  libblas-dev \
+  libopenblas-dev \
+  liblapack-dev \
+  libatlas-base-dev
 
 # git236+ would refuse to run git commands in repos owned by other users
 # Which causes version check to fail, as pytorch repo is bind-mounted into the image
@@ -57,9 +53,21 @@ ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 
+# Separate stage that installs patchelf
+FROM base as patchelf
+# Install patchelf
+ADD ./common/install_patchelf.sh install_patchelf.sh
+RUN bash ./install_patchelf.sh && rm install_patchelf.sh
+RUN cp $(which patchelf) /patchelf
+
+FROM patchelf as python
+# build python
+COPY manywheel/build_scripts /build_scripts
+ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
+RUN bash build_scripts/build.sh && rm -r build_scripts
+
 FROM openssl as final
-# remove unncessary python versions
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
-RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
-RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
-RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
+COPY --from=python             /opt/python                           /opt/python
+COPY --from=python             /opt/_internal                        /opt/_internal
+COPY --from=python             /opt/python/cp37-cp37m/bin/auditwheel /usr/local/bin/auditwheel
+COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf
diff --git a/manywheel/build.sh b/manywheel/build.sh
index 43725615d..a04d05869 100755
--- a/manywheel/build.sh
+++ b/manywheel/build.sh
@@ -15,7 +15,7 @@ case "${GPU_ARCH_TYPE:-BLANK}" in
     rocm)
         bash "${SCRIPTPATH}/build_rocm.sh"
         ;;
-    cpu | cpu-cxx11-abi)
+    cpu | cpu-cxx11-abi | cpu-s390x)
         bash "${SCRIPTPATH}/build_cpu.sh"
         ;;
     *)
diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh
index 4669c8a3c..24c95f14e 100755
--- a/manywheel/build_cpu.sh
+++ b/manywheel/build_cpu.sh
@@ -32,7 +32,11 @@ if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
 elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
     LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
 elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
+    if [[ "$(uname -m)" == "s390x" ]]; then
+        LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
+    else
+        LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
+    fi
 fi
 
 DEPS_LIST=(
diff --git a/manywheel/build_scripts/build.sh b/manywheel/build_scripts/build.sh
index d139abcb6..c545ca967 100644
--- a/manywheel/build_scripts/build.sh
+++ b/manywheel/build_scripts/build.sh
@@ -15,22 +15,37 @@ CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131
 AUTOCONF_ROOT=autoconf-2.69
 AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
 
-# Dependencies for compiling Python that we want to remove from
-# the final image after compiling Python
-PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel"
-
-# Libraries that are allowed as part of the manylinux1 profile
-MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
-
 # Get build utilities
 MY_DIR=$(dirname "${BASH_SOURCE[0]}")
 source $MY_DIR/build_utils.sh
 
-# Development tools and libraries
-yum -y install bzip2 make git patch unzip bison yasm diffutils \
-    automake which file cmake28 \
-    kernel-devel-`uname -r` \
-    ${PYTHON_COMPILE_DEPS}
+if [ "$(uname -m)" != "s390x" ] ; then
+    # Dependencies for compiling Python that we want to remove from
+    # the final image after compiling Python
+    PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel"
+
+    # Libraries that are allowed as part of the manylinux1 profile
+    MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
+
+    # Development tools and libraries
+    yum -y install bzip2 make git patch unzip bison yasm diffutils \
+        automake which file cmake28 \
+        kernel-devel-`uname -r` \
+        ${PYTHON_COMPILE_DEPS}
+else
+    # Dependencies for compiling Python that we want to remove from
+    # the final image after compiling Python
+    PYTHON_COMPILE_DEPS="zlib1g-dev libbz2-dev libncurses-dev libsqlite3-dev libdb-dev libpcap-dev liblzma-dev libffi-dev"
+
+    # Libraries that are allowed as part of the manylinux1 profile
+    MANYLINUX1_DEPS="libglib2.0-dev libX11-dev libncurses-dev"
+
+    # Development tools and libraries
+    apt install -y bzip2 make git patch unzip diffutils \
+        automake which file cmake \
+        linux-headers-virtual \
+        ${PYTHON_COMPILE_DEPS}
+fi
 
 # Install newest autoconf
 build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
@@ -76,12 +91,16 @@ ln -s $PY37_BIN/auditwheel /usr/local/bin/auditwheel
 
 # Clean up development headers and other unnecessary stuff for
 # final image
-yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
-    avahi freetype bitstream-vera-fonts \
-    ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
-yum -y install ${MANYLINUX1_DEPS}
-yum -y clean all > /dev/null 2>&1
-yum list installed
+if [ "$(uname -m)" != "s390x" ] ; then
+    yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
+        avahi freetype bitstream-vera-fonts \
+        ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
+    yum -y install ${MANYLINUX1_DEPS}
+    yum -y clean all > /dev/null 2>&1
+    yum list installed
+else
+    apt purge -y ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
+fi
 # we don't need libpython*.a, and they're many megabytes
 find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
 # Strip what we can -- and ignore errors, because this just attempts to strip
diff --git a/manywheel/build_scripts/manylinux1-check.py b/manywheel/build_scripts/manylinux1-check.py
index fa77ef43a..7cb62e0c0 100644
--- a/manywheel/build_scripts/manylinux1-check.py
+++ b/manywheel/build_scripts/manylinux1-check.py
@@ -3,7 +3,7 @@
 def is_manylinux1_compatible():
     # Only Linux, and only x86-64 / i686
     from distutils.util import get_platform
-    if get_platform() not in ["linux-x86_64", "linux-i686"]:
+    if get_platform() not in ["linux-x86_64", "linux-i686", "linux-s390x"]:
         return False
 
     # Check for presence of _manylinux module

From 4fcabbe91513ed6243ed18d0c74c0a96879793e1 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Mon, 6 May 2024 12:26:23 -0400
Subject: [PATCH 20/33] Fix cuda windows validations update cuda driver.
 (#1810)

---
 .github/workflows/validate-windows-binaries.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/validate-windows-binaries.yml b/.github/workflows/validate-windows-binaries.yml
index fed7b1939..cef51b77f 100644
--- a/.github/workflows/validate-windows-binaries.yml
+++ b/.github/workflows/validate-windows-binaries.yml
@@ -127,7 +127,7 @@ jobs:
 
         printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json
         source /c/Jenkins/Miniconda3/etc/profile.d/conda.sh
-        if [[ ${MATRIX_GPU_ARCH_VERSION} == "12.1" ]]; then
+        if [[ ${MATRIX_GPU_ARCH_TYPE} == "cuda" ]]; then
           ./windows/internal/driver_update.bat
         fi
         source ./.github/scripts/validate_binaries.sh

From a8f71c070ea11da16493c88efff7bf725877564a Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Tue, 7 May 2024 11:44:59 -0400
Subject: [PATCH 21/33] Revert "aarch64: upgrade ACL version to 24.04" (#1813)

This reverts commit 6b90c090ebb01d86f65493d1d609d7fadc0feab8.
---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 aarch64_linux/build_aarch64_wheel.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 5297cf42d..2444429cb 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -78,7 +78,7 @@ def build_ArmComputeLibrary() -> None:
             "clone",
             "https://github.com/ARM-software/ComputeLibrary.git",
             "-b",
-            "v24.04",
+            "v23.08",
             "--depth",
             "1",
             "--shallow-submodules",
diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py
index 3956f0463..0ff286ad2 100755
--- a/aarch64_linux/build_aarch64_wheel.py
+++ b/aarch64_linux/build_aarch64_wheel.py
@@ -229,7 +229,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
     print('Building Arm Compute Library')
     acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
                               "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"])
-    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.04 {git_clone_flags}")
+    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}")
     host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
 
 

From 57425f477871e0ec2425526ee930fc14981e73ae Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Tue, 7 May 2024 17:09:37 -0400
Subject: [PATCH 22/33] Don't deactivate/remove conda on linux after validation
 (#1814)

* Don't deactivate/remove conda on linux

* test
---
 .github/scripts/validate_binaries.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh
index 8779e8064..bf5c15690 100755
--- a/.github/scripts/validate_binaries.sh
+++ b/.github/scripts/validate_binaries.sh
@@ -54,6 +54,11 @@ else
         ${PWD}/check_binary.sh
     fi
 
+    # We are only interested in CUDA tests and Python 3.8-3.11. Not all required libraries are available for 3.12 yet.
+    if [[ ${INCLUDE_TEST_OPS:-} == 'true' &&  ${MATRIX_GPU_ARCH_TYPE} == 'cuda' && ${MATRIX_PYTHON_VERSION} != "3.12" ]]; then
+        source ./.github/scripts/validate_test_ops.sh
+    fi
+
     if [[ ${TARGET_OS} == 'windows' ]]; then
         python  ./test/smoke_test/smoke_test.py ${TEST_SUFFIX}
     else
@@ -64,13 +69,8 @@ else
         export PATH=${OLD_PATH}
     fi
 
-    # We are only interested in CUDA tests and Python 3.8-3.11. Not all requirement libraries are available for 3.12 yet.
-    if [[ ${INCLUDE_TEST_OPS:-} == 'true' &&  ${MATRIX_GPU_ARCH_TYPE} == 'cuda' && ${MATRIX_PYTHON_VERSION} != "3.12" ]]; then
-        source ./.github/scripts/validate_test_ops.sh
-    fi
-
-    # TODO: remove if statement currently this step is timing out on linx-aarch64
-    if [[ ${TARGET_OS} != 'linux-aarch64' ]]; then
+    # This is an optional step
+    if [[ ${TARGET_OS} != linux*  ]]; then
         conda deactivate
         conda env remove -n ${ENV_NAME}
     fi

From 8d58e64375770cc53418711bab926175256c7f71 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Fri, 10 May 2024 12:50:47 -0400
Subject: [PATCH 23/33] Add manylinux_2_28 image (#1816)

* Add manylinux_2_28 image
---
 .github/workflows/build-manywheel-images.yml |  15 ++
 manywheel/Dockerfile_2_28                    | 143 +++++++++++++++++++
 manywheel/build_docker.sh                    |   8 ++
 3 files changed, 166 insertions(+)
 create mode 100644 manywheel/Dockerfile_2_28

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index a599635f8..f308edcac 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -107,6 +107,21 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  build-docker-cpu-manylinux_2_28:
+    runs-on: ubuntu-22.04
+    env:
+      GPU_ARCH_TYPE: cpu-manylinux_2_28
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
   build-docker-cpu-aarch64:
     runs-on: linux.arm64.2xlarge
     env:
diff --git a/manywheel/Dockerfile_2_28 b/manywheel/Dockerfile_2_28
new file mode 100644
index 000000000..6566f115d
--- /dev/null
+++ b/manywheel/Dockerfile_2_28
@@ -0,0 +1,143 @@
+# syntax = docker/dockerfile:experimental
+ARG ROCM_VERSION=3.7
+ARG BASE_CUDA_VERSION=11.8
+ARG GPU_IMAGE=amd64/almalinux:8
+FROM quay.io/pypa/manylinux_2_28_x86_64 as base
+
+ENV LC_ALL en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US.UTF-8
+
+ARG DEVTOOLSET_VERSION=11
+RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
+ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
+
+# cmake-3.18.4 from pip
+RUN yum install -y python3-pip && \
+    python3 -mpip install cmake==3.18.4 && \
+    ln -s /usr/local/bin/cmake /usr/bin/cmake
+
+FROM base as openssl
+# Install openssl (this must precede `build python` step)
+# (In order to have a proper SSL module, Python is compiled
+# against a recent openssl [see env vars above], which is linked
+# statically. We delete openssl afterwards.)
+ADD ./common/install_openssl.sh install_openssl.sh
+RUN bash ./install_openssl.sh && rm install_openssl.sh
+
+
+# remove unnecessary python versions
+RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
+RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
+RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
+RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
+
+FROM base as cuda
+ARG BASE_CUDA_VERSION=11.8
+# Install CUDA
+ADD ./common/install_cuda.sh install_cuda.sh
+RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
+
+FROM base as intel
+# MKL
+ADD ./common/install_mkl.sh install_mkl.sh
+RUN bash ./install_mkl.sh && rm install_mkl.sh
+
+FROM base as magma
+ARG BASE_CUDA_VERSION=10.2
+# Install magma
+ADD ./common/install_magma.sh install_magma.sh
+RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
+
+FROM base as jni
+# Install java jni header
+ADD ./common/install_jni.sh install_jni.sh
+ADD ./java/jni.h jni.h
+RUN bash ./install_jni.sh && rm install_jni.sh
+
+FROM base as libpng
+# Install libpng
+ADD ./common/install_libpng.sh install_libpng.sh
+RUN bash ./install_libpng.sh && rm install_libpng.sh
+
+FROM ${GPU_IMAGE} as common
+ARG DEVTOOLSET_VERSION=11
+ENV LC_ALL en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US.UTF-8
+RUN yum -y install epel-release
+RUN yum -y update
+RUN yum install -y \
+        autoconf \
+        automake \
+        bison \
+        bzip2 \
+        curl \
+        diffutils \
+        file \
+        git \
+        make \
+        patch \
+        perl \
+        unzip \
+        util-linux \
+        wget \
+        which \
+        xz \
+        gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
+
+RUN yum install -y \
+    https://repo.ius.io/ius-release-el7.rpm \
+    https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN yum swap -y git git236-core
+# git236+ would refuse to run git commands in repos owned by other users
+# Which causes version check to fail, as pytorch repo is bind-mounted into the image
+# Override this behaviour by treating every folder as safe
+# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
+RUN git config --global --add safe.directory "*"
+
+ENV SSL_CERT_FILE=/opt/_internal/certs.pem
+# Copy artifacts produced by the earlier build stages
+COPY --from=openssl            /opt/openssl                          /opt/openssl
+COPY --from=base               /opt/python                           /opt/python
+COPY --from=base               /opt/_internal                        /opt/_internal
+COPY --from=base               /usr/local/bin/auditwheel             /usr/local/bin/auditwheel
+COPY --from=intel              /opt/intel                            /opt/intel
+COPY --from=base               /usr/local/bin/patchelf               /usr/local/bin/patchelf
+COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/
+COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/
+COPY --from=libpng             /usr/local/include/png*               /usr/local/include/
+COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/
+COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/
+COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig
+COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h
+
+FROM common as cpu_final
+ARG BASE_CUDA_VERSION=11.8
+ARG DEVTOOLSET_VERSION=11
+# Ensure the expected devtoolset is used
+ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
+# cmake
+RUN yum install -y cmake3 && \
+    ln -s /usr/bin/cmake3 /usr/bin/cmake
+
+
+FROM cpu_final as cuda_final
+RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
+COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
+COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
+
+FROM common as rocm_final
+ARG ROCM_VERSION=3.7
+# Install ROCm
+ADD ./common/install_rocm.sh install_rocm.sh
+RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
+# cmake is already installed inside the rocm base image, but both 2 and 3 exist
+# cmake3 is needed for the later MIOpen custom build, so that step is last.
+RUN yum install -y cmake3 && \
+    rm -f /usr/bin/cmake && \
+    ln -s /usr/bin/cmake3 /usr/bin/cmake
+ADD ./common/install_miopen.sh install_miopen.sh
+RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh
index 4d3816588..05f3dad81 100755
--- a/manywheel/build_docker.sh
+++ b/manywheel/build_docker.sh
@@ -20,6 +20,14 @@ case ${GPU_ARCH_TYPE} in
         GPU_IMAGE=centos:7
         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
         ;;
+    cpu-manylinux_2_28)
+        TARGET=cpu_final
+        DOCKER_TAG=cpu
+        LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux_2_28-cpu
+        GPU_IMAGE=amd64/almalinux:8
+        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
+        MANY_LINUX_VERSION="2_28"
+        ;;
     cpu-aarch64)
         TARGET=final
         DOCKER_TAG=cpu-aarch64

From 5625515ada66cacb0218b6db2bce0d4253fbaa62 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Fri, 10 May 2024 14:17:32 -0400
Subject: [PATCH 24/33] Add manylinux_2_28 image - fix cmake (#1817)

* Manylinux 2_28 fix cmake install

* fix
---
 .github/workflows/build-manywheel-images.yml | 2 ++
 manywheel/Dockerfile_2_28                    | 9 +++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index f308edcac..33c0a12be 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -12,6 +12,7 @@ on:
     paths:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
+      - manywheel/Dockerfile_2_28
       - manywheel/Dockerfile_aarch64
       - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
@@ -21,6 +22,7 @@ on:
     paths:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
+      - manywheel/Dockerfile_2_28
       - manywheel/Dockerfile_aarch64
       - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
diff --git a/manywheel/Dockerfile_2_28 b/manywheel/Dockerfile_2_28
index 6566f115d..f5f21bf3d 100644
--- a/manywheel/Dockerfile_2_28
+++ b/manywheel/Dockerfile_2_28
@@ -16,7 +16,7 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
 # cmake-3.18.4 from pip
 RUN yum install -y python3-pip && \
     python3 -mpip install cmake==3.18.4 && \
-    ln -s /usr/local/bin/cmake /usr/bin/cmake
+    ln -s /usr/local/bin/cmake /usr/bin/cmake3
 
 FROM base as openssl
 # Install openssl (this must precede `build python` step)
@@ -119,10 +119,11 @@ ARG DEVTOOLSET_VERSION=11
 # Ensure the expected devtoolset is used
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-# cmake
-RUN yum install -y cmake3 && \
-    ln -s /usr/bin/cmake3 /usr/bin/cmake
 
+# cmake-3.18.4 from pip
+RUN yum install -y python3-pip && \
+    python3 -mpip install cmake==3.18.4 && \
+    ln -s /usr/local/bin/cmake /usr/bin/cmake3
 
 FROM cpu_final as cuda_final
 RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}

From b47978fa08dff62235ebf64c0a99a499daf32c3c Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Mon, 13 May 2024 11:00:51 -0400
Subject: [PATCH 25/33] Add Almalinux to manywheel build script (#1818)

---
 manywheel/build_common.sh   | 2 ++
 manywheel/build_cpu.sh      | 2 ++
 manywheel/build_cuda.sh     | 2 ++
 manywheel/build_libtorch.sh | 6 ++++--
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
index 2a6f37ec4..d68d9a323 100644
--- a/manywheel/build_common.sh
+++ b/manywheel/build_common.sh
@@ -25,6 +25,8 @@ retry () {
 OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
 if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
     retry yum install -q -y zip openssl
+elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
+    retry yum install -q -y zip openssl
 elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
     retry dnf install -q -y zip openssl
 elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
diff --git a/manywheel/build_cpu.sh b/manywheel/build_cpu.sh
index 24c95f14e..9d982bd30 100755
--- a/manywheel/build_cpu.sh
+++ b/manywheel/build_cpu.sh
@@ -31,6 +31,8 @@ if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
     LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
 elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
     LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
+elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
+    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
 elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
     if [[ "$(uname -m)" == "s390x" ]]; then
         LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index 6dc2e0f90..ffc280e42 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -117,6 +117,8 @@ mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
 OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
 if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
     LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
+elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
+    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
 elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
     LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
 elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
diff --git a/manywheel/build_libtorch.sh b/manywheel/build_libtorch.sh
index 2436d5b10..ea11f0c51 100644
--- a/manywheel/build_libtorch.sh
+++ b/manywheel/build_libtorch.sh
@@ -24,6 +24,8 @@ retry () {
 OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
 if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
     retry yum install -q -y zip openssl
+elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
+    retry yum install -q -y zip openssl
 elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
     retry dnf install -q -y zip openssl
 elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
@@ -278,7 +280,7 @@ for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do
             if [[ "$filepath" != "$destpath" ]]; then
                 cp $filepath $destpath
             fi
-            
+
             if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                 patchedpath=$(fname_without_so_number $destpath)
             else
@@ -299,7 +301,7 @@ for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do
                 patchedname=${patched[i]}
                 if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                     set +e
-                    origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*") 
+                    origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
                     ERRCODE=$?
                     set -e
                     if [ "$ERRCODE" -eq "0" ]; then

From 242fa685747ba6c345a9c5cc41fa0d8b2f0d0c57 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Mon, 13 May 2024 11:34:32 -0400
Subject: [PATCH 26/33] [BE] Remove unused files and dead code (#1819)

---
 .circleci/scripts/binary_checkout.sh     | 61 --------------------
 .circleci/scripts/binary_populate_env.sh |  1 -
 cron/update_s3_htmls.sh                  | 71 ------------------------
 manywheel/build_scripts/ssl-check.py     | 33 -----------
 manywheel/test_wheel.sh                  | 27 ---------
 5 files changed, 193 deletions(-)
 delete mode 100755 .circleci/scripts/binary_checkout.sh
 delete mode 100755 cron/update_s3_htmls.sh
 delete mode 100644 manywheel/build_scripts/ssl-check.py
 delete mode 100755 manywheel/test_wheel.sh

diff --git a/.circleci/scripts/binary_checkout.sh b/.circleci/scripts/binary_checkout.sh
deleted file mode 100755
index b634f5c9a..000000000
--- a/.circleci/scripts/binary_checkout.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-# Derived from https://github.com/pytorch/pytorch/blob/2c7df1360aa17d4a6d6726998eede3671bcb36ee/.circleci/scripts/binary_populate_env.sh
-
-set -eux -o pipefail
-
-retry () {
-    $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
-
-
-# This step runs on multiple executors with different envfile locations
-if [[ "$OSTYPE" == "msys" ]]; then
-  # windows executor (builds and tests)
-  rm -rf /c/w
-  ln -s "${HOME}" /c/w
-  WORK_DIR="/c/w"
-elif [[ -d "/home/circleci/project" ]]; then
-  # machine executor (binary tests)
-  WORK_DIR="${HOME}/project"
-else
-  # macos executor (builds and tests)
-  # docker executor (binary builds)
-  WORK_DIR="${HOME}"
-fi
-
-if [[ "$OSTYPE" == "msys" ]]; then
-  # We need to make the paths as short as possible on Windows
-  PYTORCH_ROOT="$WORK_DIR/p"
-  BUILDER_ROOT="$WORK_DIR/b"
-else
-  PYTORCH_ROOT="$WORK_DIR/pytorch"
-  BUILDER_ROOT="$WORK_DIR/builder"
-fi
-
-# Persist these variables for the subsequent steps
-echo "export WORK_DIR=${WORK_DIR}" >> ${BASH_ENV}
-echo "export PYTORCH_ROOT=${PYTORCH_ROOT}" >> ${BASH_ENV}
-echo "export BUILDER_ROOT=${BUILDER_ROOT}" >> ${BASH_ENV}
-
-# Clone the Pytorch branch
-retry git clone --depth 1 https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT"
-# Removed checking out pytorch/pytorch using CIRCLE_PR_NUMBER and CIRCLE_SHA1 as
-# those environment variables are tied to the host repo where the build is being
-# triggered.
-retry git submodule update --init --recursive
-pushd "$PYTORCH_ROOT"
-echo "Using Pytorch from "
-git --no-pager log --max-count 1
-popd
-
-# Clone the Builder master repo
-retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
-pushd "$BUILDER_ROOT"
-if [[ -n "${CIRCLE_SHA1:-}" ]]; then
-  # Check out a specific commit (typically the latest) from pytorch/builder
-  git reset --hard "${CIRCLE_SHA1}"
-  git checkout -q -B main
-fi
-echo "Using builder from "
-git --no-pager log --max-count 1
-popd
diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh
index 7e663a64b..5b141ac38 100755
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@@ -143,7 +143,6 @@ export BUILD_JNI=$BUILD_JNI
 export PIP_UPLOAD_FOLDER="$PIP_UPLOAD_FOLDER"
 export DOCKER_IMAGE="$DOCKER_IMAGE"
 
-# Remove WORKD_DIR, PYTORCH_ROOT, BUILDER_ROOT defined & persisted in binary_checkout.sh
 export MAC_PACKAGE_WORK_DIR="$WORK_DIR"
 export MINICONDA_ROOT="$WORK_DIR/miniconda"
 export PYTORCH_FINAL_PACKAGE_DIR="$WORK_DIR/final_pkgs"
diff --git a/cron/update_s3_htmls.sh b/cron/update_s3_htmls.sh
deleted file mode 100755
index 2dbd172a5..000000000
--- a/cron/update_s3_htmls.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# Update the html links file in the s3 bucket Pip uses this html file to look
-# through all the wheels and pick the most recently uploaded one (by the
-# version, not the actual date of upload). There is one html file per cuda/cpu
-# version
-
-# Upload for all CUDA/cpu versions if not given one to use
-if [[ -z "$CUDA_VERSIONS" ]]; then
-    export CUDA_VERSIONS=('cpu' 'cu92' 'cu100' 'cu101' 'cu102' 'cu110' 'rocm5.0' 'rocm5.1.1')
-fi
-
-if [[ -z "$HTML_NAME" ]]; then
-    export HTML_NAME='torch_nightly.html'
-fi
-
-# Dry run disabled by default for legacy purposes
-DRY_RUN=${DRY_RUN:-disabled}
-DRY_RUN_FLAG=""
-if [[ "${DRY_RUN}" != disabled ]]; then
-  DRY_RUN_FLAG="--dryrun"
-fi
-
-# NB: includes trailing slash (from PIP_UPLOAD_FOLDER)
-s3_base="s3://pytorch/whl/${PIP_UPLOAD_FOLDER}"
-
-# Pull all existing whls in this directory and turn them into html links
-# N.B. we use the .dev as a hacky way to exclude all wheels with old
-# 'yyyy.mm.dd' versions
-#
-# NB: replacing + with %2B is to fix old versions of pip which don't
-# this transform automatically.  This makes the display a little
-# ugly but whatever
-function generate_html() {
-  # Trailing slash required in both cases
-  dir="$1"
-  url_prefix="$2"
-  aws s3 ls "${s3_base}${dir}" | grep --only-matching '\S*\.whl' | sed 's#+#%2B#g' | sed 's#.*#<a href="'"${url_prefix}"'&">'"${url_prefix}"'&</a><br>#g'
-}
-
-# This will be included in all the sub-indices
-generate_html '' '../' > "root-$HTML_NAME"
-generate_html '' '' > "$HTML_NAME"
-
-for cuda_ver in "${CUDA_VERSIONS[@]}"; do
-    generate_html "${cuda_ver}/" "" > "${cuda_ver}-$HTML_NAME"
-    cat "root-$HTML_NAME" >> "${cuda_ver}-$HTML_NAME"
-    generate_html "${cuda_ver}/" "${cuda_ver}/" >> "$HTML_NAME"
-
-    # Check your work every once in a while
-    echo "Setting ${cuda_ver}/$HTML_NAME to:"
-    cat "${cuda_ver}-$HTML_NAME"
-    (
-      set -x
-      aws s3 cp ${DRY_RUN_FLAG} "${cuda_ver}-$HTML_NAME" "s3://pytorch/whl/${PIP_UPLOAD_FOLDER}${cuda_ver}/$HTML_NAME"  --acl public-read --cache-control 'no-cache,no-store,must-revalidate'
-    )
-
-done
-
-# Check your work every once in a while
-echo "Setting $HTML_NAME to:"
-cat "$HTML_NAME"
-(
-  set -x
-
-  # Upload the html file back up
-  # Note the lack of a / b/c duplicate / do cause problems in s3
-  aws s3 cp ${DRY_RUN_FLAG} "$HTML_NAME" "$s3_base$HTML_NAME"  --acl public-read --cache-control 'no-cache,no-store,must-revalidate'
-)
diff --git a/manywheel/build_scripts/ssl-check.py b/manywheel/build_scripts/ssl-check.py
deleted file mode 100644
index b91927173..000000000
--- a/manywheel/build_scripts/ssl-check.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# cf. https://github.com/pypa/manylinux/issues/53
-
-GOOD_SSL = "https://google.com"
-BAD_SSL = "https://self-signed.badssl.com"
-
-import sys
-
-print("Testing SSL certificate checking for Python:", sys.version)
-
-if (sys.version_info[:2] < (2, 7)
-    or sys.version_info[:2] < (3, 4)):
-    print("This version never checks SSL certs; skipping tests")
-    sys.exit(0)
-
-if sys.version_info[0] >= 3:
-    from urllib.request import urlopen
-    EXC = OSError
-else:
-    from urllib import urlopen
-    EXC = IOError
-
-print("Connecting to %s should work" % (GOOD_SSL,))
-urlopen(GOOD_SSL)
-print("...it did, yay.")
-
-print("Connecting to %s should fail" % (BAD_SSL,))
-try:
-    urlopen(BAD_SSL)
-    # If we get here then we failed:
-    print("...it DIDN'T!!!!!11!!1one!")
-    sys.exit(1)
-except EXC:
-    print("...it did, yay.")
\ No newline at end of file
diff --git a/manywheel/test_wheel.sh b/manywheel/test_wheel.sh
deleted file mode 100755
index ada7d93f0..000000000
--- a/manywheel/test_wheel.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-set -e
-
-yum install -y wget git
-
-rm -rf /usr/local/cuda*
-
-# Install Anaconda
-if ! ls /py
-then
-    echo "Miniconda needs to be installed"
-    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
-    bash ~/miniconda.sh -b -p /py
-else
-    echo "Miniconda is already installed"
-fi
-
-export PATH="/py/bin:$PATH"
-
-# Anaconda token
-if ls /remote/token
-then
-   source /remote/token
-fi
-
-conda install -y conda-build anaconda-client
-

From 1f19db52205b57dddf12321e71fdfba933c4cce1 Mon Sep 17 00:00:00 2001
From: snadampal <87143774+snadampal@users.noreply.github.com>
Date: Mon, 13 May 2024 12:54:00 -0500
Subject: [PATCH 27/33] aarch64: CD: add manylinux_2_28 docker build workflow
 (#1784)

---
 .github/workflows/build-manywheel-images.yml | 17 ++++++
 manywheel/Dockerfile_2_28_aarch64            | 56 ++++++++++++++++++++
 manywheel/build_docker.sh                    |  8 +++
 3 files changed, 81 insertions(+)
 create mode 100644 manywheel/Dockerfile_2_28_aarch64

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index 33c0a12be..d88fcbedd 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -14,6 +14,7 @@ on:
       - manywheel/Dockerfile
       - manywheel/Dockerfile_2_28
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_2_28_aarch64
       - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - manywheel/build_docker.sh
@@ -24,6 +25,7 @@ on:
       - manywheel/Dockerfile
       - manywheel/Dockerfile_2_28
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_2_28_aarch64
       - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - 'common/*'
@@ -139,6 +141,21 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  build-docker-cpu-aarch64-2_28:
+    runs-on: linux.arm64.2xlarge
+    env:
+      GPU_ARCH_TYPE: cpu-aarch64-2_28
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
   build-docker-cpu-cxx11-abi:
     runs-on: ubuntu-22.04
     env:
diff --git a/manywheel/Dockerfile_2_28_aarch64 b/manywheel/Dockerfile_2_28_aarch64
new file mode 100644
index 000000000..222d261ef
--- /dev/null
+++ b/manywheel/Dockerfile_2_28_aarch64
@@ -0,0 +1,56 @@
+FROM quay.io/pypa/manylinux_2_28_aarch64 as base
+
+# Graviton needs GCC 10 or above for the build. GCC12 is the default version in almalinux-8.
+ARG GCCTOOLSET_VERSION=11
+
+# Language variables
+ENV LC_ALL=en_US.UTF-8
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US.UTF-8
+
+# Install needed OS packages. This is to support all
+# the binary builds (torch, vision, audio, text, data)
+RUN yum -y install epel-release
+RUN yum -y update
+RUN yum install -y \
+  autoconf \
+  automake \
+  bison \
+  bzip2 \
+  curl \
+  diffutils \
+  file \
+  git \
+  less \
+  libffi-devel \
+  libgomp \
+  make \
+  openssl-devel \
+  patch \
+  perl \
+  unzip \
+  util-linux \
+  wget \
+  which \
+  xz \
+  yasm \
+  zstd \
+  gcc-toolset-${GCCTOOLSET_VERSION}-toolchain
+
+# Ensure the expected devtoolset is used
+ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
+
+# git236+ would refuse to run git commands in repos owned by other users
+# Which causes version check to fail, as pytorch repo is bind-mounted into the image
+# Override this behaviour by treating every folder as safe
+# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
+RUN git config --global --add safe.directory "*"
+
+FROM base as final
+
+# remove unnecessary python versions
+RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
+RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
+RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
+RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh
index 05f3dad81..5c35d32ae 100755
--- a/manywheel/build_docker.sh
+++ b/manywheel/build_docker.sh
@@ -36,6 +36,14 @@ case ${GPU_ARCH_TYPE} in
         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
         MANY_LINUX_VERSION="aarch64"
         ;;
+    cpu-aarch64-2_28)
+        TARGET=final
+        DOCKER_TAG=cpu-aarch64
+        LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux_2_28-cpu-aarch64
+        GPU_IMAGE=arm64v8/almalinux:8
+        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
+        MANY_LINUX_VERSION="2_28_aarch64"
+        ;;
     cpu-cxx11-abi)
         TARGET=final
         DOCKER_TAG=cpu-cxx11-abi

From 455b572ecbc116bc40fa0bdae475d935c6280c58 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Mon, 13 May 2024 16:12:45 -0400
Subject: [PATCH 28/33] Revert "[BE] Remove unused files and dead code" (#1821)

This reverts commit bebc062488523afe62b8ec90ee91455316448406.
---
 .circleci/scripts/binary_checkout.sh     | 61 ++++++++++++++++++++
 .circleci/scripts/binary_populate_env.sh |  1 +
 cron/update_s3_htmls.sh                  | 71 ++++++++++++++++++++++++
 manywheel/build_scripts/ssl-check.py     | 33 +++++++++++
 manywheel/test_wheel.sh                  | 27 +++++++++
 5 files changed, 193 insertions(+)
 create mode 100755 .circleci/scripts/binary_checkout.sh
 create mode 100755 cron/update_s3_htmls.sh
 create mode 100644 manywheel/build_scripts/ssl-check.py
 create mode 100755 manywheel/test_wheel.sh

diff --git a/.circleci/scripts/binary_checkout.sh b/.circleci/scripts/binary_checkout.sh
new file mode 100755
index 000000000..b634f5c9a
--- /dev/null
+++ b/.circleci/scripts/binary_checkout.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Derived from https://github.com/pytorch/pytorch/blob/2c7df1360aa17d4a6d6726998eede3671bcb36ee/.circleci/scripts/binary_populate_env.sh
+
+set -eux -o pipefail
+
+# Run the given command, retrying up to four more times after 1/2/4/8 second pauses.
+retry () {
+    "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
+}
+
+# This step runs on multiple executors with different envfile locations
+if [[ "$OSTYPE" == "msys" ]]; then
+  # windows executor (builds and tests)
+  rm -rf /c/w
+  ln -s "${HOME}" /c/w
+  WORK_DIR="/c/w"
+elif [[ -d "/home/circleci/project" ]]; then
+  # machine executor (binary tests)
+  WORK_DIR="${HOME}/project"
+else
+  # macos executor (builds and tests)
+  # docker executor (binary builds)
+  WORK_DIR="${HOME}"
+fi
+
+if [[ "$OSTYPE" == "msys" ]]; then
+  # We need to make the paths as short as possible on Windows
+  PYTORCH_ROOT="$WORK_DIR/p"
+  BUILDER_ROOT="$WORK_DIR/b"
+else
+  PYTORCH_ROOT="$WORK_DIR/pytorch"
+  BUILDER_ROOT="$WORK_DIR/builder"
+fi
+
+# Persist these variables for the subsequent steps
+echo "export WORK_DIR=${WORK_DIR}" >> "${BASH_ENV}"
+echo "export PYTORCH_ROOT=${PYTORCH_ROOT}" >> "${BASH_ENV}"
+echo "export BUILDER_ROOT=${BUILDER_ROOT}" >> "${BASH_ENV}"
+
+# Clone the Pytorch branch
+retry git clone --depth 1 https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT"
+# Removed checking out pytorch/pytorch using CIRCLE_PR_NUMBER and CIRCLE_SHA1 as
+# those environment variables are tied to the host repo where the build is being
+# triggered.
+pushd "$PYTORCH_ROOT"
+retry git submodule update --init --recursive  # must run inside the PyTorch checkout
+echo "Using Pytorch from "
+git --no-pager log --max-count 1
+popd
+
+# Clone the Builder master repo
+retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
+pushd "$BUILDER_ROOT"
+if [[ -n "${CIRCLE_SHA1:-}" ]]; then
+  # Check out a specific commit (typically the latest) from pytorch/builder
+  git reset --hard "${CIRCLE_SHA1}"
+  git checkout -q -B main
+fi
+echo "Using builder from "
+git --no-pager log --max-count 1
+popd
diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh
index 5b141ac38..7e663a64b 100755
--- a/.circleci/scripts/binary_populate_env.sh
+++ b/.circleci/scripts/binary_populate_env.sh
@@ -143,6 +143,7 @@ export BUILD_JNI=$BUILD_JNI
 export PIP_UPLOAD_FOLDER="$PIP_UPLOAD_FOLDER"
 export DOCKER_IMAGE="$DOCKER_IMAGE"
 
+# Remove WORK_DIR, PYTORCH_ROOT, BUILDER_ROOT defined & persisted in binary_checkout.sh
 export MAC_PACKAGE_WORK_DIR="$WORK_DIR"
 export MINICONDA_ROOT="$WORK_DIR/miniconda"
 export PYTORCH_FINAL_PACKAGE_DIR="$WORK_DIR/final_pkgs"
diff --git a/cron/update_s3_htmls.sh b/cron/update_s3_htmls.sh
new file mode 100755
index 000000000..2dbd172a5
--- /dev/null
+++ b/cron/update_s3_htmls.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+set -e
+
+# Update the html links file in the s3 bucket. Pip uses this html file to look
+# through all the wheels and pick the most recently uploaded one (by the
+# version, not the actual date of upload). There is one html file per cuda/cpu
+# version.
+
+# Upload for all CUDA/cpu versions if not given one to use
+if [[ -z "$CUDA_VERSIONS" ]]; then
+    export CUDA_VERSIONS=('cpu' 'cu92' 'cu100' 'cu101' 'cu102' 'cu110' 'rocm5.0' 'rocm5.1.1')
+fi
+
+if [[ -z "$HTML_NAME" ]]; then
+    export HTML_NAME='torch_nightly.html'
+fi
+
+# Dry run disabled by default for legacy purposes
+DRY_RUN=${DRY_RUN:-disabled}
+DRY_RUN_FLAG=""
+if [[ "${DRY_RUN}" != disabled ]]; then
+  DRY_RUN_FLAG="--dryrun"
+fi
+
+# NB: includes trailing slash (from PIP_UPLOAD_FOLDER)
+s3_base="s3://pytorch/whl/${PIP_UPLOAD_FOLDER}"
+
+# Pull all existing whls in this directory and turn them into html links
+# N.B. we use the .dev as a hacky way to exclude all wheels with old
+# 'yyyy.mm.dd' versions
+# NOTE(review): the grep below matches any *.whl, with no .dev filter - confirm
+# NB: replacing + with %2B is to fix old versions of pip which don't do
+# this transform automatically.  This makes the display a little
+# ugly but whatever
+function generate_html() {
+  # $1 = subdirectory of s3_base to list, $2 = prefix put before each wheel name; both need a trailing slash when non-empty
+  dir="$1"
+  url_prefix="$2"
+  aws s3 ls "${s3_base}${dir}" | grep --only-matching '\S*\.whl' | sed 's#+#%2B#g' | sed 's#.*#<a href="'"${url_prefix}"'&">'"${url_prefix}"'&</a><br>#g'
+}
+
+# This will be included in all the sub-indices
+generate_html '' '../' > "root-$HTML_NAME"
+generate_html '' '' > "$HTML_NAME"
+
+for cuda_ver in "${CUDA_VERSIONS[@]}"; do
+    generate_html "${cuda_ver}/" "" > "${cuda_ver}-$HTML_NAME"
+    cat "root-$HTML_NAME" >> "${cuda_ver}-$HTML_NAME"
+    generate_html "${cuda_ver}/" "${cuda_ver}/" >> "$HTML_NAME"
+
+    # Check your work every once in a while
+    echo "Setting ${cuda_ver}/$HTML_NAME to:"
+    cat "${cuda_ver}-$HTML_NAME"
+    (
+      set -x
+      aws s3 cp ${DRY_RUN_FLAG} "${cuda_ver}-$HTML_NAME" "s3://pytorch/whl/${PIP_UPLOAD_FOLDER}${cuda_ver}/$HTML_NAME"  --acl public-read --cache-control 'no-cache,no-store,must-revalidate'
+    )
+
+done
+
+# Check your work every once in a while
+echo "Setting $HTML_NAME to:"
+cat "$HTML_NAME"
+(
+  set -x
+
+  # Upload the html file back up
+  # Note: no / is added between s3_base and HTML_NAME because duplicate slashes cause problems in s3
+  aws s3 cp ${DRY_RUN_FLAG} "$HTML_NAME" "$s3_base$HTML_NAME"  --acl public-read --cache-control 'no-cache,no-store,must-revalidate'
+)
diff --git a/manywheel/build_scripts/ssl-check.py b/manywheel/build_scripts/ssl-check.py
new file mode 100644
index 000000000..b91927173
--- /dev/null
+++ b/manywheel/build_scripts/ssl-check.py
@@ -0,0 +1,33 @@
+# Check that urlopen() rejects an invalid TLS cert; cf. https://github.com/pypa/manylinux/issues/53
+
+GOOD_SSL = "https://google.com"
+BAD_SSL = "https://self-signed.badssl.com"
+
+import sys
+
+print("Testing SSL certificate checking for Python:", sys.version)
+
+# PEP 476 made verification the default in 2.7.9 and 3.4.3; anything older never checks.
+if sys.version_info[:3] < (2, 7, 9) or (3, 0) <= sys.version_info[:3] < (3, 4, 3):
+    print("This version never checks SSL certs; skipping tests")
+    sys.exit(0)
+
+if sys.version_info[0] >= 3:
+    from urllib.request import urlopen
+    EXC = OSError
+else:
+    from urllib import urlopen
+    EXC = IOError
+
+print("Connecting to %s should work" % (GOOD_SSL,))
+urlopen(GOOD_SSL)
+print("...it did, yay.")
+
+print("Connecting to %s should fail" % (BAD_SSL,))
+try:
+    urlopen(BAD_SSL)
+    # If we get here then we failed:
+    print("...it DIDN'T!!!!!11!!1one!")
+    sys.exit(1)
+except EXC:
+    print("...it did, yay.")
\ No newline at end of file
diff --git a/manywheel/test_wheel.sh b/manywheel/test_wheel.sh
new file mode 100755
index 000000000..ada7d93f0
--- /dev/null
+++ b/manywheel/test_wheel.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -e
+
+# Base tooling; wget is used below to fetch the Miniconda installer
+yum install -y wget git
+
+# Remove every /usr/local/cuda* path from the image
+rm -rf /usr/local/cuda*
+
+# Install Miniconda into /py unless something is already there
+if ls /py; then
+    echo "Miniconda is already installed"
+else
+    echo "Miniconda needs to be installed"
+    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
+    bash ~/miniconda.sh -b -p /py
+fi
+
+export PATH="/py/bin:$PATH"
+
+# Load the Anaconda token when one is present at /remote/token
+if ls /remote/token; then
+   source /remote/token
+fi
+
+conda install -y conda-build anaconda-client
+

From 85e8b9f6b674e53c459dfd30dcd82ebd482bf5ae Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Tue, 14 May 2024 10:46:15 -0400
Subject: [PATCH 29/33] Add manylinux_2_28 cuda docker images (#1820)

---
 .github/workflows/build-manywheel-images.yml | 21 ++++++++++++++++++++
 manywheel/build_docker.sh                    |  8 ++++++++
 2 files changed, 29 insertions(+)

diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index d88fcbedd..333f3de5d 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -60,6 +60,27 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  build-docker-cuda-manylinux_2_28:  # builds the cuda-manylinux_2_28 images via manywheel/build_docker.sh
+    runs-on: linux.12xlarge.ephemeral
+    strategy:
+      matrix:
+        cuda_version: ["12.4", "12.1", "11.8"]  # one job per CUDA version
+    env:
+      GPU_ARCH_TYPE: cuda-manylinux_2_28  # selects the matching case branch in manywheel/build_docker.sh
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}  # read by build_docker.sh as BASE_CUDA_VERSION and in the image tag
+    steps:
+      - name: Purge tools folder (free space for build)
+        run: rm -rf /opt/hostedtoolcache
+      - name: Checkout PyTorch builder
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH  # WITH_PUSH / DOCKER_* are not set in this job; presumably workflow-level env - confirm
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
   build-docker-cuda-aarch64:
     runs-on: linux.arm64.2xlarge
     strategy:
diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh
index 5c35d32ae..819a4a003 100755
--- a/manywheel/build_docker.sh
+++ b/manywheel/build_docker.sh
@@ -68,6 +68,14 @@ case ${GPU_ARCH_TYPE} in
         GPU_IMAGE=centos:7
         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9"
         ;;
+    cuda-manylinux_2_28)
+        TARGET=cuda_final
+        DOCKER_TAG=cuda${GPU_ARCH_VERSION}
+        LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux_2_28-cuda${GPU_ARCH_VERSION//./}
+        GPU_IMAGE=amd64/almalinux:8
+        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
+        MANY_LINUX_VERSION="2_28"
+        ;;
     cuda-aarch64)
         TARGET=cuda_final
         DOCKER_TAG=cuda${GPU_ARCH_VERSION}

From 06ca2923b9c1d5bcac1bb4b05f7787749a997927 Mon Sep 17 00:00:00 2001
From: Andrey Talman <atalman@fb.com>
Date: Fri, 17 May 2024 13:38:39 -0400
Subject: [PATCH 30/33] [Validations] Turn off CUDA exception catch test
 (#1825)

---
 .github/scripts/validate_binaries.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh
index bf5c15690..835c16c63 100755
--- a/.github/scripts/validate_binaries.sh
+++ b/.github/scripts/validate_binaries.sh
@@ -62,7 +62,7 @@ else
     if [[ ${TARGET_OS} == 'windows' ]]; then
         python  ./test/smoke_test/smoke_test.py ${TEST_SUFFIX}
     else
-        python3  ./test/smoke_test/smoke_test.py ${TEST_SUFFIX}
+        python3  ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check disabled
     fi
 
     if [[ ${TARGET_OS} == 'macos-arm64' ]]; then

From 4116508f78c9f47dc89d0faae164e23cd6859138 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Sun, 19 May 2024 23:16:06 -0700
Subject: [PATCH 31/33] test with linker script enabled

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 2444429cb..298525144 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,7 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    build_vars = "MAX_JOBS=5 USE_PRIORITIZED_TEXT_FOR_LD=1 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")

From d1baef5ed76ad2e86b76d2b51e3600b42056fb1c Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Mon, 20 May 2024 00:13:30 -0700
Subject: [PATCH 32/33] reapply acl version 24.04 as git history is messed

---
 aarch64_linux/aarch64_wheel_ci_build.py | 2 +-
 aarch64_linux/build_aarch64_wheel.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index 298525144..e9bd7df62 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -78,7 +78,7 @@ def build_ArmComputeLibrary() -> None:
             "clone",
             "https://github.com/ARM-software/ComputeLibrary.git",
             "-b",
-            "v23.08",
+            "v24.04",
             "--depth",
             "1",
             "--shallow-submodules",
diff --git a/aarch64_linux/build_aarch64_wheel.py b/aarch64_linux/build_aarch64_wheel.py
index 0ff286ad2..3956f0463 100755
--- a/aarch64_linux/build_aarch64_wheel.py
+++ b/aarch64_linux/build_aarch64_wheel.py
@@ -229,7 +229,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
     print('Building Arm Compute Library')
     acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
                               "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"])
-    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}")
+    host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.04 {git_clone_flags}")
     host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
 
 

From d7ffad81c370f2ae484f0b99f2a4454eff547ff8 Mon Sep 17 00:00:00 2001
From: Ting Lu <tingl@nvidia.com>
Date: Mon, 20 May 2024 06:37:40 -0700
Subject: [PATCH 33/33] Use export USE_PRIORITIZED_TEXT_FOR_LD=1 instead of
 command line

---
 aarch64_linux/aarch64_wheel_ci_build.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py
index e9bd7df62..6739d24de 100755
--- a/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/aarch64_linux/aarch64_wheel_ci_build.py
@@ -199,7 +199,8 @@ def parse_arguments():
         branch = "master"
 
     print("Building PyTorch wheel")
-    build_vars = "MAX_JOBS=5 USE_PRIORITIZED_TEXT_FOR_LD=1 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
+    os.environ["USE_PRIORITIZED_TEXT_FOR_LD"] = "1"  # inherited by later os.system() calls; a shell `export` would die with its own subshell
+    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("cd /pytorch; python setup.py clean")
 
     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")