Commit 01b8210

atalman, tinglvv, malfet, nWEIdia, and jithunnair-amd authored

Python 3.13 work - rebase on main (#1868)
* Remove triton constraint for py312 (#1846)
* Cache OpenBLAS to docker image for SBSA builds (#1842)
  * apply openblas cache for cpu-aarch64
  * reapply for cuda-aarch64
* [MacOS] Don't build wheel while building libtorch (not sure why this was ever done twice)
* Allow validate docker images to be called from different workflow (#1850)
* Revert "[MacOS] Don't build wheel while building libtorch" (reverts commit d88495a)
* [MacOS] Don't build libtorch twice (take 2), by not invoking `tools/build_libtorch.py`, as it's not done on Linux
* [MacOs][LibTorch] Copy libomp.dylib into libtorch package
* Update cudnn from v8 to v9 across CUDA versions and x86/arm (#1847)
  * Update cudnn to v9.1.0.70 for cuda11.8, cuda12.1, and cuda12.4
  * Add CUDNN_VERSION variable
  * Remove 2 spaces for install_cu124
  * Fix DEPS_LIST and DEPS_SONAME for x86; update cudnn to v9 for arm cuda binary as well
  * libcudnn_adv_infer/libcudnn_adv_train becomes libcudnn_adv
  * Change DEPS due to cudnn v9 library name changes (and additions)
  * Fix lint
  * Add missing changes to cu121/cu124
* Change OpenSSL URL (#1854): change to use openssl URL (but no longer ftp!)
* Update build-manywheel-images.yml: add a note about manylinux_2_28 state
* Revert "Update cudnn from v8 to v9 across CUDA versions and x86/arm" (#1855) (reverts commit 5783bcc)
* Don't run torch.compile on runtime images in docker validations (#1858)
* Update cudnn from v8 to v9 across CUDA versions and x86/arm (#1857)
  * (same cuDNN v9 changes as #1847, plus:) Fix aarch64 cuda typos
* Update validate-docker-images.yml
  * disable runtime error check for now
  * use validation_runner rather than hardcoded one
  * fix MATRIX_GPU_ARCH_TYPE setting for cpu-only workflows
* [aarch64 cuda cudnn] Add RUNPATH to libcudnn_graph.so.9 (#1859)
* Add executorch to pypi prep, promotion and validation scripts (#1860)
* Add AOTriton install step for ROCm manylinux images (#1862)
  * No common_utils.sh needed
  * temporarily disable runtime error check
* Add python 3.13 builder (#1845)

Co-authored-by: Ting Lu <[email protected]>
Co-authored-by: Nikita Shulga <[email protected]>
Co-authored-by: Wei Wang <[email protected]>
Co-authored-by: Jithun Nair <[email protected]>
1 parent 5d8c7af commit 01b8210

24 files changed: +224 −131 lines

.github/scripts/validate_binaries.sh

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ else
     if [[ ${TARGET_OS} == 'windows' ]]; then
         python ./test/smoke_test/smoke_test.py ${TEST_SUFFIX}
     else
-        python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX}
+        python3 ./test/smoke_test/smoke_test.py ${TEST_SUFFIX} --runtime-error-check "disabled"
     fi

     if [[ ${TARGET_OS} == 'macos-arm64' ]]; then
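The diff above passes a new `--runtime-error-check "disabled"` flag to the smoke test. As a hypothetical sketch only (the real `smoke_test.py` argument handling is not shown in this diff), such a flag could be parsed like this:

```python
import argparse

def parse_args(argv=None):
    # Hypothetical mirror of the flag used in the diff above;
    # the actual smoke_test.py may define it differently.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--runtime-error-check",
        choices=["enabled", "disabled"],
        default="enabled",
        help="whether runtime-error checks should fail the smoke test",
    )
    return parser.parse_args(argv)

args = parse_args(["--runtime-error-check", "disabled"])
print(args.runtime_error_check)
```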

.github/workflows/build-manywheel-images.yml

Lines changed: 1 addition & 0 deletions
@@ -60,6 +60,7 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  # NOTE: manylinux_2_28 are still experimental, see https://github.com/pytorch/pytorch/issues/123649
   build-docker-cuda-manylinux_2_28:
     runs-on: linux.12xlarge
     strategy:

.github/workflows/validate_docker_images.yml renamed to .github/workflows/validate-docker-images.yml

Lines changed: 37 additions & 6 deletions
@@ -1,5 +1,22 @@
-name: Validate Docker Images (with Matrix Generation)
+name: Validate Nightly Docker Images
 on:
+  workflow_call:
+    inputs:
+      channel:
+        description: 'PyTorch channel to use (nightly, test, release, all)'
+        required: true
+        type: string
+        default: 'nightly'
+      generate_dockerhub_images:
+        description: 'Generate Docker Hub images (strip ghcr.io/ prefix for release)'
+        default: false
+        required: false
+        type: boolean
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
   workflow_dispatch:
     inputs:
       channel:
@@ -15,8 +32,13 @@ on:
         description: 'Generate Docker Hub images (strip ghcr.io/ prefix for release)'
         default: false
         required: false
-        type: boolean
-
+        type: boolean
+      ref:
+        description: 'Reference to checkout, defaults to empty'
+        default: ""
+        required: false
+        type: string
+
 jobs:
   generate-matrix:
     uses: pytorch/test-infra/.github/workflows/generate_docker_release_matrix.yml@main
@@ -31,7 +53,7 @@ jobs:
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     with:
-      runner: linux.g5.4xlarge.nvidia.gpu
+      runner: ${{ matrix.validation_runner }}
       repository: "pytorch/builder"
      ref: ${{ inputs.ref || github.ref }}
      job-name: cuda${{ matrix.cuda }}-cudnn${{ matrix.cudnn_version }}-${{ matrix.image_type }}
@@ -40,7 +62,16 @@ jobs:
      timeout: 180
      script: |
        set -ex
-        export MATRIX_GPU_ARCH_TYPE="cuda"
+
        export MATRIX_GPU_ARCH_VERSION="${{ matrix.cuda }}"
+        export MATRIX_IMAGE_TYPE="${{ matrix.image_type }}"
        export TARGET_OS="linux"
-        python test/smoke_test/smoke_test.py --package torchonly --runtime-error-check enabled
+        TORCH_COMPILE_CHECK="--torch-compile-check enabled"
+        if [[ ${MATRIX_IMAGE_TYPE} == "runtime" ]]; then
+          TORCH_COMPILE_CHECK="--torch-compile-check disabled"
+        fi
+        export MATRIX_GPU_ARCH_TYPE="cuda"
+        if [[ ${MATRIX_GPU_ARCH_VERSION} == "cpu" ]]; then
+          export MATRIX_GPU_ARCH_TYPE="cpu"
+        fi
+        python test/smoke_test/smoke_test.py --package torchonly --runtime-error-check disabled ${TORCH_COMPILE_CHECK}
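The script block above makes two independent decisions: runtime images skip the torch.compile check, and a matrix entry whose CUDA version is "cpu" gets a CPU arch type. A small Python sketch of that same gating (function name and dict keys are illustrative, not part of the workflow):

```python
def validation_flags(image_type: str, cuda_version: str) -> dict:
    # Mirrors the shell logic in the workflow script above:
    # runtime images disable torch.compile, "cpu" entries are CPU-only,
    # and the runtime-error check is temporarily disabled in this commit.
    return {
        "gpu_arch_type": "cpu" if cuda_version == "cpu" else "cuda",
        "torch_compile_check": "disabled" if image_type == "runtime" else "enabled",
        "runtime_error_check": "disabled",
    }
```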

aarch64_linux/aarch64_wheel_ci_build.py

Lines changed: 11 additions & 46 deletions
@@ -14,44 +14,6 @@ def list_dir(path: str) -> List[str]:
     """
     return check_output(["ls", "-1", path]).decode().split("\n")

-
-def build_OpenBLAS() -> None:
-    '''
-    Building OpenBLAS, because the package in many linux is old
-    '''
-    print('Building OpenBLAS')
-    openblas_build_flags = [
-        "NUM_THREADS=128",
-        "USE_OPENMP=1",
-        "NO_SHARED=0",
-        "DYNAMIC_ARCH=1",
-        "TARGET=ARMV8",
-        "CFLAGS=-O3",
-    ]
-    openblas_checkout_dir = "OpenBLAS"
-
-    check_call(
-        [
-            "git",
-            "clone",
-            "https://github.com/OpenMathLib/OpenBLAS.git",
-            "-b",
-            "v0.3.25",
-            "--depth",
-            "1",
-            "--shallow-submodules",
-        ]
-    )
-
-    check_call(["make", "-j8"]
-               + openblas_build_flags,
-               cwd=openblas_checkout_dir)
-    check_call(["make", "-j8"]
-               + openblas_build_flags
-               + ["install"],
-               cwd=openblas_checkout_dir)
-
-
 def build_ArmComputeLibrary() -> None:
     """
     Using ArmComputeLibrary for aarch64 PyTorch
@@ -103,7 +65,7 @@ def update_wheel(wheel_path) -> None:
     os.system(f"unzip {wheel_path} -d {folder}/tmp")
     libs_to_copy = [
         "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
-        "/usr/local/cuda/lib64/libcudnn.so.8",
+        "/usr/local/cuda/lib64/libcudnn.so.9",
         "/usr/local/cuda/lib64/libcublas.so.12",
         "/usr/local/cuda/lib64/libcublasLt.so.12",
         "/usr/local/cuda/lib64/libcudart.so.12",
@@ -116,12 +78,13 @@ def update_wheel(wheel_path) -> None:
         "/usr/local/cuda/lib64/libnvJitLink.so.12",
         "/usr/local/cuda/lib64/libnvrtc.so.12",
         "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
-        "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8",
-        "/usr/local/cuda/lib64/libcudnn_adv_train.so.8",
-        "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8",
-        "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
-        "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
-        "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
+        "/usr/local/cuda/lib64/libcudnn_adv.so.9",
+        "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
+        "/usr/local/cuda/lib64/libcudnn_graph.so.9",
+        "/usr/local/cuda/lib64/libcudnn_ops.so.9",
+        "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
+        "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
+        "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
         "/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
         "/opt/OpenBLAS/lib/libopenblas.so.0",
         "/acl/build/libarm_compute.so",
@@ -134,6 +97,9 @@ def update_wheel(wheel_path) -> None:
     os.system(
         f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so"
     )
+    os.system(
+        f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libcudnn_graph.so.9"
+    )
     os.mkdir(f"{folder}/cuda_wheel")
     os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
     shutil.move(
@@ -227,7 +193,6 @@ def parse_arguments():
     elif branch.startswith(("v1.", "v2.")):
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

-    build_OpenBLAS()
     if enable_mkldnn:
        build_ArmComputeLibrary()
        print("build pytorch with mkldnn+acl backend")
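The `libs_to_copy` change above reflects the cuDNN 9 library reorganization: each v8 `*_infer`/`*_train` pair is merged into a single per-domain library, and several new libraries appear. A sketch of the rename mapping implied by the diff:

```python
# Mapping of the cuDNN v8 split libraries to the v9 libraries that
# replace them in the bundled wheel, as reflected in the diff above.
V8_TO_V9 = {
    "libcudnn_adv_infer.so.8": "libcudnn_adv.so.9",
    "libcudnn_adv_train.so.8": "libcudnn_adv.so.9",
    "libcudnn_cnn_infer.so.8": "libcudnn_cnn.so.9",
    "libcudnn_cnn_train.so.8": "libcudnn_cnn.so.9",
    "libcudnn_ops_infer.so.8": "libcudnn_ops.so.9",
    "libcudnn_ops_train.so.8": "libcudnn_ops.so.9",
}

# v9 also ships libraries with no direct v8 counterpart:
V9_NEW = [
    "libcudnn_graph.so.9",
    "libcudnn_engines_runtime_compiled.so.9",
    "libcudnn_engines_precompiled.so.9",
    "libcudnn_heuristic.so.9",
]

def v9_name(v8_lib: str) -> str:
    """Return the v9 library bundled in place of a v8 one."""
    return V8_TO_V9[v8_lib]
```

Note that `libcudnn_graph.so.9` also gets an explicit `patchelf --set-rpath '$ORIGIN'` in this commit, matching the separate RUNPATH fix (#1859).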

analytics/validate_pypi_staging.py

Lines changed: 11 additions & 4 deletions
@@ -15,13 +15,20 @@
     "win_amd64",
     "macosx_11_0_arm64",
 ]
-PYTHON_VERSIONS = ["cp38", "cp39", "cp310", "cp311", "cp312"]
+PYTHON_VERSIONS = [
+    "cp38",
+    "cp39",
+    "cp310",
+    "cp311",
+    "cp312"
+]
 S3_PYPI_STAGING = "pytorch-backup"
 PACKAGE_RELEASES = {
-    "torch": "2.3.0",
-    "torchvision": "0.18.0",
-    "torchaudio": "2.3.0",
+    "torch": "2.3.1",
+    "torchvision": "0.18.1",
+    "torchaudio": "2.3.1",
     "torchtext": "0.18.0",
+    "executorch": "0.2.1"
 }

 PATTERN_V = "Version:"
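These constants drive a check of staged wheels against expected releases and supported Python tags. A sketch of the kind of validation they enable (the helper below is illustrative; the real script's logic may differ):

```python
# Wheel names follow PEP 427: name-version-pythontag-abitag-platform.whl.
# Check a staged wheel against the expected release versions and
# allowed Python tags from the diff above.
PYTHON_VERSIONS = ["cp38", "cp39", "cp310", "cp311", "cp312"]
PACKAGE_RELEASES = {
    "torch": "2.3.1",
    "torchvision": "0.18.1",
    "torchaudio": "2.3.1",
    "torchtext": "0.18.0",
    "executorch": "0.2.1",
}

def wheel_is_expected(filename: str) -> bool:
    # Illustrative helper, not part of validate_pypi_staging.py.
    name, version, pytag = filename.removesuffix(".whl").split("-")[:3]
    return PACKAGE_RELEASES.get(name) == version and pytag in PYTHON_VERSIONS
```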

common/aotriton_version.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+0.6b
+manylinux_2_17
+rocm6
+04b5df8c8123f90cba3ede7e971e6fbc6040d506
+3db6ecbc915893ff967abd6e1b43bd5f54949868873be60dc802086c3863e648

common/install_aotriton.sh

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -ex
+
+TARBALL='aotriton.tar.bz2'
+# This read command alwasy returns with exit code 1
+read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
+ARCH=$(uname -m)
+AOTRITON_INSTALL_PREFIX="$1"
+AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}.tar.bz2"
+
+cd "${AOTRITON_INSTALL_PREFIX}"
+# Must use -L to follow redirects
+curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}"
+ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1)
+if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then
+  echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256},"
+  echo " which does not match the expected value ${SHA256}."
+  exit
+fi
+tar xf "${TARBALL}" && rm -rf "${TARBALL}"
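The script reads five whitespace-separated fields from `aotriton_version.txt` and compares the tarball's SHA-256 digest against the pinned value. The same parse-and-verify flow, sketched in Python (field names taken from the shell `read`):

```python
import hashlib

def parse_version_file(text: str):
    # aotriton_version.txt holds five whitespace-separated fields,
    # matching `read VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256`.
    ver, manylinux, rocmbase, commit, sha256 = text.split()
    return ver, manylinux, rocmbase, commit, sha256

def sha256_matches(data: bytes, expected: str) -> bool:
    # Same comparison the script does with sha256sum.
    return hashlib.sha256(data).hexdigest() == expected
```

One observation about the shell version: on a mismatch it calls bare `exit`, which returns the status of the preceding `echo` (0), so as written the mismatch path may not actually fail the build; `exit 1` would.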

common/install_cuda.sh

Lines changed: 17 additions & 15 deletions
@@ -2,6 +2,8 @@

 set -ex

+CUDNN_VERSION=9.1.0.70
+
 function install_cusparselt_040 {
     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
     mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -25,7 +27,7 @@ function install_cusparselt_052 {
 }

 function install_118 {
-    echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0"
+    echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL 2.15 and cuSparseLt-0.4.0"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -36,10 +38,10 @@ function install_118 {

     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn

@@ -58,7 +60,7 @@ function install_118 {
 }

 function install_121 {
-    echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL 2.20.5 and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.1 /usr/local/cuda
     # install CUDA 12.1.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
@@ -69,10 +71,10 @@ function install_121 {

     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn

@@ -91,7 +93,7 @@ function install_121 {
 }

 function install_124 {
-    echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL 2.20.5 and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.4 /usr/local/cuda
     # install CUDA 12.4.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
@@ -102,10 +104,10 @@ function install_124 {

     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
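Factoring the version into `CUDNN_VERSION` means the archive file name is now built from one variable rather than hardcoded per function. The naming scheme used by the wget/tar/cp sequence can be sketched as:

```python
def cudnn_archive(cudnn_version: str, cuda_major: int, arch: str = "x86_64") -> str:
    # Reconstructs the archive name pattern from the diff above;
    # the redist URL layout is assumed from the wget lines shown.
    return f"cudnn-linux-{arch}-{cudnn_version}_cuda{cuda_major}-archive.tar.xz"
```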

common/install_cuda_aarch64.sh

Lines changed: 5 additions & 5 deletions
@@ -14,7 +14,7 @@ function install_cusparselt_052 {
 }

 function install_124 {
-    echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.4 and cuDNN 9.1 and NCCL 2.20.5 and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.4 /usr/local/cuda
     # install CUDA 12.4.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
@@ -25,10 +25,10 @@ function install_124 {

     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
+    tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
+    cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn

common/install_openblas.sh

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -ex
+
+cd /
+git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.25 --depth 1 --shallow-submodules
+
+
+OPENBLAS_BUILD_FLAGS="
+NUM_THREADS=128
+USE_OPENMP=1
+NO_SHARED=0
+DYNAMIC_ARCH=1
+TARGET=ARMV8
+CFLAGS=-O3
+"
+
+OPENBLAS_CHECKOUT_DIR="OpenBLAS"
+
+make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR}
+make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR}
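The multi-line `OPENBLAS_BUILD_FLAGS` string works because the unquoted `${OPENBLAS_BUILD_FLAGS}` is word-split by the shell into six separate `make` variable assignments. Since none of the values contain spaces or quotes, the splitting can be demonstrated equivalently with `shlex`:

```python
import shlex

# Same string as OPENBLAS_BUILD_FLAGS in the script above; with no
# quoting involved, shlex.split matches the shell's whitespace split.
OPENBLAS_BUILD_FLAGS = """
NUM_THREADS=128
USE_OPENMP=1
NO_SHARED=0
DYNAMIC_ARCH=1
TARGET=ARMV8
CFLAGS=-O3
"""

flags = shlex.split(OPENBLAS_BUILD_FLAGS)
```

This is the shell counterpart of the removed Python `build_OpenBLAS()` helper, which passed the same six flags as a list to `check_call`.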
