From 68b93b9f0175e9f6cab8e8b86854fb453dec90e3 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Mon, 14 Nov 2022 22:49:19 +0000 Subject: [PATCH 01/10] Enable ROCm5.3 nightly wheels --- .github/workflows/build-libtorch-images.yml | 2 +- .github/workflows/build-manywheel-images.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml index f866b27ec..e5a19b560 100644 --- a/.github/workflows/build-libtorch-images.yml +++ b/.github/workflows/build-libtorch-images.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-18.04 strategy: matrix: - rocm_version: ["5.1.1", "5.2"] + rocm_version: ["5.2", "5.3"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index 27f0da234..c0ce4c8e8 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-18.04 strategy: matrix: - rocm_version: ["5.1.1", "5.2"] + rocm_version: ["5.2", "5.3"] env: GPU_ARCH_TYPE: rocm GPU_ARCH_VERSION: ${{ matrix.rocm_version }} From e278838ea21efcf1e53a59731ba50daf2ed428b3 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Mon, 14 Nov 2022 23:01:12 +0000 Subject: [PATCH 02/10] Enable ROCm5.3 docker builds --- libtorch/Dockerfile | 8 ++++---- libtorch/build_all_docker.sh | 2 +- manywheel/build_all_docker.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile index 80b081e0e..aaa7b6d6b 100644 --- a/libtorch/Dockerfile +++ b/libtorch/Dockerfile @@ -77,13 +77,13 @@ RUN apt-get update -y && \ apt-get install python -y && \ apt-get clean -FROM rocm as rocm5.1.1 -RUN ROCM_VERSION=5.1.1 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.2 +RUN ROCM_VERSION=5.2 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh -FROM rocm as rocm5.2 -RUN ROCM_VERSION=5.2 bash ./install_rocm.sh && rm install_rocm.sh +FROM rocm as rocm5.3 +RUN ROCM_VERSION=5.3 bash ./install_rocm.sh && rm install_rocm.sh RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh index 5703ca41c..efc1d033f 100755 --- a/libtorch/build_all_docker.sh +++ b/libtorch/build_all_docker.sh @@ -8,6 +8,6 @@ for cuda_version in 11.7 11.6 11.5 11.3 10.2; do GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh" done -for rocm_version in 5.1.1 5.2; do +for rocm_version in 5.2 5.3; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/libtorch/build_docker.sh" done diff --git a/manywheel/build_all_docker.sh b/manywheel/build_all_docker.sh index d50eea49d..25f1c550c 100644 --- a/manywheel/build_all_docker.sh +++ b/manywheel/build_all_docker.sh @@ -14,7 +14,7 @@ for cuda_version in 11.5 11.3 10.2; do MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/manywheel/build_docker.sh" done -for rocm_version in 5.1.1 5.2; do +for rocm_version in 5.2 5.3; do GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" MANYLINUX_VERSION=2014 GPU_ARCH_TYPE=rocm GPU_ARCH_VERSION="${rocm_version}" "${TOPDIR}/manywheel/build_docker.sh" done From 73e280a5439362ed7b95065b11ead3b65f282db5 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Thu, 27 Oct 2022 05:08:24 +0000 Subject: [PATCH 03/10] Update amdgpu repo url for ROCm5.3 --- common/install_rocm.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/common/install_rocm.sh b/common/install_rocm.sh index a69d1767c..6fca6cfa1 100644 --- a/common/install_rocm.sh +++ b/common/install_rocm.sh @@ -33,7 +33,12 @@ install_ubuntu() { if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then # Add amdgpu repository UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'` - local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu" + local amdgpu_baseurl + if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then + amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu" + else + amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/ubuntu" + fi echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list fi @@ -83,7 +88,12 @@ install_centos() { if [[ $(ver $ROCM_VERSION) -ge $(ver 4.5) ]]; then # Add amdgpu repository - local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64" + local amdgpu_baseurl + if [[ $(ver $ROCM_VERSION) -ge $(ver 5.3) ]]; then + amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64" + else + amdgpu_baseurl="https://repo.radeon.com/amdgpu/${AMDGPU_VERSIONS[$ROCM_VERSION]}/rhel/7.9/main/x86_64" + fi echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo From 2f49c3a0532a9a0dfd6eb31e2480589ef591ac2e Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 16 Nov 2022 00:00:51 +0000 Subject: [PATCH 04/10] ROCm5.3 not supported on Ubuntu 18.04 --- libtorch/build_docker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libtorch/build_docker.sh b/libtorch/build_docker.sh index fe441bb9a..bbf42b1d0 100755 --- a/libtorch/build_docker.sh +++ b/libtorch/build_docker.sh @@ -27,7 +27,7 @@ case ${GPU_ARCH_TYPE} in rocm) BASE_TARGET=rocm${GPU_ARCH_VERSION} DOCKER_TAG=rocm${GPU_ARCH_VERSION} - GPU_IMAGE=rocm/dev-ubuntu-18.04:${GPU_ARCH_VERSION} + GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION} PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908" ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then From 241f059b1a82e294e71048c7bde9d66e3901630a Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 16 Nov 2022 23:04:35 +0000 Subject: [PATCH 05/10] empty From 5a1d7444608217e79bd1e92dd36d70398f69af27 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Thu, 17 Nov 2022 07:15:26 +0000 Subject: [PATCH 06/10] Another empty commit From c1a5043cd9cb4db15ee0f34b757b7481f2761668 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Sun, 20 Nov 2022 08:50:33 +0000 Subject: [PATCH 07/10] Try disabling MLIR build to shorten docker build time --- common/install_miopen.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 9f0759e84..15f7bdcf3 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -88,6 +88,11 @@ fi git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen +# Don't build MLIR to save docker build time +# since we are disabling MLIR backend for MIOpen anyway +if [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then + sed -i '/rocMLIR/d' requirements.txt +fi ## MIOpen minimum requirements cmake -P install_deps.cmake --minimum ## Build MIOpen From 2e964684d4fb693894bff299deb77c612ac76b9d Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 22 Nov 2022 19:48:03 +0000 Subject: [PATCH 08/10] Clean up disk space --- common/install_miopen.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 15f7bdcf3..6ae8e4145 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -104,12 +104,15 @@ PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}" make MIOpen -j $(nproc) make -j $(nproc) package -yum install -y miopen-*.rpm -popd -rm -rf MIOpen -# Cleanup +# clean up since CI runner was running out of disk space +rm -rf /usr/local/cget +rm -rf /tmp/* yum clean all rm -rf /var/cache/yum rm -rf /var/lib/yum/yumdb rm -rf /var/lib/yum/history + +yum install -y miopen-*.rpm +popd +rm -rf MIOpen From a029dd8664d4859b60a66ea81caedb21282b13c9 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Tue, 22 Nov 2022 20:16:08 +0000 Subject: [PATCH 09/10] MLIR project changed names from ROCm5.4 --- common/install_miopen.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common/install_miopen.sh b/common/install_miopen.sh index 6ae8e4145..92ffb35dc 100644 --- a/common/install_miopen.sh +++ b/common/install_miopen.sh @@ -90,8 +90,10 @@ git clone https://github.com/ROCmSoftwarePlatform/MIOpen -b ${MIOPEN_BRANCH} pushd MIOpen # Don't build MLIR to save docker build time # since we are disabling MLIR backend for MIOpen anyway -if [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then +if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then sed -i '/rocMLIR/d' requirements.txt +elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then + sed -i '/llvm-project-mlir/d' requirements.txt fi ## MIOpen minimum requirements cmake -P install_deps.cmake --minimum From 4c63c8caf629d2dddc724253053e28277bce6181 Mon Sep 17 00:00:00 2001 From: Jithun Nair Date: Wed, 23 Nov 2022 00:07:29 +0000 Subject: [PATCH 10/10] Retrigger CI to get around flaky magma git access error