Skip to content

Switch DESIRED_CUDA -> GPU_ARCH_{TYPE,VERSION} #1113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions check_binary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ set -eux -o pipefail
# 8. Magma is available for CUDA builds
# 9. CuDNN is available for CUDA builds
#
# This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA,
# This script needs the env variables DESIRED_PYTHON, GPU_ARCH_VERSION,
# DESIRED_DEVTOOLSET and PACKAGE_TYPE
#
# This script expects PyTorch to be installed into the active Python (the
Expand All @@ -38,14 +38,9 @@ else
install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
fi

if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
# cu90, cu92, cu100, cu101
if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
fi
echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
if [[ "$GPU_ARCH_TYPE" = 'cuda' ]]; then
CUDA_VERSION=${GPU_ARCH_VERSION}
echo "Using CUDA $CUDA_VERSION as determined by GPU_ARCH_VERSION"

# Switch `/usr/local/cuda` to the desired CUDA version
rm -rf /usr/local/cuda || true
Expand Down Expand Up @@ -366,7 +361,7 @@ if [[ "$OSTYPE" == "msys" ]]; then
fi

# Test that CUDA builds are setup correctly
if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
if [[ "$GPU_ARCH_TYPE" == 'cuda' ]]; then
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
build_and_run_example_cpp check-torch-cuda
else
Expand Down
6 changes: 6 additions & 0 deletions conda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,26 +43,32 @@ RUN bash ./install_conda.sh && rm install_conda.sh
FROM base as cuda
RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
ENV GPU_ARCH_TYPE=cuda

FROM cuda as cuda10.2
RUN bash ./install_cuda.sh 10.2
ENV DESIRED_CUDA=10.2
ENV GPU_ARCH_TYPE=10.2

FROM cuda as cuda11.3
RUN bash ./install_cuda.sh 11.3
ENV DESIRED_CUDA=11.3
ENV GPU_ARCH_TYPE=11.3

FROM cuda as cuda11.5
RUN bash ./install_cuda.sh 11.5
ENV DESIRED_CUDA=11.5
ENV GPU_ARCH_TYPE=11.5

FROM cuda as cuda11.6
RUN bash ./install_cuda.sh 11.6
ENV DESIRED_CUDA=11.6
ENV GPU_ARCH_TYPE=11.6

FROM cuda as cuda11.7
RUN bash ./install_cuda.sh 11.7
ENV DESIRED_CUDA=11.7
ENV GPU_ARCH_TYPE=11.7

# Install MNIST test data
FROM base as mnist
Expand Down
1 change: 0 additions & 1 deletion conda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ docker push pytorch/conda-builder
# building pytorch
docker run --rm -it \
-e PACKAGE_TYPE=conda \
-e DESIRED_CUDA=cu92 \
-e DESIRED_PYTHON=3.8 \
-e PYTORCH_BUILD_VERSION=1.5.0 \
-e PYTORCH_BUILD_NUMBER=1 \
Expand Down
2 changes: 1 addition & 1 deletion conda/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
# TODO: Remove this once we fully move binary builds on master to GHA

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
DESIRED_CUDA=${DESIRED_CUDA:-cpu} bash ${SCRIPTPATH}/build_pytorch.sh
bash ${SCRIPTPATH}/build_pytorch.sh
10 changes: 6 additions & 4 deletions conda/build_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ retry () {

# Parse arguments and determine version
###########################################################
if [[ -n "$DESIRED_CUDA" && -n "$PYTORCH_BUILD_VERSION" && -n "$PYTORCH_BUILD_NUMBER" ]]; then
desired_cuda="$DESIRED_CUDA"
if [[ -n "$GPU_ARCH_VERSION" && -n "$PYTORCH_BUILD_VERSION" && -n "$PYTORCH_BUILD_NUMBER" ]]; then
desired_cuda="$GPU_ARCH_VERSION"
build_version="$PYTORCH_BUILD_VERSION"
build_number="$PYTORCH_BUILD_NUMBER"
else
Expand All @@ -49,9 +49,11 @@ else
build_version="$2"
build_number="$3"
fi
if [[ "$desired_cuda" != cpu ]]; then
desired_cuda="$(echo $desired_cuda | tr -d cuda. )"

if [[ $desired_cuda = "" ]]; then
desired_cuda="cpu"
fi

echo "Building cuda version $desired_cuda and pytorch version: $build_version build_number: $build_number"

if [[ "$OSTYPE" == "msys" ]]; then
Expand Down
9 changes: 6 additions & 3 deletions manywheel/build_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ else
export LLVM_DIR="$USE_LLVM/lib/cmake/llvm"
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
if [[ "$GPU_ARCH_TYPE" = "rocm" ]]; then
echo "Calling build_amd.py at $(date)"
python tools/amd_build/build_amd.py
fi
Expand Down Expand Up @@ -326,7 +326,7 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip;
fi

# ROCm workaround for roctracer dlopens
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
if [[ "${GPU_ARCH_TYPE}" = "rocm" ]]; then
patchedpath=$(fname_without_so_number $destpath)
else
patchedpath=$(fname_with_sha256 $destpath)
Expand Down Expand Up @@ -459,7 +459,10 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
echo "$(date) :: Running tests"
pushd "$PYTORCH_ROOT"
LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
"${SOURCE_DIR}/../run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
PACKAGE_TYPE=manywheel \
DESIRED_PYTHON="${py_majmin}" \
GPU_ARCH_TYPE=${GPU_ARCH_TYPE} \
"${SOURCE_DIR}/../run_tests.sh"
popd
echo "$(date) :: Finished tests"
fi
36 changes: 2 additions & 34 deletions manywheel/build_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,9 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
EXTRA_CAFFE2_CMAKE_FLAGS=()
fi

# Determine CUDA version and architectures to build for
#
# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`,
# because in some cases a single Docker image can have multiple CUDA versions
# on it, and `nvcc --version` might not show the CUDA version we want.
if [[ -n "$DESIRED_CUDA" ]]; then
# If the DESIRED_CUDA already matches the format that we expect
if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then
CUDA_VERSION=${DESIRED_CUDA}
else
# cu90, cu92, cu100, cu101
if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
fi
fi
echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"

# There really has to be a better way to do this - eli
# Possibly limiting builds to specific cuda versions by delimiting images would be a choice
if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
echo "Switching to CUDA version $desired_cuda"
/builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
fi
else
CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
echo "CUDA $CUDA_VERSION Detected"
fi

cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
CUDA_VERSION="${GPU_ARCH_VERSION:-}"
cuda_version_nodot=$(echo "${CUDA_VERSION}" | tr -d '.')

TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0"
case ${CUDA_VERSION} in
Expand All @@ -63,7 +35,6 @@ case ${CUDA_VERSION} in
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
10.*)
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
*)
Expand Down Expand Up @@ -273,9 +244,6 @@ else
exit 1
fi

# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
export DESIRED_CUDA="$cuda_version_nodot"

# Switch `/usr/local/cuda` to the desired CUDA version
rm -rf /usr/local/cuda || true
ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda
Expand Down
2 changes: 1 addition & 1 deletion manywheel/build_libtorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ else
export LLVM_DIR="$USE_LLVM/lib/cmake/llvm"
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
if [[ "$GPU_ARCH_TYPE" = "rocm" ]]; then
echo "Calling build_amd.py at $(date)"
python tools/amd_build/build_amd.py
# TODO remove this work-around once pytorch sources are updated
Expand Down
15 changes: 2 additions & 13 deletions manywheel/build_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,8 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
fi

# Determine ROCm version and architectures to build for
#
# NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION`
if [[ -n "$DESIRED_CUDA" ]]; then
if ! echo "${DESIRED_CUDA}"| grep "^rocm" >/dev/null 2>/dev/null; then
export DESIRED_CUDA="rocm${DESIRED_CUDA}"
fi
# rocm3.7, rocm3.5.1
ROCM_VERSION="$DESIRED_CUDA"
echo "Using $ROCM_VERSION as determined by DESIRED_CUDA"
else
echo "Must set DESIRED_CUDA"
exit 1
fi
ROCM_VERSION="$GPU_ARCH_VERSION"
echo "Using $ROCM_VERSION as determined by GPU_ARCH_VERSION"

# Package directories
WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION"
Expand Down
Loading