diff --git a/check_binary.sh b/check_binary.sh
index 153fca745..f38900996 100755
--- a/check_binary.sh
+++ b/check_binary.sh
@@ -13,7 +13,7 @@ set -eux -o pipefail
 # 8. Magma is available for CUDA builds
 # 9. CuDNN is available for CUDA builds
 #
-# This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA,
+# This script needs the env variables DESIRED_PYTHON, GPU_ARCH_TYPE, GPU_ARCH_VERSION,
 # DESIRED_DEVTOOLSET and PACKAGE_TYPE
 #
 # This script expects PyTorch to be installed into the active Python (the
@@ -38,14 +38,9 @@ else
   install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
 fi
 
-if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
-  # cu90, cu92, cu100, cu101
-  if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
-    CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
-  elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
-    CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
-  fi
-  echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
+if [[ "$GPU_ARCH_TYPE" = 'cuda' ]]; then
+  CUDA_VERSION=${GPU_ARCH_VERSION}
+  echo "Using CUDA $CUDA_VERSION as determined by GPU_ARCH_VERSION"
 
   # Switch `/usr/local/cuda` to the desired CUDA version
   rm -rf /usr/local/cuda || true
@@ -366,7 +361,7 @@ if [[ "$OSTYPE" == "msys" ]]; then
 fi
 
 # Test that CUDA builds are setup correctly
-if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != *"rocm"* ]]; then
+if [[ "$GPU_ARCH_TYPE" == 'cuda' ]]; then
   if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
     build_and_run_example_cpp check-torch-cuda
   else
diff --git a/conda/Dockerfile b/conda/Dockerfile
index f4f4c834a..ce9cc8b3a 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -43,26 +43,32 @@ RUN bash ./install_conda.sh && rm install_conda.sh
 FROM base as cuda
 RUN rm -rf /usr/local/cuda-*
 ADD ./common/install_cuda.sh install_cuda.sh
+ENV GPU_ARCH_TYPE=cuda
 
 FROM cuda as cuda10.2
 RUN bash ./install_cuda.sh 10.2
 ENV DESIRED_CUDA=10.2
+ENV GPU_ARCH_VERSION=10.2
 
 FROM cuda as cuda11.3
 RUN bash ./install_cuda.sh 11.3
 ENV DESIRED_CUDA=11.3
+ENV GPU_ARCH_VERSION=11.3
 
 FROM cuda as cuda11.5
 RUN bash ./install_cuda.sh 11.5
 ENV DESIRED_CUDA=11.5
+ENV GPU_ARCH_VERSION=11.5
 
 FROM cuda as cuda11.6
 RUN bash ./install_cuda.sh 11.6
 ENV DESIRED_CUDA=11.6
+ENV GPU_ARCH_VERSION=11.6
 
 FROM cuda as cuda11.7
 RUN bash ./install_cuda.sh 11.7
 ENV DESIRED_CUDA=11.7
+ENV GPU_ARCH_VERSION=11.7
 
 # Install MNIST test data
 FROM base as mnist
diff --git a/conda/README.md b/conda/README.md
index 4c1b719f6..4044882ae 100644
--- a/conda/README.md
+++ b/conda/README.md
@@ -20,7 +20,6 @@ docker push pytorch/conda-builder
 # building pytorch
 docker run --rm -it \
     -e PACKAGE_TYPE=conda \
-    -e DESIRED_CUDA=cu92 \
     -e DESIRED_PYTHON=3.8 \
     -e PYTORCH_BUILD_VERSION=1.5.0 \
     -e PYTORCH_BUILD_NUMBER=1 \
diff --git a/conda/build.sh b/conda/build.sh
index ab6176884..0b4e55c14 100755
--- a/conda/build.sh
+++ b/conda/build.sh
@@ -5,4 +5,4 @@
 # TODO: Remove this once we fully move binary builds on master to GHA
 
 SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-DESIRED_CUDA=${DESIRED_CUDA:-cpu} bash ${SCRIPTPATH}/build_pytorch.sh
+bash "${SCRIPTPATH}/build_pytorch.sh"  # quoted so a checkout path containing spaces is not word-split
diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh
index 8ac05edbd..29d5247b0 100755
--- a/conda/build_pytorch.sh
+++ b/conda/build_pytorch.sh
@@ -33,8 +33,8 @@ retry () {
 
 # Parse arguments and determmine version
 ###########################################################
-if [[ -n "$DESIRED_CUDA" && -n "$PYTORCH_BUILD_VERSION" && -n "$PYTORCH_BUILD_NUMBER" ]]; then
-    desired_cuda="$DESIRED_CUDA"
+if [[ -n "${PYTORCH_BUILD_VERSION:-}" && -n "${PYTORCH_BUILD_NUMBER:-}" ]]; then  # GPU_ARCH_VERSION is optional: empty means a cpu build
+    desired_cuda="${GPU_ARCH_VERSION:-}"
     build_version="$PYTORCH_BUILD_VERSION"
     build_number="$PYTORCH_BUILD_NUMBER"
 else
@@ -49,9 +49,11 @@ else
     build_version="$2"
     build_number="$3"
 fi
-if [[ "$desired_cuda" != cpu ]]; then
-  desired_cuda="$(echo $desired_cuda | tr -d cuda. )"
+
+if [[ $desired_cuda = "" ]]; then
+    desired_cuda="cpu"
 fi
+
 echo "Building cuda version $desired_cuda and pytorch version: $build_version build_number: $build_number"
 
 if [[ "$OSTYPE" == "msys" ]]; then
diff --git a/manywheel/build_common.sh b/manywheel/build_common.sh
index 95a67cdd9..e7acb6f5d 100644
--- a/manywheel/build_common.sh
+++ b/manywheel/build_common.sh
@@ -147,7 +147,7 @@ else
     export LLVM_DIR="$USE_LLVM/lib/cmake/llvm"
 fi
 
-if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
+if [[ "$GPU_ARCH_TYPE" = "rocm" ]]; then
     echo "Calling build_amd.py at $(date)"
     python tools/amd_build/build_amd.py
 fi
@@ -326,7 +326,7 @@ for pkg in /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip;
             fi
 
             # ROCm workaround for roctracer dlopens
-            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
+            if [[ "${GPU_ARCH_TYPE}" = "rocm" ]]; then
                 patchedpath=$(fname_without_so_number $destpath)
             else
                 patchedpath=$(fname_with_sha256 $destpath)
@@ -459,7 +459,10 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
   echo "$(date) :: Running tests"
   pushd "$PYTORCH_ROOT"
   LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
-          "${SOURCE_DIR}/../run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
+    PACKAGE_TYPE=manywheel \
+    DESIRED_PYTHON="${py_majmin}" \
+    GPU_ARCH_TYPE="${GPU_ARCH_TYPE}" \
+          "${SOURCE_DIR}/../run_tests.sh"
   popd
   echo "$(date) :: Finished tests"
 fi
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index efea1ae93..070e1e22b 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -24,37 +24,9 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
     EXTRA_CAFFE2_CMAKE_FLAGS=()
 fi
 
-# Determine CUDA version and architectures to build for
-#
-# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`,
-# because in some cases a single Docker image can have multiple CUDA versions
-# on it, and `nvcc --version` might not show the CUDA version we want.
-if [[ -n "$DESIRED_CUDA" ]]; then
-    # If the DESIRED_CUDA already matches the format that we expect
-    if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then
-        CUDA_VERSION=${DESIRED_CUDA}
-    else
-        # cu90, cu92, cu100, cu101
-        if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
-            CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
-        elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
-            CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
-        fi
-    fi
-    echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
 
-    # There really has to be a better way to do this - eli
-    # Possibly limiting builds to specific cuda versions be delimiting images would be a choice
-    if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
-        echo "Switching to CUDA version $desired_cuda"
-        /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
-    fi
-else
-    CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
-    echo "CUDA $CUDA_VERSION Detected"
-fi
-
-cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
+CUDA_VERSION="${GPU_ARCH_VERSION:-}"
+cuda_version_nodot=$(echo "${CUDA_VERSION}" | tr -d '.')
 
 TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0"
 case ${CUDA_VERSION} in
@@ -63,7 +35,6 @@ case ${CUDA_VERSION} in
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
     10.*)
-        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
         ;;
     *)
@@ -273,9 +244,6 @@ else
     exit 1
 fi
 
-# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
-export DESIRED_CUDA="$cuda_version_nodot"
-
 # Switch `/usr/local/cuda` to the desired CUDA version
 rm -rf /usr/local/cuda || true
 ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda
diff --git a/manywheel/build_libtorch.sh b/manywheel/build_libtorch.sh
index f481dfd90..d7a77eabe 100644
--- a/manywheel/build_libtorch.sh
+++ b/manywheel/build_libtorch.sh
@@ -113,7 +113,7 @@ else
     export LLVM_DIR="$USE_LLVM/lib/cmake/llvm"
 fi
 
-if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
+if [[ "$GPU_ARCH_TYPE" = "rocm" ]]; then
     echo "Calling build_amd.py at $(date)"
     python tools/amd_build/build_amd.py
     # TODO remove this work-around once pytorch sources are updated
diff --git a/manywheel/build_rocm.sh b/manywheel/build_rocm.sh
index 9b4d36348..bb70053b4 100755
--- a/manywheel/build_rocm.sh
+++ b/manywheel/build_rocm.sh
@@ -25,19 +25,8 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
 fi
 
 # Determine ROCm version and architectures to build for
-#
-# NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION`
-if [[ -n "$DESIRED_CUDA" ]]; then
-    if ! echo "${DESIRED_CUDA}"| grep "^rocm" >/dev/null 2>/dev/null; then
-        export DESIRED_CUDA="rocm${DESIRED_CUDA}"
-    fi
-    # rocm3.7, rocm3.5.1
-    ROCM_VERSION="$DESIRED_CUDA"
-    echo "Using $ROCM_VERSION as determined by DESIRED_CUDA"
-else
-    echo "Must set DESIRED_CUDA"
-    exit 1
-fi
+: "${GPU_ARCH_VERSION:?Must set GPU_ARCH_VERSION}"  # abort early when the version is missing, as the old DESIRED_CUDA check did
+ROCM_VERSION="rocm${GPU_ARCH_VERSION#rocm}"; echo "Using $ROCM_VERSION as determined by GPU_ARCH_VERSION"  # keep the "rocm" prefix whether or not the input carries it
 
 # Package directories
 WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION"
diff --git a/run_tests.sh b/run_tests.sh
index 18b00f00b..a3b597cc6 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -35,31 +35,18 @@ retry () {
 # Parameters
 ##############################################################################
 if [[ "$#" != 3 ]]; then
-  if [[ -z "${DESIRED_PYTHON:-}" || -z "${DESIRED_CUDA:-}" || -z "${PACKAGE_TYPE:-}" ]]; then
-    echo "USAGE: run_tests.sh  PACKAGE_TYPE  DESIRED_PYTHON  DESIRED_CUDA"
+  if [[ -z "${DESIRED_PYTHON:-}" || -z "${PACKAGE_TYPE:-}" || -z "${GPU_ARCH_TYPE:-}" ]]; then
+    echo "USAGE: run_tests.sh  PACKAGE_TYPE  DESIRED_PYTHON  GPU_ARCH_TYPE"
     echo "The env variable PACKAGE_TYPE must be set to 'conda' or 'manywheel' or 'libtorch'"
     echo "The env variable DESIRED_PYTHON must be set like '2.7mu' or '3.6m' etc"
-    echo "The env variable DESIRED_CUDA must be set like 'cpu' or 'cu80' etc"
+    echo "The env variable GPU_ARCH_TYPE must be set like 'cpu' or 'cuda' or 'rocm' etc"
     exit 1
   fi
   package_type="$PACKAGE_TYPE"
   py_ver="$DESIRED_PYTHON"
-  cuda_ver="$DESIRED_CUDA"
 else
   package_type="$1"
   py_ver="$2"
-  cuda_ver="$3"
-fi
-
-if [[ "$cuda_ver" == 'cpu-cxx11-abi' ]]; then
-    cuda_ver="cpu"
-fi
-
-# cu80, cu90, cu100, cpu
-if [[ ${#cuda_ver} -eq 4 ]]; then
-    cuda_ver_majmin="${cuda_ver:2:1}.${cuda_ver:3:1}"
-elif [[ ${#cuda_ver} -eq 5 ]]; then
-    cuda_ver_majmin="${cuda_ver:2:2}.${cuda_ver:4:1}"
 fi
 
 NUMPY_PACKAGE=""
@@ -80,7 +67,7 @@ if [[ "$package_type" == conda || "$(uname)" == Darwin ]]; then
     # overwrite the currently installed "local" pytorch package meaning you aren't actually testing
     # the right package.
     # TODO (maybe): Make the "cpu" package of pytorch depend on "cpuonly"
-    if [[ "$cuda_ver" = 'cpu' ]]; then
+    if [[ "${GPU_ARCH_TYPE}" = 'cpu' ]]; then
       # Installing cpuonly will also install dependencies as well
       retry conda install -y -c pytorch cpuonly
     else
@@ -139,7 +126,7 @@ echo "Checking that we are testing the package that is just built"
 python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else 1)"
 
 # Test that CUDA builds are setup correctly
-if [[ "$cuda_ver" != 'cpu' ]]; then
+if [[ "${GPU_ARCH_TYPE}" != 'cpu' ]]; then
     # Test CUDA archs
     echo "Checking that CUDA archs are setup correctly"
     timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
@@ -165,573 +152,3 @@ if [[ "$(uname)" == 'Darwin' ]]; then
 fi
 
 popd
-
-# TODO re-enable the other tests after the nightlies are moved to CI. This is
-# because the binaries keep breaking, often from additional tests, that aren't
-# real problems. Once these are on circleci and a smoke-binary-build is added
-# to PRs then this should stop happening and these can be re-enabled.
-echo "Not running unit tests. Hopefully these problems are caught by CI"
-exit 0
-
-
-##############################################################################
-# Running unit tests (except not right now)
-##############################################################################
-echo "$(date) :: Starting tests for $package_type package for python$py_ver and $cuda_ver"
-
-# We keep track of exact tests to skip, as otherwise we would be hardly running
-# any tests. But b/c of issues working with pytest/normal-python-test/ and b/c
-# of special snowflake tests in test/run_test.py we also take special care of
-# those
-tests_to_skip=()
-
-#
-# Entire file exclusions
-##############################################################################
-entire_file_exclusions=("-x")
-
-# cpp_extensions doesn't work with pytest, so we exclude it from the pytest run
-# here and then manually run it later. Note that this is only because this
-# entire_fil_exclusions flag is only passed to the pytest run
-entire_file_exclusions+=("cpp_extensions")
-
-# TODO temporary line to fix next days nightlies, but should be removed when
-# issue is fixed
-entire_file_exclusions+=('type_info')
-
-if [[ "$cuda_ver" == 'cpu' ]]; then
-    # test/test_cuda.py exits early if the installed torch is not built with
-    # CUDA, but the exit doesn't work when running with pytest, so pytest will
-    # still try to run all the CUDA tests and then fail
-    entire_file_exclusions+=("cuda")
-    entire_file_exclusions+=("nccl")
-fi
-
-if [[ "$(uname)" == 'Darwin' || "$OSTYPE" == "msys" ]]; then
-    # pytest on Mac doesn't like the exits in these files
-    entire_file_exclusions+=('c10d')
-    entire_file_exclusions+=('distributed')
-
-    # pytest doesn't mind the exit but fails the tests. On Mac we run this
-    # later without pytest
-    entire_file_exclusions+=('thd_distributed')
-fi
-
-
-#
-# Universal flaky tests
-##############################################################################
-
-# RendezvousEnvTest sometimes hangs forever
-# Otherwise it will fail on CUDA with
-#   Traceback (most recent call last):
-#     File "test_c10d.py", line 179, in test_common_errors
-#       next(gen)
-#   AssertionError: ValueError not raised
-tests_to_skip+=('RendezvousEnvTest and test_common_errors')
-
-# This hung forever once on conda_3.5_cu92
-tests_to_skip+=('TestTorch and test_sum_dim')
-
-# test_trace_warn isn't actually flaky, but it doesn't work with pytest so we
-# just skip it
-tests_to_skip+=('TestJit and test_trace_warn')
-#
-# Python specific flaky tests
-##############################################################################
-
-# test_dataloader.py:721: AssertionError
-# looks like a timeout, but interestingly only appears on python 3
-if [[ "$py_ver" == 3* ]]; then
-    tests_to_skip+=('TestDataLoader and test_proper_exit')
-fi
-
-#
-# CUDA flaky tests, all package types
-##############################################################################
-if [[ "$cuda_ver" != 'cpu' ]]; then
-
-    #
-    # DistributedDataParallelTest
-    # All of these seem to fail
-    tests_to_skip+=('DistributedDataParallelTest')
-
-    #
-    # RendezvousEnvTest
-    # Traceback (most recent call last):
-    #   File "test_c10d.py", line 201, in test_nominal
-    #     store0, rank0, size0 = next(gen0)
-    #   File "/opt/python/cp36-cp36m/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 131, in _env_rendezvous_handler
-    #     store = TCPStore(master_addr, master_port, start_daemon)
-    # RuntimeError: Address already in use
-    tests_to_skip+=('RendezvousEnvTest and test_nominal')
-
-    #
-    # TestCppExtension
-    #
-    # Traceback (most recent call last):
-    #   File "test_cpp_extensions.py", line 134, in test_jit_cudnn_extension
-    #     with_cuda=True)
-    #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 552, in load
-    #     with_cuda)
-    #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 729, in _jit_compile
-    #     return _import_module_from_library(name, build_directory)
-    #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 867, in _import_module_from_library
-    #     return imp.load_module(module_name, file, path, description)
-    #   File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 243, in load_module
-    #     return load_dynamic(name, filename, file)
-    #   File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 343, in load_dynamic
-    #     return _load(spec)
-    #   File "<frozen importlib._bootstrap>", line 693, in _load
-    #   File "<frozen importlib._bootstrap>", line 666, in _load_unlocked
-    #   File "<frozen importlib._bootstrap>", line 577, in module_from_spec
-    #   File "<frozen importlib._bootstrap_external>", line 938, in create_module
-    #   File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
-    # ImportError: libcudnn.so.7: cannot open shared object file: No such file or directory
-    tests_to_skip+=('TestCppExtension and test_jit_cudnn_extension')
-
-    #
-    # TestCuda
-    #
-
-    # 3.7_cu80
-    #  RuntimeError: CUDA error: out of memory
-    tests_to_skip+=('TestCuda and test_arithmetic_large_tensor')
-
-    # 3.7_cu80
-    # RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch-nightly_1538097262541/work/aten/src/THC/THCTensorCopy.cu:205
-    tests_to_skip+=('TestCuda and test_autogpu')
-
-    #
-    # TestDistBackend
-    #
-
-    # Traceback (most recent call last):
-    #   File "test_thd_distributed.py", line 1046, in wrapper
-    #     self._join_and_reduce(fn)
-    #   File "test_thd_distributed.py", line 1108, in _join_and_reduce
-    #     self.assertEqual(p.exitcode, first_process.exitcode)
-    #   File "/pytorch/test/common.py", line 399, in assertEqual
-    #     super(TestCase, self).assertEqual(x, y, message)
-    # AssertionError: None != 77 :
-    tests_to_skip+=('TestDistBackend and test_all_gather_group')
-    tests_to_skip+=('TestDistBackend and test_all_reduce_group_max')
-    tests_to_skip+=('TestDistBackend and test_all_reduce_group_min')
-    tests_to_skip+=('TestDistBackend and test_all_reduce_group_sum')
-    tests_to_skip+=('TestDistBackend and test_all_reduce_group_product')
-    tests_to_skip+=('TestDistBackend and test_barrier_group')
-    tests_to_skip+=('TestDistBackend and test_broadcast_group')
-
-    # Traceback (most recent call last):
-    #   File "test_thd_distributed.py", line 1046, in wrapper
-    #     self._join_and_reduce(fn)
-    #   File "test_thd_distributed.py", line 1108, in _join_and_reduce
-    #     self.assertEqual(p.exitcode, first_process.exitcode)
-    #   File "/pytorch/test/common.py", line 397, in assertEqual
-    #     super(TestCase, self).assertLessEqual(abs(x - y), prec, message)
-    # AssertionError: 12 not less than or equal to 1e-05
-    tests_to_skip+=('TestDistBackend and test_barrier')
-
-    # Traceback (most recent call last):
-    #   File "test_distributed.py", line 1267, in wrapper
-    #     self._join_and_reduce(fn)
-    #   File "test_distributed.py", line 1350, in _join_and_reduce
-    #     self.assertEqual(p.exitcode, first_process.exitcode)
-    #   File "/pytorch/test/common.py", line 399, in assertEqual
-    #     super(TestCase, self).assertEqual(x, y, message)
-    # AssertionError: None != 1
-    tests_to_skip+=('TestDistBackend and test_broadcast')
-
-    # Memory leak very similar to all the conda ones below, but appears on manywheel
-    # 3.6m_cu80
-    # AssertionError: 1605632 not less than or equal to 1e-05 : __main__.TestEndToEndHybridFrontendModels.test_vae_cuda leaked 1605632 bytes CUDA memory on device 0
-    tests_to_skip+=('TestEndToEndHybridFrontendModels and test_vae_cuda')
-
-    # ________________________ TestNN.test_embedding_bag_cuda ________________________
-    #
-    # self = <test_nn.TestNN testMethod=test_embedding_bag_cuda>
-    # dtype = torch.float32
-    #
-    #     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
-    #     @repeat_test_for_types(ALL_TENSORTYPES)
-    #     @skipIfRocm
-    #     def test_embedding_bag_cuda(self, dtype=torch.float):
-    #         self._test_EmbeddingBag(True, 'sum', False, dtype)
-    #         self._test_EmbeddingBag(True, 'mean', False, dtype)
-    #         self._test_EmbeddingBag(True, 'max', False, dtype)
-    #         if dtype != torch.half:
-    #             # torch.cuda.sparse.HalfTensor is not enabled.
-    #             self._test_EmbeddingBag(True, 'sum', True, dtype)
-    # >           self._test_EmbeddingBag(True, 'mean', True, dtype)
-    #
-    # test_nn.py:2144:
-    # _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
-    # test_nn.py:2062: in _test_EmbeddingBag
-    #     _test_vs_Embedding(N, D, B, L)
-    # test_nn.py:2059: in _test_vs_Embedding
-    #     self.assertEqual(es_weight_grad, e.weight.grad, needed_prec)
-    # common.py:373: in assertEqual
-    #     assertTensorsEqual(x, y)
-    # common.py:365: in assertTensorsEqual
-    #     self.assertLessEqual(max_err, prec, message)
-    # E   AssertionError: tensor(0.0000, device='cuda:0', dtype=torch.float32) not less than or equal to 2e-05 :
-    #  1 failed, 1202 passed, 19 skipped, 2 xfailed, 796 warnings in 1166.73 seconds =
-    # Traceback (most recent call last):
-    #   File "test/run_test.py", line 391, in <module>
-    #     main()
-    #   File "test/run_test.py", line 383, in main
-    #     raise RuntimeError(message)
-    tests_to_skip+=('TestNN and test_embedding_bag_cuda')
-fi
-
-
-##########################################################################
-# Conda specific flaky tests
-##########################################################################
-
-# Only on Anaconda's python 2.7
-# So, this doesn't really make sense. All the mac jobs are run on the same
-# machine, so the wheel jobs still use conda to silo their python
-# installations. The wheel job for Python 2.7 should use the exact same Python
-# from conda as the conda job for Python 2.7. Yet, this only appears on the
-# conda jobs.
-if [[ "$package_type" == 'conda' && "$py_ver" == '2.7' ]]; then
-    # Traceback (most recent call last):
-    #   File "test_jit.py", line 6281, in test_wrong_return_type
-    #     @torch.jit.script
-    #   File "/Users/administrator/nightlies/2018_09_30/wheel_build_dirs/conda_2.7/conda/envs/env_py2.7_0_20180930/lib/python2.7/site-packages/torch/jit/__init__.py", line 639, in script
-    #     graph = _jit_script_compile(ast, rcb)
-    #   File "/Users/administrator/nightlies/2018_09_30/wheel_build_dirs/conda_2.7/conda/envs/env_py2.7_0_20180930/lib/python2.7/site-packages/torch/jit/annotations.py", line 80, in get_signature
-    #     return parse_type_line(type_line)
-    #   File "/Users/administrator/nightlies/2018_09_30/wheel_build_dirs/conda_2.7/conda/envs/env_py2.7_0_20180930/lib/python2.7/site-packages/torch/jit/annotations.py", line 131, in parse_type_line
-    #     return arg_types, ann_to_type(ret_ann)
-    #   File "/Users/administrator/nightlies/2018_09_30/wheel_build_dirs/conda_2.7/conda/envs/env_py2.7_0_20180930/lib/python2.7/site-packages/torch/jit/annotations.py", line 192, in ann_to_type
-    #     return TupleType([ann_to_type(a) for a in ann.__args__])
-    # TypeError: 'TupleInstance' object is not iterable
-    tests_to_skip+=('TestScript and test_wrong_return_type')
-fi
-
-# Lots of memory leaks on CUDA
-if [[ "$package_type" == 'conda' && "$cuda_ver" != 'cpu' ]]; then
-
-    # 3.7_cu92
-    # AssertionError: 63488 not less than or equal to 1e-05 : __main__.TestEndToEndHybridFrontendModels.test_mnist_cuda leaked 63488 bytes CUDA memory on device 0
-    tests_to_skip+=('TestEndToEndHybridFrontendModels and test_mnist_cuda')
-
-    # 2.7_cu92
-    # AssertionError: __main__.TestNN.test_BatchNorm3d_momentum_eval_cuda leaked -1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestNN and test_BatchNorm3d_momentum_eval_cuda')
-
-    #
-    # All of test_BCE is flaky
-    tests_to_skip+=('TestNN and test_BCE')
-
-    # 3.5_cu80
-    # AssertionError: 3584 not less than or equal to 1e-05 : test_nn.TestNN.test_BCEWithLogitsLoss_cuda_double leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_BCEWithLogitsLoss_cuda_double')
-
-    # 2.7_cu92
-    # AssertionError: __main__.TestNN.test_ConvTranspose2d_cuda leaked -1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestNN and test_ConvTranspose2d_cuda')
-
-    # 3.7_cu90
-    # AssertionError: 1024 not less than or equal to 1e-05 : __main__.TestNN.test_ConvTranspose3d_cuda leaked -1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestNN and test_ConvTranspose3d_cuda')
-
-    #
-    #
-    # CTCLoss
-    # These are all flaky
-    tests_to_skip+=('TestNN and test_CTCLoss')
-
-    # 2.7_cu90
-    # 2.7_cu92
-    # 3.5_cu90 x2
-    # 3.6_cu90
-    # 3.7_cu80 x3
-    # 3.7_cu90
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_1d_target_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_1d_target_cuda_double')
-
-    # 2.7_cu80 --18944
-    # 2.7_cu92
-    # 3.5_cu90 --18944 x2
-    # 3.5_cu92 --18944 x2
-    # 3.6_cu90 --18944
-    # 3.6_cu92 --18944
-    # 3.7_cu80
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_1d_target_cuda_float leaked -37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_1d_target_cuda_float')
-
-    # 3.5_cu90
-    # 3.7_cu92
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_1d_target_sum_reduction_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_1d_target_sum_reduction_cuda_double')
-
-    # 3.7_cu92
-    # AssertionError: 18432 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_1d_target_sum_reduction_cuda_float leaked -18432 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_1d_target_sum_reduction_cuda_float')
-
-    # 3.5_cu92 x2
-    # 3.6_cu80
-    # 3.7_cu90
-    # AssertionError: AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_2d_int_target_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_2d_int_target_cuda_double')
-
-    # 3.5_cu92
-    # 3.6_cu80 --37376
-    # 3.6_cu92
-    # AssertionError: 18944 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_2d_int_target_cuda_float leaked 18944 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_2d_int_target_cuda_float')
-
-    # 2.7_cu90
-    # 3.5_cu80
-    # 3.7_cu80 x2
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_2d_int_target_sum_reduction_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_2d_int_target_sum_reduction_cuda_double')
-
-    # 2.7_cu90
-    # 2.7_cu92 --18944
-    # AssertionError: __main__.TestNN.test_CTCLoss_2d_int_target_sum_reduction_cuda_float leaked -37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_2d_int_target_sum_reduction_cuda_float')
-
-    # 2.7_cu92
-    # AssertionError: __main__.TestNN.test_CTCLoss_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_cuda_double')
-
-    # 2.7_cu92
-    # AssertionError: __main__.TestNN.test_CTCLoss_cuda_float leaked 18944 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_cuda_float')
-
-    # 2.7_cu92
-    # 3.5_cu90 x2
-    # 3.5_cu92
-    # 3.5_cu92
-    # 3.6_cu80 x2
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_sum_reduction_cuda_double leaked 37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_sum_reduction_cuda_double')
-
-    # 2.7_cu92 --18944
-    # 3.6_cu80
-    # AssertionError: 37376 not less than or equal to 1e-05 : __main__.TestNN.test_CTCLoss_sum_reduction_cuda_float leaked -37376 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_CTCLoss_sum_reduction_cuda_float')
-
-    #
-    #
-    # NLLLoss
-    # These are all flaky
-    tests_to_skip+=('TestNN and NLLLoss')
-
-    # 3.5_cu90 x2
-    # AssertionError: 3584 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_cuda_double leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_cuda_double')
-
-    # 2.7_cu80
-    # AssertionError: __main__.TestNN.test_NLLLoss_2d_cuda_float leaked 2560 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_cuda_float')
-
-    # 2.7_cu80
-    # 2.7_cu92
-    # 3.6_cu80 x2
-    # AssertionError: 1536 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_cuda_half leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_cuda_half')
-
-    # 2.7_cu90
-    # 3.6_cu80 x2
-    # 3.6_cu90
-    # 3.6_cu92
-    # AssertionError: 3584 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_ignore_index_cuda_double leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_ignore_index_cuda_double')
-
-    # 3.6_cu80 x2
-    # 3.6_cu90
-    # AssertionError: 3584 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_ignore_index_cuda_float leaked -3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_ignore_index_cuda_float')
-
-    # 3.6_cu90
-    # AssertionError: 3584 not less than or equal to 1e-05 : test_nn.TestNN.test_NLLLoss_2d_weights_cuda_double leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_ignore_index_cuda_half')
-
-    # 3.6_cu80
-    # AssertionError: 3584 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_sum_reduction_cuda_double leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_sum_reduction_cuda_double')
-
-    # 3.6_cu80
-    # AssertionError: 2560 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_sum_reduction_cuda_float leaked 2560 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_sum_reduction_cuda_float')
-
-    # 3.7_cu92
-    # AssertionError: 1536 not less than or equal to 1e-05 : test_nn.TestNN.test_NLLLoss_2d_weights_cuda_half leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_weights_cuda_half')
-
-    # 3.6_cu80
-    # AssertionError: 1536 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_2d_sum_reduction_cuda_half leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_sum_reduction_cuda_half')
-
-    # 2.7_cu92
-    # AssertionError: __main__.TestNN.test_NLLLoss_2d_weights_cuda_float leaked 2560 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_2d_weights_cuda_float')
-
-    # 3.5_cu80 x2
-    # 3.6_cu90
-    # AssertionError: 1536 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_dim_is_3_cuda_double leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_dim_is_3_cuda_double')
-
-    # 3.6_cu80
-    # AssertionError: 1536 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_dim_is_3_sum_reduction_cuda_float leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_dim_is_3_sum_reduction_cuda_float')
-
-    # 3.6_cu80
-    # 3.7_cu80 x2
-    # AssertionError: 1536 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_dim_is_3_sum_reduction_cuda_half leaked 1536 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_dim_is_3_sum_reduction_cuda_half')
-
-    # 3.5_cu80
-    # 3.7_cu80 x2
-    # AssertionError: 10752 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_higher_dim_cuda_double leaked 10752 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_higher_dim_cuda_double')
-
-    # 3.5_cu80
-    # 3.7_cu80 --10752 x2
-    # AssertionError: 5120 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_higher_dim_cuda_float leaked -5120 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_higher_dim_cuda_float')
-
-    # 3.5_cu80
-    # 3.5 cu90
-    # AssertionError: 3584 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_higher_dim_cuda_half leaked 3584 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_higher_dim_cuda_half')
-
-    # 3.5_cu90
-    # AssertionError: 10752 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_higher_dim_sum_reduction_cuda_double leaked 10752 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_higher_dim_sum_reduction_cuda_double')
-
-    # 3.5_cu90
-    # AssertionError: 5120 not less than or equal to 1e-05 : __main__.TestNN.test_NLLLoss_higher_dim_sum_reduction_cuda_float leaked -5120 bytes CUDA memory on device 0
-    #tests_to_skip+=('TestNN and test_NLLLoss_higher_dim_sum_reduction_cuda_float')
-
-    # ______________________ TestNN.test_variable_sequence_cuda ______________________
-    # common_utils.py:277: in wrapper
-    #     method(*args, **kwargs)
-    # common_utils.py:241: in __exit__
-    #     self.name, after - before, i))
-    # common_utils.py:399: in assertEqual
-    #     super(TestCase, self).assertLessEqual(abs(x - y), prec, message)
-    # E   AssertionError: 1024 not less than or equal to 1e-05 : test_nn.TestNN.test_variable_sequence_cuda leaked 1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestNN and test_variable_sequence_cuda')
-
-    # 3.7_cu90
-    # AssertionError: 1024 not less than or equal to 1e-05 : __main__.TestJit.test_fuse_last_device_cuda leaked 1024 bytes CUDA memory on device 1
-    tests_to_skip+=('TestJit and test_fuse_last_device_cuda')
-
-    # 3.7_cu92 x2
-    # AssertionError: 1024 not less than or equal to 1e-05 : __main__.TestJit.test_ge_cuda leaked 1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestJit and test_ge_cuda')
-
-    # 3.5_cu90
-    # AssertionError: 1024 not less than or equal to 1e-05 : test_jit.TestJit.test_comparison_ge_le_cuda leaked -1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestJit and test_comparison_ge_le_cuda')
-
-    # 3.6_cu92
-    # 3.7_cu92
-    # AssertionError: 1024 not less than or equal to 1e-05 : __main__.TestJit.test_relu_cuda leaked 1024 bytes CUDA memory on device 0
-    tests_to_skip+=('TestJit and test_relu_cuda')
-
-    # 3.7_cu92 x3
-    # AssertionError: 1024 not less than or equal to 1e-05 : __main__.TestScript.test_milstm_fusion_cuda leaked 1024 bytes CUDA memory on device 1
-    tests_to_skip+=('TestScript and test_milstm_fusion_cuda')
-fi
-
-
-##############################################################################
-# MacOS specific flaky tests
-##############################################################################
-
-if [[ "$(uname)" == 'Darwin' ]]; then
-    # TestCppExtensions by default uses a temp folder in /tmp. This doesn't
-    # work for this Mac machine cause there is only one machine and /tmp is
-    # shared. (All the linux builds are on docker so have their own /tmp).
-    tests_to_skip+=('TestCppExtension')
-fi
-
-if [[ "$(uname)" == 'Darwin' && "$package_type" == 'conda' ]]; then
-
-    #
-    # TestDistBackend
-    # Seems like either most of the Mac builds get this error or none of them
-    # do
-    #
-
-    # Traceback (most recent call last):
-    #   File "test_thd_distributed.py", line 1046, in wrapper
-    #     self._join_and_reduce(fn)
-    #   File "test_thd_distributed.py", line 1120, in _join_and_reduce
-    #     first_process.exitcode == SKIP_IF_SMALL_WORLDSIZE_EXIT_CODE
-    # AssertionError
-    tests_to_skip+=('TestDistBackend and test_reduce_group_max')
-
-    # Traceback (most recent call last):
-    #   File "test_thd_distributed.py", line 1046, in wrapper
-    #     self._join_and_reduce(fn)
-    #   File "test_thd_distributed.py", line 1132, in _join_and_reduce
-    #     self.assertEqual(first_process.exitcode, 0)
-    #   File "/Users/administrator/nightlies/2018_10_01/wheel_build_dirs/conda_2.7/pytorch/test/common.py", line 397, in assertEqual
-    #     super(TestCase, self).assertLessEqual(abs(x - y), prec, message)
-    # AssertionError: 1 not less than or equal to 1e-05
-    tests_to_skip+=('TestDistBackend and test_isend')
-    tests_to_skip+=('TestDistBackend and test_reduce_group_min')
-    tests_to_skip+=('TestDistBackend and test_reduce_max')
-    tests_to_skip+=('TestDistBackend and test_reduce_min')
-    tests_to_skip+=('TestDistBackend and test_reduce_group_max')
-    tests_to_skip+=('TestDistBackend and test_reduce_group_min')
-    tests_to_skip+=('TestDistBackend and test_reduce_max')
-    tests_to_skip+=('TestDistBackend and test_reduce_min')
-    tests_to_skip+=('TestDistBackend and test_reduce_product')
-    tests_to_skip+=('TestDistBackend and test_reduce_sum')
-    tests_to_skip+=('TestDistBackend and test_scatter')
-    tests_to_skip+=('TestDistBackend and test_send_recv')
-    tests_to_skip+=('TestDistBackend and test_send_recv_any_source')
-fi
-
-
-# Turn the set of tests to skip into an invocation that pytest understands
-excluded_tests_logic=''
-for exclusion in "${tests_to_skip[@]}"; do
-    if [[ -z "$excluded_tests_logic" ]]; then
-        # Only true for i==0
-        excluded_tests_logic="not ($exclusion)"
-    else
-        excluded_tests_logic="$excluded_tests_logic and not ($exclusion)"
-    fi
-done
-
-
-##############################################################################
-# Run the tests
-##############################################################################
-echo
-echo "$(date) :: Calling 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
-
-python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k "'" "$excluded_tests_logic" "'"
-
-echo
-echo "$(date) :: Finished 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
-
-# cpp_extensions don't work with pytest, so we run them without pytest here,
-# except there's a failure on CUDA builds (documented above), and
-# cpp_extensions doesn't work on a shared mac machine (also documented above)
-if [[ "$cuda_ver" == 'cpu' && "$(uname)" != 'Darwin' ]]; then
-    echo
-    echo "$(date) :: Calling 'python test/run_test.py -v -i cpp_extensions'"
-    python test/run_test.py -v -i cpp_extensions
-    echo
-    echo "$(date) :: Finished 'python test/run_test.py -v -i cpp_extensions'"
-fi
-
-# thd_distributed can run on Mac but not in pytest
-if [[ "$(uname)" == 'Darwin' ]]; then
-    echo
-    echo "$(date) :: Calling 'python test/run_test.py -v -i thd_distributed'"
-    python test/run_test.py -v -i thd_distributed
-    echo
-    echo "$(date) :: Finished 'python test/run_test.py -v -i thd_distributed'"
-fi
diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat
index 2bfec067a..eb0ccac68 100644
--- a/windows/internal/cuda_install.bat
+++ b/windows/internal/cuda_install.bat
@@ -10,9 +10,7 @@ set SRC_DIR=%~dp0\..
 if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
 
 set /a CUDA_VER=%CUDA_VERSION%
-set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1%
-set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1%
-set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
+set CUDA_VERSION_STR=%CUDA_VERSION%
 set CUDNN_FOLDER="cuda"
 set CUDNN_LIB_FOLDER="lib\x64"
 
@@ -228,5 +226,4 @@ goto set_cuda_env_vars
 echo Setting up environment...
 set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
 set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
-set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
 set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt"