[aarch64] Add CUDA 12.4 build script for ARM wheel #1775
Changes from all commits
babd9f5
91df612
e7d419b
0f2b149
7d80355
Changes to the existing aarch64 wheel build script (Python):
@@ -9,103 +9,201 @@ | |
|
||
|
||
def list_dir(path: str) -> List[str]: | ||
''' | ||
""" | ||
Helper for getting paths for Python | ||
''' | ||
""" | ||
return check_output(["ls", "-1", path]).decode().split("\n") | ||
|
||
|
||
def build_ArmComputeLibrary() -> None: | ||
''' | ||
""" | ||
Using ArmComputeLibrary for aarch64 PyTorch | ||
''' | ||
print('Building Arm Compute Library') | ||
acl_build_flags=["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", | ||
"arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"] | ||
acl_install_dir="/acl" | ||
acl_checkout_dir="ComputeLibrary" | ||
""" | ||
print("Building Arm Compute Library") | ||
acl_build_flags = [ | ||
"debug=0", | ||
"neon=1", | ||
"opencl=0", | ||
"os=linux", | ||
"openmp=1", | ||
"cppthreads=0", | ||
"arch=armv8a", | ||
"multi_isa=1", | ||
"fixed_format_kernels=1", | ||
"build=native", | ||
] | ||
acl_install_dir = "/acl" | ||
acl_checkout_dir = "ComputeLibrary" | ||
os.makedirs(acl_install_dir) | ||
check_call(["git", "clone", "https://github.com/ARM-software/ComputeLibrary.git", "-b", "v23.08", | ||
"--depth", "1", "--shallow-submodules"]) | ||
check_call(["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + acl_build_flags, | ||
cwd=acl_checkout_dir) | ||
check_call( | ||
[ | ||
"git", | ||
"clone", | ||
"https://github.com/ARM-software/ComputeLibrary.git", | ||
"-b", | ||
"v23.08", | ||
"--depth", | ||
"1", | ||
"--shallow-submodules", | ||
] | ||
) | ||
check_call( | ||
["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] | ||
+ acl_build_flags, | ||
cwd=acl_checkout_dir, | ||
) | ||
for d in ["arm_compute", "include", "utils", "support", "src"]: | ||
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") | ||
|
||
|
||
def update_wheel(wheel_path) -> None: | ||
""" | ||
Update the cuda wheel libraries | ||
""" | ||
folder = os.path.dirname(wheel_path) | ||
wheelname = os.path.basename(wheel_path) | ||
os.mkdir(f"{folder}/tmp") | ||
os.system(f"unzip {wheel_path} -d {folder}/tmp") | ||
libs_to_copy = [ | ||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||
"/usr/local/cuda/lib64/libcudnn.so.8", | ||
"/usr/local/cuda/lib64/libcublas.so.12", | ||
"/usr/local/cuda/lib64/libcublasLt.so.12", | ||
"/usr/local/cuda/lib64/libcudart.so.12", | ||
"/usr/local/cuda/lib64/libcufft.so.11", | ||
"/usr/local/cuda/lib64/libcusparse.so.12", | ||
"/usr/local/cuda/lib64/libcusparseLt.so.0", | ||
"/usr/local/cuda/lib64/libcusolver.so.11", | ||
"/usr/local/cuda/lib64/libcurand.so.10", | ||
"/usr/local/cuda/lib64/libnvToolsExt.so.1", | ||
"/usr/local/cuda/lib64/libnvJitLink.so.12", | ||
"/usr/local/cuda/lib64/libnvrtc.so.12", | ||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4", | ||
"/usr/local/cuda/lib64/libcudnn_adv_infer.so.8", | ||
"/usr/local/cuda/lib64/libcudnn_adv_train.so.8", | ||
"/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8", | ||
"/usr/local/cuda/lib64/libcudnn_cnn_train.so.8", | ||
"/usr/local/cuda/lib64/libcudnn_ops_infer.so.8", | ||
"/usr/local/cuda/lib64/libcudnn_ops_train.so.8", | ||
"/opt/conda/envs/aarch64_env/lib/libopenblas.so.0", | ||
"/opt/conda/envs/aarch64_env/lib/libgfortran.so.5", | ||
"/opt/conda/envs/aarch64_env/lib/libgomp.so.1", | ||
[Review thread anchored on the libgomp.so.1 line above; the individual comments are truncated in this capture: "currently the scripts are packaging libomp.so"; "I'm observing around 10% performance drop for eager mode inference with …"; "Can you please do a follow-up on …"; "Hi @Aidyn-A, currently the aarch64 wheels are linked to …; my point was why change it now without having a strong reason, and I have another PR to switch the wheels from …"; "From what I am seeing, it comes with: …"; "I hope you are checking either the torch 2.2 or nightly aarch64-linux wheel, because I am seeing (complete list): …"; "Now we have more data on …". A spot check for the packaged OpenMP runtime is sketched after update_wheel below.]
||
"/acl/build/libarm_compute.so", | ||
"/acl/build/libarm_compute_graph.so", | ||
"/acl/build/libarm_compute_core.so", | ||
] | ||
# Copy libraries to unzipped_folder/torch/lib | ||
for lib_path in libs_to_copy: | ||
lib_name = os.path.basename(lib_path) | ||
shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}") | ||
os.system( | ||
f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so" | ||
) | ||
os.mkdir(f"{folder}/cuda_wheel") | ||
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") | ||
shutil.move( | ||
f"{folder}/cuda_wheel/{wheelname}", | ||
f"/dist/{wheelname}", | ||
copy_function=shutil.copy2, | ||
) | ||
os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/") | ||
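The review thread above asks which OpenMP runtime the finished wheel actually pulls in. A minimal spot check, assuming the wheel lands in /pytorch/dist as in this script (the exact wheel name pattern and paths below are assumptions, not part of this PR):

    # Unpack the built wheel and list OpenMP-related DT_NEEDED entries of the torch libraries.
    WHEEL=$(ls /pytorch/dist/torch-*.whl | head -1)   # assumed output location and naming
    EXTRACT_DIR=$(mktemp -d)
    unzip -q "$WHEEL" -d "$EXTRACT_DIR"
    for lib in "$EXTRACT_DIR"/torch/lib/libtorch_cpu.so "$EXTRACT_DIR"/torch/lib/libtorch.so; do
        [ -f "$lib" ] || continue
        echo "== $(basename "$lib") =="
        readelf -d "$lib" | grep NEEDED | grep -i -E 'gomp|libomp' || echo "no OpenMP entry in DT_NEEDED"
    done
    rm -rf "$EXTRACT_DIR"

If the wheel is meant to bundle libgomp.so.1 (as in libs_to_copy above), it should show up here; seeing libomp.so instead would confirm the packaging concern raised in the thread.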
|
||
|
||
def complete_wheel(folder: str) -> str: | ||
''' | ||
""" | ||
Complete wheel build and put in artifact location | ||
''' | ||
""" | ||
wheel_name = list_dir(f"/{folder}/dist")[0] | ||
|
||
if "pytorch" in folder: | ||
if "pytorch" in folder and not enable_cuda: | ||
print("Repairing Wheel with AuditWheel") | ||
check_call(["auditwheel","repair", f"dist/{wheel_name}"], cwd=folder) | ||
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) | ||
repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0] | ||
|
||
print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist") | ||
os.rename(f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}") | ||
os.rename( | ||
f"/{folder}/wheelhouse/{repaired_wheel_name}", | ||
f"/{folder}/dist/{repaired_wheel_name}", | ||
) | ||
else: | ||
repaired_wheel_name = wheel_name | ||
|
||
print(f"Copying {repaired_wheel_name} to artfacts") | ||
shutil.copy2(f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}") | ||
print(f"Copying {repaired_wheel_name} to artifacts") | ||
shutil.copy2( | ||
f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}" | ||
) | ||
|
||
return repaired_wheel_name | ||
|
||
|
||
def parse_arguments(): | ||
''' | ||
""" | ||
Parse inline arguments | ||
''' | ||
""" | ||
from argparse import ArgumentParser | ||
|
||
parser = ArgumentParser("AARCH64 wheels python CD") | ||
parser.add_argument("--debug", action="store_true") | ||
parser.add_argument("--build-only", action="store_true") | ||
parser.add_argument("--test-only", type=str) | ||
parser.add_argument("--enable-mkldnn", action="store_true") | ||
parser.add_argument("--enable-cuda", action="store_true") | ||
return parser.parse_args() | ||
|
||
|
||
if __name__ == '__main__': | ||
''' | ||
if __name__ == "__main__": | ||
""" | ||
Entry Point | ||
''' | ||
""" | ||
args = parse_arguments() | ||
enable_mkldnn = args.enable_mkldnn | ||
repo = Repository('/pytorch') | ||
enable_cuda = args.enable_cuda | ||
repo = Repository("/pytorch") | ||
branch = repo.head.name | ||
if branch == 'HEAD': | ||
branch = 'master' | ||
|
||
if branch == "HEAD": | ||
branch = "master" | ||
|
||
print('Building PyTorch wheel') | ||
print("Building PyTorch wheel") | ||
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " | ||
os.system("python setup.py clean") | ||
|
||
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||
if override_package_version is not None: | ||
version = override_package_version | ||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " | ||
elif branch in ['nightly', 'master']: | ||
build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') | ||
version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] | ||
build_vars += ( | ||
f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " | ||
) | ||
elif branch in ["nightly", "master"]: | ||
build_date = ( | ||
check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") | ||
.decode() | ||
.replace("-", "") | ||
) | ||
version = ( | ||
check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] | ||
) | ||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " | ||
elif branch.startswith(("v1.", "v2.")): | ||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " | ||
|
||
if enable_mkldnn: | ||
build_ArmComputeLibrary() | ||
print("build pytorch with mkldnn+acl backend") | ||
build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \ | ||
"ACL_ROOT_DIR=/acl " \ | ||
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ | ||
"ACL_INCLUDE_DIR=/acl/build " \ | ||
"ACL_LIBRARY=/acl/build " | ||
build_vars += ( | ||
"USE_MKLDNN=ON USE_MKLDNN_ACL=ON " | ||
"ACL_ROOT_DIR=/acl " | ||
"LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " | ||
"ACL_INCLUDE_DIR=/acl/build " | ||
"ACL_LIBRARY=/acl/build " | ||
) | ||
else: | ||
print("build pytorch without mkldnn backend") | ||
|
||
os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") | ||
|
||
pytorch_wheel_name = complete_wheel("pytorch") | ||
print(f"Build Compelete. Created {pytorch_wheel_name}..") | ||
if enable_cuda: | ||
print("Updating Cuda Dependency") | ||
filename = os.listdir("/pytorch/dist/") | ||
wheel_path = f"/pytorch/dist/{filename[0]}" | ||
update_wheel(wheel_path) | ||
pytorch_wheel_name = complete_wheel("/pytorch/") | ||
print(f"Build Complete. Created {pytorch_wheel_name}..") |
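With the new --enable-cuda flag, the entry point builds the wheel, injects the CUDA libraries via update_wheel, and then finalizes it with complete_wheel. A hedged invocation sketch (the script file name is an assumption; it is not shown in this capture):

    # Inside the aarch64 builder container, with the source checked out at /pytorch (script name assumed).
    python3 aarch64_ci_build.py --enable-mkldnn --enable-cuda
    # --enable-mkldnn triggers build_ArmComputeLibrary() and the MKLDNN+ACL build flags;
    # --enable-cuda makes complete_wheel() skip the auditwheel repair step and runs
    # update_wheel() on the freshly built wheel before it is copied to /artifacts.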
New file: CUDA 12.4 installation script for linux-sbsa (shell), added by this PR:
@@ -0,0 +1,90 @@ | ||
#!/bin/bash | ||
|
||
set -ex | ||
|
||
function install_cusparselt_052 { | ||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||
mkdir tmp_cusparselt && pushd tmp_cusparselt | ||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz | ||
tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz | ||
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/ | ||
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ | ||
popd | ||
rm -rf tmp_cusparselt | ||
} | ||
|
||
function install_124 { | ||
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" | ||
rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||
# install CUDA 12.4.0 in the same container | ||
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run | ||
chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run | ||
./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent | ||
rm -f cuda_12.4.0_550.54.14_linux_sbsa.run | ||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda | ||
|
||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||
mkdir tmp_cudnn && cd tmp_cudnn | ||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz | ||
tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz | ||
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ | ||
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||
cd .. | ||
rm -rf tmp_cudnn | ||
|
||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||
git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git | ||
cd nccl && make -j src.build | ||
cp -a build/include/* /usr/local/cuda/include/ | ||
cp -a build/lib/* /usr/local/cuda/lib64/ | ||
cd .. | ||
rm -rf nccl | ||
|
||
install_cusparselt_052 | ||
|
||
ldconfig | ||
} | ||
|
||
function prune_124 { | ||
[Review thread on prune_124: a reviewer asked what nvprune does. The reply, addressed to @nWEIdia: it removes GPU architectures that are never used from the libraries, in order to lower the binary size. This workflow can be dangerous if libraries depend on heuristics and can select kernels from within the same GPU family (we've seen issues before where sm_61 was dropped, causing all kinds of issues on GTX cards). A verification sketch follows the prune_124 function below.]
||
echo "Pruning CUDA 12.4" | ||
##################################################################################### | ||
# CUDA 12.4 prune static libs | ||
##################################################################################### | ||
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" | ||
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" | ||
|
||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||
|
||
if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||
export GENCODE=$OVERRIDE_GENCODE | ||
fi | ||
|
||
# all CUDA libs except CuDNN and CuBLAS | ||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ | ||
| xargs -I {} bash -c \ | ||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||
|
||
# prune CuDNN and CuBLAS | ||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||
|
||
##################################################################################### | ||
# CUDA 12.4 prune visual tools | ||
##################################################################################### | ||
export CUDA_BASE="/usr/local/cuda-12.4/" | ||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||
} | ||
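Given the sm_61-style pitfalls mentioned in the review thread above, it can be worth confirming which SM architectures survive the pruning. A minimal sketch, assuming the CUDA 12.4 layout installed by install_124 (cuobjdump ships with the toolkit; the choice of libcublas_static.a here is just an example):

    # List the GPU architectures still embedded in a pruned static library.
    CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
    /usr/local/cuda-12.4/bin/cuobjdump --list-elf "$CUDA_LIB_DIR/libcublas_static.a" \
        | grep -o 'sm_[0-9]*' | sort -u
    # The output should match the architectures kept in GENCODE_CUDNN above
    # (sm_50 through sm_90); anything unexpectedly missing is a red flag.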
|
||
# idiomatic parameter and option handling in sh | ||
while test $# -gt 0 | ||
do | ||
case "$1" in | ||
12.4) install_124; prune_124 | ||
;; | ||
*) echo "bad argument $1"; exit 1 | ||
;; | ||
esac | ||
shift | ||
done |
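For reference, the argument loop above means the script is driven by a single version argument. A hedged usage sketch (the file name install_cuda_aarch64.sh is an assumption; it is not shown in this capture):

    # Inside the aarch64 (linux-sbsa) builder image:
    bash install_cuda_aarch64.sh 12.4   # runs install_124, then prune_124
    bash install_cuda_aarch64.sh 12.3   # any other value prints "bad argument 12.3" and exits 1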