-
Notifications
You must be signed in to change notification settings - Fork 74.8k
Description
Context: for DeepSpeech, we perform TensorFlow builds and then keep the cache in a tar archive (capturing the whole home directory of the build user). We then untar it, and the DeepSpeech build, run through `bazel build`, picks up the proper cached items so that it does not rebuild anything.
Recently, we started to see increased (2.5x) build times on CUDA-enabled builds. Debugging with Bazel showed that it was rebuilding because the actionKey computed for `stream_executor_impl` was different. After instrumenting Bazel to get more information, I could narrow down the reason for the different actionKey: the ordering of the CUDA includes was different. The list itself contained exactly the same content, just in a different order.
Those includes are symlinks, and they are generated from a genrule. This is all taken care of by
tensorflow/third_party/gpus/cuda_configure.bzl
Lines 915 to 1035 in ba64f53
def _create_local_cuda_repository(repository_ctx): | |
"""Creates the repository containing files set up to build with CUDA.""" | |
cuda_config = _get_cuda_config(repository_ctx) | |
cudnn_header_dir = _find_cudnn_header_dir(repository_ctx, | |
cuda_config.cudnn_install_basedir) | |
# Set up symbolic links for the cuda toolkit by creating genrules to do | |
# symlinking. We create one genrule for each directory we want to track under | |
# cuda_toolkit_path | |
cuda_toolkit_path = cuda_config.cuda_toolkit_path | |
cuda_include_path = cuda_toolkit_path + "/include" | |
genrules = [symlink_genrule_for_dir(repository_ctx, | |
cuda_include_path, "cuda/include", "cuda-include")] | |
genrules.append(symlink_genrule_for_dir(repository_ctx, | |
cuda_toolkit_path + "/nvvm", "cuda/nvvm", "cuda-nvvm")) | |
genrules.append(symlink_genrule_for_dir(repository_ctx, | |
cuda_toolkit_path + "/extras/CUPTI/include", | |
"cuda/extras/CUPTI/include", "cuda-extras")) | |
cuda_libs = _find_libs(repository_ctx, cuda_config) | |
cuda_lib_src = [] | |
cuda_lib_dest = [] | |
for lib in cuda_libs.values(): | |
cuda_lib_src.append(lib.path) | |
cuda_lib_dest.append("cuda/lib/" + lib.file_name) | |
genrules.append(symlink_genrule_for_dir(repository_ctx, None, "", "cuda-lib", | |
cuda_lib_src, cuda_lib_dest)) | |
# Set up the symbolic links for cudnn if cndnn was not installed to | |
# CUDA_TOOLKIT_PATH. | |
included_files = _read_dir(repository_ctx, cuda_include_path).replace( | |
cuda_include_path, '').splitlines() | |
if '/cudnn.h' not in included_files: | |
genrules.append(symlink_genrule_for_dir(repository_ctx, None, | |
"cuda/include/", "cudnn-include", [cudnn_header_dir + "/cudnn.h"], | |
["cudnn.h"])) | |
else: | |
genrules.append( | |
'filegroup(\n' + | |
' name = "cudnn-include",\n' + | |
' srcs = [],\n' + | |
')\n' | |
) | |
# Set up BUILD file for cuda/ | |
_tpl(repository_ctx, "cuda:build_defs.bzl", | |
{ | |
"%{cuda_is_configured}": "True", | |
"%{cuda_extra_copts}": _compute_cuda_extra_copts( | |
repository_ctx, cuda_config.compute_capabilities), | |
}) | |
_tpl(repository_ctx, "cuda:BUILD", | |
{ | |
"%{cuda_driver_lib}": cuda_libs["cuda"].file_name, | |
"%{cudart_static_lib}": cuda_libs["cudart_static"].file_name, | |
"%{cudart_static_linkopt}": _cudart_static_linkopt( | |
cuda_config.cpu_value), | |
"%{cudart_lib}": cuda_libs["cudart"].file_name, | |
"%{cublas_lib}": cuda_libs["cublas"].file_name, | |
"%{cusolver_lib}": cuda_libs["cusolver"].file_name, | |
"%{cudnn_lib}": cuda_libs["cudnn"].file_name, | |
"%{cufft_lib}": cuda_libs["cufft"].file_name, | |
"%{curand_lib}": cuda_libs["curand"].file_name, | |
"%{cupti_lib}": cuda_libs["cupti"].file_name, | |
"%{cuda_include_genrules}": "\n".join(genrules), | |
"%{cuda_headers}": ('":cuda-include",\n' + | |
' ":cudnn-include",') | |
}) | |
is_cuda_clang = _use_cuda_clang(repository_ctx) | |
should_download_clang = is_cuda_clang and _flag_enabled( | |
repository_ctx, _TF_DOWNLOAD_CLANG) | |
if should_download_clang: | |
download_clang(repository_ctx, "crosstool/extra_tools") | |
# Set up crosstool/ | |
cc = find_cc(repository_ctx) | |
cc_fullpath = cc if not should_download_clang else "crosstool/" + cc | |
host_compiler_includes = _host_compiler_includes(repository_ctx, cc_fullpath) | |
cuda_defines = { | |
"%{cuda_include_path}": _cuda_include_path(repository_ctx, | |
cuda_config), | |
"%{host_compiler_includes}": host_compiler_includes, | |
} | |
if is_cuda_clang: | |
cuda_defines["%{clang_path}"] = cc | |
_tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty"}) | |
_tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL") | |
repository_ctx.file("crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "") | |
else: | |
nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" % | |
(cuda_config.cuda_toolkit_path, | |
".exe" if cuda_config.cpu_value == "Windows" else ""))) | |
_tpl(repository_ctx, "crosstool:BUILD", | |
{"%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc"}) | |
_tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL") | |
_tpl(repository_ctx, | |
"crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc", | |
{ | |
"%{cpu_compiler}": str(cc), | |
"%{cuda_version}": cuda_config.cuda_version, | |
"%{nvcc_path}": nvcc_path, | |
"%{gcc_host_compiler_path}": str(cc), | |
"%{cuda_compute_capabilities}": ", ".join( | |
["\"%s\"" % c for c in cuda_config.compute_capabilities]), | |
}) | |
# Set up cuda_config.h, which is used by | |
# tensorflow/stream_executor/dso_loader.cc. | |
_tpl(repository_ctx, "cuda:cuda_config.h", | |
{ | |
"%{cuda_version}": cuda_config.cuda_version, | |
"%{cudnn_version}": cuda_config.cudnn_version, | |
"%{cuda_compute_capabilities}": ",".join( | |
["CudaVersion(\"%s\")" % c | |
for c in cuda_config.compute_capabilities]), | |
"%{cuda_toolkit_path}": cuda_config.cuda_toolkit_path, | |
}, "cuda/cuda/cuda_config.h") |
Looking more carefully, one can see that the headers are discovered by the `_read_dir` function:
tensorflow/third_party/gpus/cuda_configure.bzl
Lines 891 to 894 in ba64f53
find_result = _execute( | |
repository_ctx, ["find", src_dir, "-follow", "-type", "f"], | |
empty_stdout_fine=True) | |
result = find_result.stdout |
The listing is produced by `find`. Its output order depends on the ordering provided by the `readdir` syscall.
In our case, the ordering on the filesystem before making the tar archive, and after untarring it would be different.
One simple fix is to sort the list of headers; this way we are sure the order is always the same, and we no longer depend on whatever order `readdir` happens to return.
In the past, Bazel would force the ordering of the elements considered when computing the actionKey. This was removed in 0.3.0 (bazelbuild/bazel@9dc3211); while it was in place, it may have kept this issue hidden.