Skip to content

Extending timeout for libtorch job #211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion cron/build_multiple.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ for config in "${all_configs[@]}"; do
build_script="${NIGHTLIES_BUILDER_ROOT}/cron/build_docker.sh"
fi

# Use the libtorch-specific timeout for libtorch builds
if [[ "$package_type" == libtorch ]]; then
_timeout="$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT"
else
_timeout="$PYTORCH_NIGHTLIES_TIMEOUT"
fi

set +x
echo
echo "##############################"
Expand All @@ -124,7 +131,7 @@ for config in "${all_configs[@]}"; do
DESIRED_PYTHON="$py_ver" \
DESIRED_CUDA="$cuda_ver" \
ON_SUCCESS_WRITE_ME="$succeeded_log_loc" \
$PORTABLE_TIMEOUT "$PYTORCH_NIGHTLIES_TIMEOUT" \
$PORTABLE_TIMEOUT "$_timeout" \
"$build_script" > "$log_name" 2>&1
ret="$?"
duration="$SECONDS"
Expand Down
39 changes: 29 additions & 10 deletions cron/nightly_defaults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ mkdir -p "$today" || true

# List of people to email when things go wrong. This is passed directly to
# `mail -t`
export NIGHTLIES_EMAIL_LIST='[email protected]'
if [[ -z "$NIGHTLIES_EMAIL_LIST" ]]; then
export NIGHTLIES_EMAIL_LIST='[email protected]'
fi

# PYTORCH_CREDENTIALS_FILE
# A bash file that exports credentials needed to upload to aws and anaconda.
Expand All @@ -79,15 +81,19 @@ fi
# Location of the temporary miniconda that is downloaded to install conda-build
# and aws to upload finished packages. TODO: it is messy to install this in
# upload.sh and later use it in upload_logs.sh.
CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
if [[ -z "$CONDA_UPLOADER_INSTALLATION" ]]; then
export CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
fi

# N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that
# is the script that actually clones the builder repo that /this/ script is
# running from.
export NIGHTLIES_BUILDER_ROOT="$(cd $(dirname $0)/.. && pwd)"

# The shared pytorch repo to be used by all builds
export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
if [[ -z "$NIGHTLIES_PYTORCH_ROOT" ]]; then
export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
fi

# PYTORCH_REPO
# The Github org/user whose fork of Pytorch to check out (git clone
Expand Down Expand Up @@ -190,15 +196,23 @@ nightlies_package_folder () {
# should be empty. Logs are written out to RUNNING_LOG_DIR. When a build
# fails, its log should be moved to FAILED_LOG_DIR, and similarly for
# succeeded builds.
export RUNNING_LOG_DIR="${today}/logs"
export FAILED_LOG_DIR="${today}/logs/failed"
export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
if [[ -z "$RUNNING_LOG_DIR" ]]; then
export RUNNING_LOG_DIR="${today}/logs"
fi
if [[ -z "$FAILED_LOG_DIR" ]]; then
export FAILED_LOG_DIR="${today}/logs/failed"
fi
if [[ -z "$SUCCEEDED_LOG_DIR" ]]; then
export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
fi

# Log s3 directory, must not end in a /
if [[ "$(uname)" == 'Darwin' ]]; then
export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
else
export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
if [[ -z "$LOGS_S3_DIR" ]]; then
if [[ "$(uname)" == 'Darwin' ]]; then
export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
else
export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
fi
fi
# The location of the binary_sizes dir in s3 is hardcoded into
# upload_binary_sizes.sh
Expand Down Expand Up @@ -236,6 +250,11 @@ if [[ -z "$PYTORCH_NIGHTLIES_TIMEOUT" ]]; then
export PYTORCH_NIGHTLIES_TIMEOUT=4800
fi
fi
if [[ -z "$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT" ]]; then
# The libtorch job actually runs for several cpu/cuda versions in sequence
# and so takes a long time
export PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT=10800
fi

# PORTABLE_TIMEOUT
# Command/executable of some timeout command. Defined here because the path
Expand Down