diff --git a/.github/workflows/userbenchmark-a100.yml b/.github/workflows/userbenchmark-a100.yml
index da542ec605..9b7ac76e91 100644
--- a/.github/workflows/userbenchmark-a100.yml
+++ b/.github/workflows/userbenchmark-a100.yml
@@ -21,52 +21,48 @@ jobs:
       TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      SETUP_SCRIPT: "/workspace/setup_instance.sh"
     steps:
       - name: Checkout TorchBench
         uses: actions/checkout@v3
-        with:
-          path: benchmark
-      - name: Clone and setup conda env
+      - name: Install Conda
         run: |
-          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
-          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
+          bash ./.ci/torchbench/install-conda.sh
       - name: Install TorchBench
         run: |
-          set -x
-          . "${SETUP_SCRIPT}"
-          pushd benchmark
-          python install.py
+          bash ./.ci/torchbench/install.sh
       - name: Run user benchmark
         run: |
           set -x
-          . "${SETUP_SCRIPT}"
+          . ${HOME}/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
+
           # remove old results
           if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
-          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
 
           MANUAL_WORKFLOW="${{ github.event.inputs.userbenchmark_name }}"
           if [ -z "${MANUAL_WORKFLOW}" ]; then
             # Figure out what userbenchmarks we should run, and run it
             python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME}
             if [ -d ./.userbenchmark ]; then
-              cp -r ./.userbenchmark ../benchmark-output
+              cp -r ./.userbenchmark benchmark-output
             else
-              mkdir ../benchmark-output
+              mkdir benchmark-output
             fi
           else
             python run_benchmark.py "${{ github.event.inputs.userbenchmark_name }}" ${{ github.event.inputs.userbenchmark_options }}
-            cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output
+            cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" benchmark-output
+            ls -las benchmark-output
+            pwd
           fi
       - name: Upload artifact
         uses: actions/upload-artifact@v4
         with:
           name: TorchBench result
-          path: benchmark-output/
+          path: benchmark-output
       - name: Upload result jsons to Scribe and S3
         run: |
-          . "${SETUP_SCRIPT}"
-          pushd benchmark
-          RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
+          # benchmark-output is written to the working directory by the run step; this step no longer enters benchmark/
+          RESULTS=($(find ${PWD}/benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
           echo "Uploading result jsons: ${RESULTS}"
           for r in ${RESULTS[@]}; do
@@ -76,6 +72,5 @@ jobs:
       - name: Clean up Conda env
         if: always()
         run: |
-          . "${SETUP_SCRIPT}"
-          conda deactivate && conda deactivate
+          . ${HOME}/miniconda3/etc/profile.d/conda.sh
           conda remove -n "${CONDA_ENV}" --all
diff --git a/userbenchmark/release-test/configs/2.6.0.yaml b/userbenchmark/release-test/configs/2.6.0.yaml
new file mode 100644
index 0000000000..6c19df6535
--- /dev/null
+++ b/userbenchmark/release-test/configs/2.6.0.yaml
@@ -0,0 +1,8 @@
+cuda:
+  - version: 12.4
+    magma_version: magma-cuda124
+pytorch:
+  - version: 2.5.1
+    conda_channel: pytorch
+  - version: 2.6.0
+    conda_channel: pytorch-test
diff --git a/userbenchmark/release-test/run_release_test.sh b/userbenchmark/release-test/run_release_test.sh
index 6272a45249..6bed451d63 100644
--- a/userbenchmark/release-test/run_release_test.sh
+++ b/userbenchmark/release-test/run_release_test.sh
@@ -18,6 +18,7 @@ fi
 
 nvcc --version
 
+sudo apt-get update
 sudo apt-get install bc
 sudo apt-get install --reinstall time
 which time
diff --git a/userbenchmark/release-test/version.txt b/userbenchmark/release-test/version.txt
index 437459cd94..e70b4523ae 100644
--- a/userbenchmark/release-test/version.txt
+++ b/userbenchmark/release-test/version.txt
@@ -1 +1 @@
-2.5.0
+2.6.0