From eac42ab1218440a5754b1bda08f4320dc06f1026 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Wed, 18 Jan 2023 10:15:07 -0500 Subject: [PATCH 1/8] added gpu test --- .github/workflows/test-linux-gpu.yml | 72 ++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 .github/workflows/test-linux-gpu.yml diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml new file mode 100644 index 0000000000..118a885f23 --- /dev/null +++ b/.github/workflows/test-linux-gpu.yml @@ -0,0 +1,72 @@ +name: Unit-tests on Linux GPU + +on: + pull_request: + push: + branches: + - nightly + - main + - release/* + workflow_dispatch: + +env: + CHANNEL: "nightly" + +jobs: + tests: + strategy: + matrix: + python_version: ["3.8"] + cuda_arch_version: ["11.6"] + fail-fast: false + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: + runner: linux.g5.4xlarge.nvidia.gpu + repository: pytorch/text + gpu-arch-type: cuda + gpu-arch-version: ${{ matrix.cuda_arch_version }} + timeout: 120 + script: | + # Mark Build Directory Safe + git config --global --add safe.directory /__w/text/text + + # Set up Environment Variables + export PYTHON_VERSION="${{ matrix.python_version }}" + export VERSION="${{ matrix.cuda_arch_version }}" + export CUDATOOLKIT="pytorch-cuda=${VERSION}" + + # Set CHANNEL + if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then + export CHANNEL=test + else + export CHANNEL=nightly + fi + + # Create Conda Env + conda create -yp ci_env python="${PYTHON_VERSION}" + conda activate /work/ci_env + python3 -m pip --quiet install cmake>=3.18.0 ninja + conda env update --file ".circleci/unittest/linux/scripts/environment.yml" --prune + + # TorchText-specific Setup + printf "* Downloading SpaCy English models\n" + python -m spacy download en_core_web_sm + printf "* Downloading SpaCy German models\n" + python -m spacy download de_core_news_sm + + # Install PyTorch, Torchvision, and TorchData + set -ex + conda install \ + --yes \ + -c "pytorch-${CHANNEL}" \ + -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ + "${CUDATOOLKIT}" + printf "Installing torchdata nightly\n" + python3 -m pip install --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu + python3 setup.py develop + python3 -m pip install parameterized + + # Run Tests + python3 -m torch.utils.collect_env + cd test + python3 -m pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest From 7190d15de707b39e204f54217111e9ed0fb45788 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Thu, 19 Jan 2023 13:49:07 -0500 Subject: [PATCH 2/8] separated model tests into cpu and gpu specific versions --- test/torchtext_unittest/common/case_utils.py | 24 +++++++++++++ .../models/models_cpu_test.py | 9 +++++ .../models/models_gpu_test.py | 11 ++++++ .../{test_models.py => models_test_impl.py} | 34 +++++++++++++------ 4 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 test/torchtext_unittest/models/models_cpu_test.py create mode 100644 test/torchtext_unittest/models/models_gpu_test.py rename test/torchtext_unittest/models/{test_models.py => models_test_impl.py} (87%) diff --git a/test/torchtext_unittest/common/case_utils.py b/test/torchtext_unittest/common/case_utils.py index 9ed9a1ce62..aae9ee7b0b 100644 --- a/test/torchtext_unittest/common/case_utils.py +++ b/test/torchtext_unittest/common/case_utils.py @@ -4,6 +4,7 @@ import unittest 
from itertools import zip_longest +import torch from torchtext._internal.module_utils import is_module_available @@ -37,11 +38,34 @@ def get_temp_path(self, *paths): return path +class TestBaseMixin: + """Mixin to provide consistent way to define device/dtype/backend aware TestCase""" + + dtype = None + device = None + + def setUp(self): + super().setUp() + torch.random.manual_seed(2434) + + @property + def complex_dtype(self): + if self.dtype in ["float32", "float", torch.float, torch.float32]: + return torch.cfloat + if self.dtype in ["float64", "double", torch.double, torch.float64]: + return torch.cdouble + raise ValueError(f"No corresponding complex dtype for {self.dtype}") + + def skipIfNoModule(module, display_name=None): display_name = display_name or module return unittest.skipIf(not is_module_available(module), f'"{display_name}" is not available') +def skipIfNoCuda(module): + return unittest.skipIf(not torch.cuda.is_available(), "CUDA is not available.") + + def zip_equal(*iterables): """With the regular Python `zip` function, if one iterable is longer than the other, the remainder portions are ignored.This is resolved in Python 3.10 where we can use diff --git a/test/torchtext_unittest/models/models_cpu_test.py b/test/torchtext_unittest/models/models_cpu_test.py new file mode 100644 index 0000000000..cb9b5c3e17 --- /dev/null +++ b/test/torchtext_unittest/models/models_cpu_test.py @@ -0,0 +1,9 @@ +import torch + +from ..common.torchtext_test_case import TorchtextTestCase +from .models_test_impl import BaseTestModels + + +class TestModels32CPUTest(BaseTestModels, TorchtextTestCase): + dtype = torch.float32 + device = torch.device("cpu") diff --git a/test/torchtext_unittest/models/models_gpu_test.py b/test/torchtext_unittest/models/models_gpu_test.py new file mode 100644 index 0000000000..43352a02a8 --- /dev/null +++ b/test/torchtext_unittest/models/models_gpu_test.py @@ -0,0 +1,11 @@ +import torch + +from ..common.case_utils import skipIfNoCuda +from ..common.torchtext_test_case import TorchtextTestCase +from .models_test_impl import BaseTestModels + + +@skipIfNoCuda +class TestModels32GPUTest(BaseTestModels, TorchtextTestCase): + dtype = torch.float32 + device = torch.device("cuda") diff --git a/test/torchtext_unittest/models/test_models.py b/test/torchtext_unittest/models/models_test_impl.py similarity index 87% rename from test/torchtext_unittest/models/test_models.py rename to test/torchtext_unittest/models/models_test_impl.py index 0e92f6b631..43c862122c 100644 --- a/test/torchtext_unittest/models/test_models.py +++ b/test/torchtext_unittest/models/models_test_impl.py @@ -2,15 +2,27 @@ from unittest.mock import patch import torch -import torchtext from torch.nn import functional as torch_F -from ..common.torchtext_test_case import TorchtextTestCase +from ..common.case_utils import TestBaseMixin -class TestModels(TorchtextTestCase): +class BaseTestModels(TestBaseMixin): + def get_model(self, encoder_conf, head=None, freeze_encoder=False, checkpoint=None, override_checkpoint_head=False): + from torchtext.models import RobertaBundle + + model = RobertaBundle.build_model( + encoder_conf=encoder_conf, + head=head, + freeze_encoder=freeze_encoder, + checkpoint=checkpoint, + override_checkpoint_head=override_checkpoint_head, + ) + model.to(device=self.device, dtype=self.dtype) + return model + def test_roberta_bundler_build_model(self) -> None: - from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel, RobertaBundle + from torchtext.models import 
RobertaClassificationHead, RobertaEncoderConf, RobertaModel dummy_encoder_conf = RobertaEncoderConf( vocab_size=10, embedding_dim=16, ffn_dimension=64, num_attention_heads=2, num_encoder_layers=2 @@ -18,14 +30,14 @@ def test_roberta_bundler_build_model(self) -> None: # case: user provide encoder checkpoint state dict dummy_encoder = RobertaModel(dummy_encoder_conf) - model = RobertaBundle.build_model(encoder_conf=dummy_encoder_conf, checkpoint=dummy_encoder.state_dict()) + model = self.get_model(encoder_conf=dummy_encoder_conf, checkpoint=dummy_encoder.state_dict()) self.assertEqual(model.state_dict(), dummy_encoder.state_dict()) # case: user provide classifier checkpoint state dict when head is given and override_head is False (by default) dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16) another_dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16) dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head) - model = RobertaBundle.build_model( + model = self.get_model( encoder_conf=dummy_encoder_conf, head=another_dummy_classifier_head, checkpoint=dummy_classifier.state_dict(), @@ -34,7 +46,7 @@ def test_roberta_bundler_build_model(self) -> None: # case: user provide classifier checkpoint state dict when head is given and override_head is set True another_dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16) - model = RobertaBundle.build_model( + model = self.get_model( encoder_conf=dummy_encoder_conf, head=another_dummy_classifier_head, checkpoint=dummy_classifier.state_dict(), @@ -48,13 +60,13 @@ def test_roberta_bundler_build_model(self) -> None: encoder_state_dict = {} for k, v in dummy_classifier.encoder.state_dict().items(): encoder_state_dict["encoder." 
+ k] = v
-        model = torchtext.models.RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf, head=dummy_classifier_head, checkpoint=encoder_state_dict
         )
         self.assertEqual(model.state_dict(), dummy_classifier.state_dict())
 
     def test_roberta_bundler_train(self) -> None:
-        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel, RobertaBundle
+        from torchtext.models import RobertaClassificationHead, RobertaEncoderConf, RobertaModel
 
         dummy_encoder_conf = RobertaEncoderConf(
             vocab_size=10, embedding_dim=16, ffn_dimension=64, num_attention_heads=2, num_encoder_layers=2
@@ -73,7 +85,7 @@ def _train(model):
         # does not freeze encoder
         dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=dummy_classifier_head,
             freeze_encoder=False,
@@ -91,7 +103,7 @@ def _train(model):
         # freeze encoder
         dummy_classifier_head = RobertaClassificationHead(num_classes=2, input_dim=16)
         dummy_classifier = RobertaModel(dummy_encoder_conf, dummy_classifier_head)
-        model = RobertaBundle.build_model(
+        model = self.get_model(
             encoder_conf=dummy_encoder_conf,
             head=dummy_classifier_head,
             freeze_encoder=True,

From ff07271f18c575b55d8f7bd06a956a1054b4d29e Mon Sep 17 00:00:00 2001
From: Roman Shraga
Date: Thu, 19 Jan 2023 13:53:39 -0500
Subject: [PATCH 3/8] removed redundant Test from name

---
 test/torchtext_unittest/models/models_cpu_test.py | 2 +-
 test/torchtext_unittest/models/models_gpu_test.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/torchtext_unittest/models/models_cpu_test.py b/test/torchtext_unittest/models/models_cpu_test.py
index cb9b5c3e17..3bcd3e4eb5 100644
--- a/test/torchtext_unittest/models/models_cpu_test.py
+++ b/test/torchtext_unittest/models/models_cpu_test.py
@@ -4,6 +4,6 @@ from .models_test_impl import BaseTestModels
 
 
-class TestModels32CPUTest(BaseTestModels, TorchtextTestCase):
+class TestModels32CPU(BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cpu")
diff --git a/test/torchtext_unittest/models/models_gpu_test.py b/test/torchtext_unittest/models/models_gpu_test.py
index 43352a02a8..ef0386d454 100644
--- a/test/torchtext_unittest/models/models_gpu_test.py
+++ b/test/torchtext_unittest/models/models_gpu_test.py
@@ -6,6 +6,6 @@
 
 
 @skipIfNoCuda
-class TestModels32GPUTest(BaseTestModels, TorchtextTestCase):
+class TestModels32GPU(BaseTestModels, TorchtextTestCase):
     dtype = torch.float32
     device = torch.device("cuda")

From 5f5a65b3f6cae748b79fa41d66ea57401ecd1332 Mon Sep 17 00:00:00 2001
From: Roman Shraga
Date: Tue, 24 Jan 2023 13:40:09 -0500
Subject: [PATCH 4/8] addressed comments and fixed test skip implementation

---
 .github/workflows/test-linux-gpu.yml              | 2 +-
 test/torchtext_unittest/common/case_utils.py      | 12 ------------
 test/torchtext_unittest/models/models_gpu_test.py | 4 ++--
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml
index 118a885f23..97f5bad326 100644
--- a/.github/workflows/test-linux-gpu.yml
+++ b/.github/workflows/test-linux-gpu.yml
@@ -54,7 +54,7 @@ jobs:
       printf "* Downloading SpaCy German models\n"
       python -m spacy download de_core_news_sm
 
-      # Install PyTorch, Torchvision, and TorchData
+      # Install PyTorch and TorchData
       set -ex
       conda install \
         --yes \
diff --git
a/test/torchtext_unittest/common/case_utils.py b/test/torchtext_unittest/common/case_utils.py index aae9ee7b0b..b4d040547a 100644 --- a/test/torchtext_unittest/common/case_utils.py +++ b/test/torchtext_unittest/common/case_utils.py @@ -48,24 +48,12 @@ def setUp(self): super().setUp() torch.random.manual_seed(2434) - @property - def complex_dtype(self): - if self.dtype in ["float32", "float", torch.float, torch.float32]: - return torch.cfloat - if self.dtype in ["float64", "double", torch.double, torch.float64]: - return torch.cdouble - raise ValueError(f"No corresponding complex dtype for {self.dtype}") - def skipIfNoModule(module, display_name=None): display_name = display_name or module return unittest.skipIf(not is_module_available(module), f'"{display_name}" is not available') -def skipIfNoCuda(module): - return unittest.skipIf(not torch.cuda.is_available(), "CUDA is not available.") - - def zip_equal(*iterables): """With the regular Python `zip` function, if one iterable is longer than the other, the remainder portions are ignored.This is resolved in Python 3.10 where we can use diff --git a/test/torchtext_unittest/models/models_gpu_test.py b/test/torchtext_unittest/models/models_gpu_test.py index ef0386d454..738ef3108d 100644 --- a/test/torchtext_unittest/models/models_gpu_test.py +++ b/test/torchtext_unittest/models/models_gpu_test.py @@ -1,11 +1,11 @@ +import pytest import torch -from ..common.case_utils import skipIfNoCuda from ..common.torchtext_test_case import TorchtextTestCase from .models_test_impl import BaseTestModels -@skipIfNoCuda +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available") class TestModels32GPU(BaseTestModels, TorchtextTestCase): dtype = torch.float32 device = torch.device("cuda") From ac8e41532337f788ccd53e6154a2bb2666b46735 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Tue, 24 Jan 2023 14:58:35 -0500 Subject: [PATCH 5/8] fix gpu test train case by sending model input to device --- test/torchtext_unittest/models/models_test_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/torchtext_unittest/models/models_test_impl.py b/test/torchtext_unittest/models/models_test_impl.py index 43c862122c..cdfd196268 100644 --- a/test/torchtext_unittest/models/models_test_impl.py +++ b/test/torchtext_unittest/models/models_test_impl.py @@ -75,8 +75,8 @@ def test_roberta_bundler_train(self) -> None: def _train(model): optim = SGD(model.parameters(), lr=1) - model_input = torch.tensor([[0, 1, 2, 3, 4, 5]]) - target = torch.tensor([0]) + model_input = torch.tensor([[0, 1, 2, 3, 4, 5]]).to(device=self.device) + target = torch.tensor([0]).to(device=self.device) logits = model(model_input) loss = torch_F.cross_entropy(logits, target) loss.backward() From 3343f0b792b2e1a16a97bbefaa8921ab81c93db4 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Thu, 26 Jan 2023 12:19:24 -0500 Subject: [PATCH 6/8] silenced setup script steps, changed skip decorator to unittest, moved gpu tests to separate folder --- .github/workflows/test-linux-gpu.yml | 9 +++++---- test/torchtext_unittest/models/models_gpu_test.py | 11 ----------- 2 files changed, 5 insertions(+), 15 deletions(-) delete mode 100644 test/torchtext_unittest/models/models_gpu_test.py diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml index 97f5bad326..9b017a9811 100644 --- a/.github/workflows/test-linux-gpu.yml +++ b/.github/workflows/test-linux-gpu.yml @@ -43,7 +43,7 @@ jobs: fi # Create Conda Env - conda create -yp ci_env 
python="${PYTHON_VERSION}" + conda create --quiet -yp ci_env python="${PYTHON_VERSION}" conda activate /work/ci_env python3 -m pip --quiet install cmake>=3.18.0 ninja conda env update --file ".circleci/unittest/linux/scripts/environment.yml" --prune @@ -58,15 +58,16 @@ jobs: set -ex conda install \ --yes \ + --quiet \ -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" printf "Installing torchdata nightly\n" - python3 -m pip install --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu + python3 -m pip install --pre torchdata --extra-index-url https://download.pytorch.org/whl/nightly/cpu --quiet python3 setup.py develop - python3 -m pip install parameterized + python3 -m pip install parameterized --quiet # Run Tests python3 -m torch.utils.collect_env cd test - python3 -m pytest --cov=torchtext --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest + python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 torchtext_unittest/models/gpu_tests diff --git a/test/torchtext_unittest/models/models_gpu_test.py b/test/torchtext_unittest/models/models_gpu_test.py deleted file mode 100644 index 738ef3108d..0000000000 --- a/test/torchtext_unittest/models/models_gpu_test.py +++ /dev/null @@ -1,11 +0,0 @@ -import pytest -import torch - -from ..common.torchtext_test_case import TorchtextTestCase -from .models_test_impl import BaseTestModels - - -@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available") -class TestModels32GPU(BaseTestModels, TorchtextTestCase): - dtype = torch.float32 - device = torch.device("cuda") From 1d40bb31796ad2d9d6077a340031922bfa5bd907 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Thu, 26 Jan 2023 12:43:16 -0500 Subject: [PATCH 7/8] add back gpu tests --- .../models/gpu_tests/models_gpu_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 test/torchtext_unittest/models/gpu_tests/models_gpu_test.py diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py new file mode 100644 index 0000000000..40a90b9730 --- /dev/null +++ b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py @@ -0,0 +1,10 @@ +import unittest +import torch + +from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase +from torchtext_unittest.models.models_test_impl import BaseTestModels + +@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available") +class TestModels32GPU(BaseTestModels, TorchtextTestCase): + dtype = torch.float32 + device = torch.device("cuda") From 38fdaf21474aaeb753be0afc04789be53b4e9372 Mon Sep 17 00:00:00 2001 From: Roman Shraga Date: Thu, 26 Jan 2023 12:50:32 -0500 Subject: [PATCH 8/8] fix lint --- test/torchtext_unittest/models/gpu_tests/models_gpu_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py index 40a90b9730..07452b4619 100644 --- a/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py +++ b/test/torchtext_unittest/models/gpu_tests/models_gpu_test.py @@ -1,9 +1,10 @@ import unittest -import torch +import torch from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase from torchtext_unittest.models.models_test_impl import BaseTestModels + @unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available") class 
TestModels32GPU(BaseTestModels, TorchtextTestCase): dtype = torch.float32
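
A note on the test-skip fix that patch 4's subject refers to: the `skipIfNoCuda` helper introduced in patch 2 receives the decorated class as its `module` argument, ignores it, and returns the `unittest.skipIf(...)` decorator itself, so a bare `@skipIfNoCuda` replaces the test class with a function and its tests are never collected. A minimal sketch of the broken and fixed forms (the `BrokenGPUTest` and `WorkingGPUTest` names are illustrative, not from the patches):

```python
import unittest

import torch


def skipIfNoCuda(module):
    # Patch 2's helper: it takes the decorated class as `module` but never
    # applies the resulting decorator to it; it returns the decorator itself.
    return unittest.skipIf(not torch.cuda.is_available(), "CUDA is not available.")


@skipIfNoCuda
class BrokenGPUTest(unittest.TestCase):  # name is now bound to a function, not a TestCase
    def test_placeholder(self):
        pass


# The form the series settles on in patches 7 and 8: apply unittest.skipIf
# directly, so the class survives and is skipped only when CUDA is absent.
@unittest.skipIf(not torch.cuda.is_available(), reason="CUDA is not available")
class WorkingGPUTest(unittest.TestCase):
    def test_placeholder(self):
        pass
```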
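
The series lands on a device/dtype-parameterized test layout: `TestBaseMixin` holds the shared `dtype` and `device` class attributes plus RNG seeding, `BaseTestModels` routes every model through `get_model(...)` to move it onto that device and dtype, and each concrete test class only pins a configuration. A sketch of how a further configuration could be added under the same pattern; the float64 CPU variant below is hypothetical and not part of these patches:

```python
import torch

from torchtext_unittest.common.torchtext_test_case import TorchtextTestCase
from torchtext_unittest.models.models_test_impl import BaseTestModels


# Hypothetical float64 CPU configuration: all test logic is inherited from
# BaseTestModels; only the dtype/device class attributes change.
class TestModels64CPU(BaseTestModels, TorchtextTestCase):
    dtype = torch.float64
    device = torch.device("cpu")
```

On a CUDA machine, the GPU configuration can be exercised locally with essentially the same command the workflow runs: `cd test && python3 -m pytest -v torchtext_unittest/models/gpu_tests`.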