diff --git a/.bazelrc b/.bazelrc index bd2a357a..5a427833 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,6 +2,8 @@ # Zetasql is removed. # This is a candidate for removal build --cxxopt="-std=c++17" +# Needed to build absl +build --host_cxxopt=-std=c++17 # Needed to avoid zetasql proto error. # Zetasql is removed. @@ -12,3 +14,5 @@ build --protocopt=--experimental_allow_proto3_optional # parameter 'user_link_flags' is deprecated and will be removed soon. # It may be temporarily re-enabled by setting --incompatible_require_linker_input_cc_api=false build --incompatible_require_linker_input_cc_api=false +build:macos --apple_platform_type=macos +build:macos_arm64 --cpu=darwin_arm64 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a48e8684..006f19c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,3 +1,17 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + name: Build on: @@ -11,44 +25,27 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest] + fail-fast: false steps: - name: Checkout uses: actions/checkout@v4 - - name: Build data-validation - id: build-data-validation - uses: ./.github/reusable-build - with: - python-version: ${{ matrix.python-version }} - upload-artifact: true - - upload_to_pypi: - name: Upload to PyPI - runs-on: ubuntu-latest - if: (github.event_name == 'release' && startsWith(github.ref, 'refs/tags')) || (github.event_name == 'workflow_dispatch') - needs: [build] - environment: - name: pypi - url: https://pypi.org/p/tensorflow-data-validation/ - permissions: - id-token: write - steps: - - name: Retrieve wheels - uses: actions/download-artifact@v4.1.8 - with: - merge-multiple: true - path: wheels - - - name: List the build artifacts - run: | - ls -lAs wheels/ + - name: Build wheels + uses: pypa/cibuildwheel@v2.23.3 + # env: + # CIBW_SOME_OPTION: value + # ... + # with: + # package-dir: . + # output-dir: wheelhouse + # config-file: "{package}/pyproject.toml" - - name: Upload to PyPI - uses: pypa/gh-action-pypi-publish@release/v1.9 - with: - packages_dir: wheels/ + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b8a65fd3..a83a4ad4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,10 +11,12 @@ on: jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + needs: build strategy: matrix: python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-latest] steps: - name: Checkout @@ -30,7 +32,7 @@ jobs: shell: bash run: | PYTHON_VERSION_TAG="cp$(echo ${{ matrix.python-version }} | sed 's/\.//')" - WHEEL_FILE=$(ls dist/*${PYTHON_VERSION_TAG}*.whl) + WHEEL_FILE=$(ls wheelhouse/*${PYTHON_VERSION_TAG}*.whl) pip install "${WHEEL_FILE}[test]" - name: Run Test diff --git a/BUILD b/BUILD.bazel similarity index 100% rename from BUILD rename to BUILD.bazel diff --git a/pyproject.toml b/pyproject.toml index 0db16c19..43bedcdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ requires = [ "setuptools", "wheel", # Required for using org_tensorflow bazel repository. - "numpy~=1.22.0", + "numpy>=1.22.0", ] [tool.ruff] @@ -143,6 +143,27 @@ ignore = [ "UP031", # Use format specifiers instead of percent format ] - [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] + +[tool.cibuildwheel] +build-frontend="build" +environment = {USE_BAZEL_VERSION = "6.5.0"} +# build = ["cp310-*"] +skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] +before-test="rm {project}/bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" +test-command="pytest {project}" +test-extras = ["test"] + +[tool.cibuildwheel.linux] +#manylinux-x86_64-image = "manylinux_2_28" +manylinux-x86_64-image = "manylinux2014" +archs=["x86_64"] +before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +#test-extras = ["test"] +before-test="rm bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" +test-command="pytest {project}" + + +[tool.cibuildwheel.macos] +archs = ["arm64"] diff --git a/setup.py b/setup.py index 8e3a410c..7f8249c2 100644 --- a/setup.py +++ b/setup.py @@ -77,18 +77,34 @@ def finalize_options(self): ) self._additional_build_options = [] if platform.system() == "Darwin": - self._additional_build_options = ["--macos_minimum_os=10.14"] + # This flag determines the platform qualifier of the macos wheel. + if platform.machine() == "arm64": + self._additional_build_options = [ + "--macos_minimum_os=11.0", + "--config=macos_arm64", + ] + else: + self._additional_build_options = ["--macos_minimum_os=10.14"] def run(self): - subprocess.check_call( + check_call_call = ( [self._bazel_cmd, "run", "-c", "opt"] + self._additional_build_options - + ["//tensorflow_data_validation:move_generated_files"], + + ["//tensorflow_data_validation:move_generated_files"] + ) + print(check_call_call) + subprocess.check_call( + check_call_call, # Bazel should be invoked in a directory containing bazel WORKSPACE # file, which is the root directory. cwd=os.path.dirname(os.path.realpath(__file__)), env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), ) + subprocess.check_call( + ["ls", "-al"], + cwd=os.path.dirname(os.path.realpath(__file__)), + env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), + ) # TFDV is not a purelib. However because of the extension module is not built @@ -214,17 +230,17 @@ def select_constraint(default, nightly=None, git_master=None): nightly=">=1.18.0.dev", git_master="@git+https://github.com/tensorflow/metadata@master", ), - "tfx-bsl" - + select_constraint( - default=">=1.17.1,<1.18", - nightly=">=1.18.0.dev", - git_master="@git+https://github.com/tensorflow/tfx-bsl@master", - ), + "ajf-test-tfx-bsl>=1.18.0.dev", + # + select_constraint( + # default=">=1.17.1,<1.18", + # nightly=">=1.18.0.dev", + # git_master="@git+https://github.com/tensorflow/tfx-bsl@master", + # ), ], extras_require={ "mutual-information": _make_mutual_information_requirements(), "visualization": _make_visualization_requirements(), - "dev": ["precommit"], + "dev": ["precommit", "cibuildwheel", "build"], "docs": _make_docs_requirements(), "test": [ "pytest", diff --git a/tensorflow_data_validation/move_generated_files.sh b/tensorflow_data_validation/move_generated_files.sh index 08ce5abe..ee055d50 100755 --- a/tensorflow_data_validation/move_generated_files.sh +++ b/tensorflow_data_validation/move_generated_files.sh @@ -16,6 +16,8 @@ # Moves the bazel generated files needed for packaging the wheel to the source # tree. function tfdv::move_generated_files() { + echo $BUILD_WORKSPACE_DIRECTORY + PYWRAP_TFDV="tensorflow_data_validation/pywrap/tensorflow_data_validation_extension.so" cp -f "${BUILD_WORKSPACE_DIRECTORY}/bazel-bin/${PYWRAP_TFDV}" \ "${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}" @@ -23,6 +25,7 @@ function tfdv::move_generated_files() { # If run by "bazel run", $(pwd) is the .runfiles dir that contains all the # data dependencies. RUNFILES_DIR=$(pwd) + echo "RUNFILES_DIR: ${RUNFILES_DIR}" cp -f ${RUNFILES_DIR}/tensorflow_data_validation/skew/protos/feature_skew_results_pb2.py \ ${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/skew/protos cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_config_pb2.py \ @@ -30,6 +33,7 @@ function tfdv::move_generated_files() { cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_metadata_pb2.py \ ${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto chmod +w "${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}" + echo "finished moving generated files" } tfdv::move_generated_files diff --git a/tensorflow_data_validation/statistics/generators/mutual_information_test.py b/tensorflow_data_validation/statistics/generators/mutual_information_test.py index 1f708a79..f5e8dde5 100644 --- a/tensorflow_data_validation/statistics/generators/mutual_information_test.py +++ b/tensorflow_data_validation/statistics/generators/mutual_information_test.py @@ -13,6 +13,8 @@ # limitations under the License. """Tests for mutual_information.""" +import sys + import apache_beam as beam import numpy as np import pyarrow as pa @@ -219,6 +221,7 @@ def test_encoder_multivalent_numeric_missing(self): batch, expected, set([types.FeaturePath(["fa"])]), EMPTY_SET ) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_encoder_multivalent_numeric_too_large_for_numpy_v1(self): # For NumPy version 1.x.x, np.histogram cannot handle values > 2**53 if the # min and max of the examples are the same. @@ -1442,6 +1445,7 @@ def test_mi_with_no_schema_or_paths(self): TEST_MAX_ENCODING_LENGTH, ).compute(batch) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_mi_multivalent_too_large_int_value_for_numpy_v1(self): # For NumPy version 1.x.x, np.histogram cannot handle values > 2**53 if the # min and max of the examples are the same. diff --git a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py index 27671554..0bf46cda 100644 --- a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py +++ b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py @@ -13,6 +13,8 @@ # limitations under the License. """Tests for partitioned_stats_generator.""" +import sys + import apache_beam as beam import numpy as np import pyarrow as pa @@ -473,6 +475,7 @@ def test_sample_partition_combine( if num_compacts_metric: self.assertEqual(metric_num_compacts, num_compacts) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_sample_metrics(self): record_batch = pa.RecordBatch.from_arrays( [