From b1daa23efe4bed38cc5be76894e2c19d6333fc5b Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 4 Jun 2025 09:30:14 -0600 Subject: [PATCH 01/38] add build cibuildwheel logic --- .github/workflows/build.yml | 59 +++++++++++++++++-------------------- pyproject.toml | 19 +++++++++++- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a48e8684..8745b9b7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,3 +1,16 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. name: Build on: @@ -14,41 +27,23 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-14] steps: - name: Checkout uses: actions/checkout@v4 - - name: Build data-validation - id: build-data-validation - uses: ./.github/reusable-build - with: - python-version: ${{ matrix.python-version }} - upload-artifact: true - - upload_to_pypi: - name: Upload to PyPI - runs-on: ubuntu-latest - if: (github.event_name == 'release' && startsWith(github.ref, 'refs/tags')) || (github.event_name == 'workflow_dispatch') - needs: [build] - environment: - name: pypi - url: https://pypi.org/p/tensorflow-data-validation/ - permissions: - id-token: write - steps: - - name: Retrieve wheels - uses: actions/download-artifact@v4.1.8 - with: - merge-multiple: true - path: wheels - - - name: List the build artifacts - run: | - ls -lAs wheels/ + - name: Build wheels + uses: pypa/cibuildwheel@v2.23.3 + # env: + # CIBW_SOME_OPTION: value + # ... + # with: + # package-dir: . + # output-dir: wheelhouse + # config-file: "{package}/pyproject.toml" - - name: Upload to PyPI - uses: pypa/gh-action-pypi-publish@release/v1.9 - with: - packages_dir: wheels/ + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl diff --git a/pyproject.toml b/pyproject.toml index 0db16c19..a0f2cdb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,6 +143,23 @@ ignore = [ "UP031", # Use format specifiers instead of percent format ] - [tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] + +[tool.cibuildwheel] +build-frontend="build" +before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +# before-build = "bazel run " +# bazel 8 doesn't support bazel WORKSPACE file +environment = {USE_BAZEL_VERSION = "6.5.0"} + +# test-command="python -m unittest discover {package} -p *_test.py" + +[tool.cibuildwheel.linux] +#manylinux-x86_64-image = "manylinux_2_28" +manylinux-x86_64-image = "manylinux2014" +archs=["x86_64"] + + +[tool.cibuildwheel.macos] +archs = ["arm64"] From 5d300a8f56517dd331908bb383f91bb0638a6fc0 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 4 Jun 2025 09:31:52 -0600 Subject: [PATCH 02/38] update cibuildwheel --- .github/workflows/build.yml | 7 ++++--- pyproject.toml | 9 +++++---- setup.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8745b9b7..def44dce 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -name: Build + +name: Build tfx-bsl on: push: @@ -24,10 +25,10 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-14] + os: [ubuntu-latest, macos-latest] steps: - name: Checkout diff --git a/pyproject.toml b/pyproject.toml index a0f2cdb5..0ec3689a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,11 +147,10 @@ ignore = [ "__init__.py" = ["F401"] [tool.cibuildwheel] -build-frontend="build" -before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -# before-build = "bazel run " -# bazel 8 doesn't support bazel WORKSPACE file +build-frontend="pip" environment = {USE_BAZEL_VERSION = "6.5.0"} +# build = ["cp310-*"] +skip = ["cp311-*", "cp312-*", "cp313-*", "*musllinux*", "pp*"] # test-command="python -m unittest discover {package} -p *_test.py" @@ -159,7 +158,9 @@ environment = {USE_BAZEL_VERSION = "6.5.0"} #manylinux-x86_64-image = "manylinux_2_28" manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] +before-build = "yum install -y npm && npm install -g @bazel/bazelisk" [tool.cibuildwheel.macos] archs = ["arm64"] +before-build = "brew install bazelisk" diff --git a/setup.py b/setup.py index 8e3a410c..5fb57120 100644 --- a/setup.py +++ b/setup.py @@ -224,7 +224,7 @@ def select_constraint(default, nightly=None, git_master=None): extras_require={ "mutual-information": _make_mutual_information_requirements(), "visualization": _make_visualization_requirements(), - "dev": ["precommit"], + "dev": ["precommit", "cibuildwheel", "build"], "docs": _make_docs_requirements(), "test": [ "pytest", From 3b4987b7bd3b8df6e203a4183598b15eed362d58 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:01:47 -0600 Subject: [PATCH 03/38] update build frontend and skips --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0ec3689a..e63448df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,10 +147,10 @@ ignore = [ "__init__.py" = ["F401"] [tool.cibuildwheel] -build-frontend="pip" +build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] -skip = ["cp311-*", "cp312-*", "cp313-*", "*musllinux*", "pp*"] +skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] # test-command="python -m unittest discover {package} -p *_test.py" From e28573ce05e3a2789ccee12325a5dce5f5e9636d Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:14:57 -0600 Subject: [PATCH 04/38] remove mac build --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e63448df..c54f33e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,7 +147,7 @@ ignore = [ "__init__.py" = ["F401"] [tool.cibuildwheel] -build-frontend="build" +build-frontend="" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] @@ -161,6 +161,6 @@ archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -[tool.cibuildwheel.macos] -archs = ["arm64"] -before-build = "brew install bazelisk" +#[tool.cibuildwheel.macos] +#archs = ["arm64"] +#before-build = "brew install bazelisk" From 23e46ded6c84eb0a70ee2c9a08568f631fdc9691 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:17:40 -0600 Subject: [PATCH 05/38] update platform and build frontend --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c54f33e3..8ecffd7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,7 +147,8 @@ ignore = [ "__init__.py" = ["F401"] [tool.cibuildwheel] -build-frontend="" +build-frontend="build" +platform="linux" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] From ab59916bf0274b5ef978673ff5d0dce50a924241 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:20:13 -0600 Subject: [PATCH 06/38] skip mac builds --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8ecffd7a..1de1bb59 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ build-frontend="build" platform="linux" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] -skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] +skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*", "*macos*"] # test-command="python -m unittest discover {package} -p *_test.py" From ba78c17ed3acf69172a99a0aeefb908fd24bb827 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:21:51 -0600 Subject: [PATCH 07/38] remove platform tag --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1de1bb59..766abc07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,7 +148,6 @@ ignore = [ [tool.cibuildwheel] build-frontend="build" -platform="linux" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*", "*macos*"] From c2cd3217d7dcd7884b971ecf40749fb77a84d744 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 10:26:30 -0600 Subject: [PATCH 08/38] don't run macos build --- .github/workflows/build.yml | 2 +- pyproject.toml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index def44dce..d2d487b9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest] # , macos-latest] steps: - name: Checkout diff --git a/pyproject.toml b/pyproject.toml index 766abc07..24d17b25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,6 +161,6 @@ archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -#[tool.cibuildwheel.macos] -#archs = ["arm64"] -#before-build = "brew install bazelisk" +[tool.cibuildwheel.macos] +archs = ["arm64"] +before-build = "brew install bazelisk" From 37df8cd3c01030e206f82df1271f849bb5e5a508 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 12:24:35 -0600 Subject: [PATCH 09/38] add build on macos15 --- .github/workflows/build.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d2d487b9..19b495ea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest] # , macos-latest] + os: [ubuntu-latest, macos-15] steps: - name: Checkout diff --git a/pyproject.toml b/pyproject.toml index 24d17b25..e63448df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,7 +150,7 @@ ignore = [ build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] -skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*", "*macos*"] +skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] # test-command="python -m unittest discover {package} -p *_test.py" From dfa764f8aeda1f8ae30569c19d97d35c0ceff2a4 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 10 Jun 2025 12:55:16 -0600 Subject: [PATCH 10/38] add build host_cxxopt to .bazelrc --- .bazelrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.bazelrc b/.bazelrc index bd2a357a..ea72a300 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,6 +2,7 @@ # Zetasql is removed. # This is a candidate for removal build --cxxopt="-std=c++17" +build --host_cxxopt=-std=c++17 # Needed to avoid zetasql proto error. # Zetasql is removed. From 1736aa707c865500722c902e22324bdc8094ee09 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Thu, 12 Jun 2025 10:47:04 -0600 Subject: [PATCH 11/38] added comment --- .bazelrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.bazelrc b/.bazelrc index ea72a300..4619c3b6 100644 --- a/.bazelrc +++ b/.bazelrc @@ -2,6 +2,7 @@ # Zetasql is removed. # This is a candidate for removal build --cxxopt="-std=c++17" +# Needed to build absl build --host_cxxopt=-std=c++17 # Needed to avoid zetasql proto error. From e389eb0c7dbebb84a34a318054f562f8b8b5070a Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 12:35:57 -0600 Subject: [PATCH 12/38] update Build job name --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 19b495ea..6112b6c9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: Build tfx-bsl +name: Build on: push: From 698f422753c933033493a610a6e94e0da8e9e619 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 13:52:00 -0600 Subject: [PATCH 13/38] see move generated files --- tensorflow_data_validation/move_generated_files.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow_data_validation/move_generated_files.sh b/tensorflow_data_validation/move_generated_files.sh index 08ce5abe..490970f4 100755 --- a/tensorflow_data_validation/move_generated_files.sh +++ b/tensorflow_data_validation/move_generated_files.sh @@ -16,6 +16,8 @@ # Moves the bazel generated files needed for packaging the wheel to the source # tree. function tfdv::move_generated_files() { + echo $BUILD_WORKSPACE_DIRECTORY + PYWRAP_TFDV="tensorflow_data_validation/pywrap/tensorflow_data_validation_extension.so" cp -f "${BUILD_WORKSPACE_DIRECTORY}/bazel-bin/${PYWRAP_TFDV}" \ "${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}" @@ -23,6 +25,7 @@ function tfdv::move_generated_files() { # If run by "bazel run", $(pwd) is the .runfiles dir that contains all the # data dependencies. RUNFILES_DIR=$(pwd) + echo "RUNFILES_DIR: ${RUNFILES_DIR}" cp -f ${RUNFILES_DIR}/tensorflow_data_validation/skew/protos/feature_skew_results_pb2.py \ ${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/skew/protos cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_config_pb2.py \ From 71b5502652e38765a3994ab10d0c292935b9ec52 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 13:56:04 -0600 Subject: [PATCH 14/38] debugging --- setup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 5fb57120..49d83c90 100644 --- a/setup.py +++ b/setup.py @@ -80,10 +80,12 @@ def finalize_options(self): self._additional_build_options = ["--macos_minimum_os=10.14"] def run(self): - subprocess.check_call( - [self._bazel_cmd, "run", "-c", "opt"] + check_call_call = ([self._bazel_cmd, "run", "-c", "opt"] + self._additional_build_options - + ["//tensorflow_data_validation:move_generated_files"], + + ["//tensorflow_data_validation:move_generated_files"]) + print(check_call_call ) + subprocess.check_call( + check_call_call, # Bazel should be invoked in a directory containing bazel WORKSPACE # file, which is the root directory. cwd=os.path.dirname(os.path.realpath(__file__)), From 571b49fe0a4a49764efc78302dff6bbf1a6194f0 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 14:04:20 -0600 Subject: [PATCH 15/38] add echo --- tensorflow_data_validation/move_generated_files.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow_data_validation/move_generated_files.sh b/tensorflow_data_validation/move_generated_files.sh index 490970f4..ee055d50 100755 --- a/tensorflow_data_validation/move_generated_files.sh +++ b/tensorflow_data_validation/move_generated_files.sh @@ -33,6 +33,7 @@ function tfdv::move_generated_files() { cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_metadata_pb2.py \ ${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto chmod +w "${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}" + echo "finished moving generated files" } tfdv::move_generated_files From e2cf169361982621522c5369393aff806b90873e Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 14:16:49 -0600 Subject: [PATCH 16/38] add logic for if platform is arm64 --- setup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 49d83c90..f01c407a 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,14 @@ def finalize_options(self): ) self._additional_build_options = [] if platform.system() == "Darwin": - self._additional_build_options = ["--macos_minimum_os=10.14"] + # This flag determines the platform qualifier of the macos wheel. + if platform.machine() == "arm64": + self._additional_build_options = [ + "--macos_minimum_os=11.0", + "--config=macos_arm64", + ] + else: + self._additional_build_options = ["--macos_minimum_os=10.14"] def run(self): check_call_call = ([self._bazel_cmd, "run", "-c", "opt"] From 4ed05a66041d8e04a2814314e41e819f7646283e Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 14:38:29 -0600 Subject: [PATCH 17/38] add macos_arm64 to bazelrc build --- .bazelrc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.bazelrc b/.bazelrc index 4619c3b6..5a427833 100644 --- a/.bazelrc +++ b/.bazelrc @@ -14,3 +14,5 @@ build --protocopt=--experimental_allow_proto3_optional # parameter 'user_link_flags' is deprecated and will be removed soon. # It may be temporarily re-enabled by setting --incompatible_require_linker_input_cc_api=false build --incompatible_require_linker_input_cc_api=false +build:macos --apple_platform_type=macos +build:macos_arm64 --cpu=darwin_arm64 From eda3669ad1298f302a5ca1c547984bad76c1fe48 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 15:13:55 -0600 Subject: [PATCH 18/38] update build-backend --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e63448df..a5c89352 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ requires = [ # Required for using org_tensorflow bazel repository. "numpy~=1.22.0", ] +build-backend = "setuptools.build_meta" [tool.ruff] line-length = 88 From cbea9b475993afde06351ce3447afc854d3a08af Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 15:20:59 -0600 Subject: [PATCH 19/38] debugging --- setup.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/setup.py b/setup.py index f01c407a..98e40d74 100644 --- a/setup.py +++ b/setup.py @@ -98,6 +98,23 @@ def run(self): cwd=os.path.dirname(os.path.realpath(__file__)), env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), ) + subprocess.check_call( + ["echo", "$PWD"], + cwd=os.path.dirname(os.path.realpath(__file__)), + env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), + ) + subprocess.check_call( + ["ls", "-al"], + cwd=os.path.dirname(os.path.realpath(__file__)), + env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), + ) + subprocess.check_call( + ["ls", "-al", "build/"], + cwd=os.path.dirname(os.path.realpath(__file__)), + env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), + ) + + # TFDV is not a purelib. However because of the extension module is not built From 619670a33a415bb5bdd8580aede70dfdddcfa8e2 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 15:38:49 -0600 Subject: [PATCH 20/38] Move build to build.bazel remove subprocess check_calls --- BUILD => BUILD.bazel | 0 setup.py | 10 ---------- 2 files changed, 10 deletions(-) rename BUILD => BUILD.bazel (100%) diff --git a/BUILD b/BUILD.bazel similarity index 100% rename from BUILD rename to BUILD.bazel diff --git a/setup.py b/setup.py index 98e40d74..ceb1bbcf 100644 --- a/setup.py +++ b/setup.py @@ -98,21 +98,11 @@ def run(self): cwd=os.path.dirname(os.path.realpath(__file__)), env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), ) - subprocess.check_call( - ["echo", "$PWD"], - cwd=os.path.dirname(os.path.realpath(__file__)), - env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), - ) subprocess.check_call( ["ls", "-al"], cwd=os.path.dirname(os.path.realpath(__file__)), env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), ) - subprocess.check_call( - ["ls", "-al", "build/"], - cwd=os.path.dirname(os.path.realpath(__file__)), - env=dict(os.environ, PYTHON_BIN_PATH=sys.executable), - ) From 1f34dc0239ecb37fc17b635c8accd32cc8f747df Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 15:54:28 -0600 Subject: [PATCH 21/38] remove numpy dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a5c89352..28b4dc3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ requires = [ "setuptools", "wheel", # Required for using org_tensorflow bazel repository. - "numpy~=1.22.0", + # "numpy~=1.22.0", ] build-backend = "setuptools.build_meta" From db0ca49b4f417aca62a42b14eba777f88c0f1083 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 16:38:47 -0600 Subject: [PATCH 22/38] add numpy back in, add test commands --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 28b4dc3f..65368e26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ requires = [ "setuptools", "wheel", # Required for using org_tensorflow bazel repository. - # "numpy~=1.22.0", + "numpy~=1.22.0", ] build-backend = "setuptools.build_meta" @@ -160,6 +160,8 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +before-test="rm bazel-*" +test-command="pytest" [tool.cibuildwheel.macos] From 219c0bf07989246efe50c420535f11e81d253398 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 16:41:24 -0600 Subject: [PATCH 23/38] add test extras --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 65368e26..793b8ad5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,6 +160,7 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +test_extras = ["test"] before-test="rm bazel-*" test-command="pytest" From ace8ec309414ae8c9b4a470aeb4762b79216ca34 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 16:42:46 -0600 Subject: [PATCH 24/38] fix typo --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 793b8ad5..5c571538 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,7 +160,7 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -test_extras = ["test"] +test-extras = ["test"] before-test="rm bazel-*" test-command="pytest" From cabc4913ab68a4846fa3bd37065abedce2cd9ad8 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 16:57:09 -0600 Subject: [PATCH 25/38] add test-sources --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5c571538..4ed00ffd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,6 +160,7 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" +test-sources = ["tensorflow_data_validation"] test-extras = ["test"] before-test="rm bazel-*" test-command="pytest" From e0617c6b462a6e79a8f95fd64cac4ff0dc4742db Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Mon, 16 Jun 2025 17:14:16 -0600 Subject: [PATCH 26/38] fix test --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4ed00ffd..c4e07546 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,10 +160,9 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -test-sources = ["tensorflow_data_validation"] test-extras = ["test"] before-test="rm bazel-*" -test-command="pytest" +test-command="pytest {project}" [tool.cibuildwheel.macos] From 809c58a0a3a65b12872a28faa3841af3f84dea3c Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 09:42:56 -0600 Subject: [PATCH 27/38] update --- .github/workflows/build.yml | 2 +- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6112b6c9..c53414f4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-15] + os: [ubuntu-latest, macos-14, macos-15, ] steps: - name: Checkout diff --git a/pyproject.toml b/pyproject.toml index c4e07546..976a31cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -167,4 +167,3 @@ test-command="pytest {project}" [tool.cibuildwheel.macos] archs = ["arm64"] -before-build = "brew install bazelisk" From e93f2f24dcc630e1a78da366ae0a77eb65e839b9 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 09:56:59 -0600 Subject: [PATCH 28/38] try using wheelhouse build on test workflow --- .github/workflows/build.yml | 2 +- .github/workflows/test.yml | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c53414f4..4d156e81 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-14, macos-15, ] + os: [ubuntu-latest, macos-14, macos-15] steps: - name: Checkout diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b8a65fd3..551c8db7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,10 +11,12 @@ on: jobs: test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + needs: build strategy: matrix: python-version: ["3.9", "3.10", "3.11"] + os: [ubuntu-latest, macos-14, macos-15] steps: - name: Checkout @@ -30,7 +32,7 @@ jobs: shell: bash run: | PYTHON_VERSION_TAG="cp$(echo ${{ matrix.python-version }} | sed 's/\.//')" - WHEEL_FILE=$(ls dist/*${PYTHON_VERSION_TAG}*.whl) + WHEEL_FILE=$(ls wheelhouse/*${PYTHON_VERSION_TAG}*.whl) pip install "${WHEEL_FILE}[test]" - name: Run Test From 7dcdef275dbb4d94b9f7673710bd9fe3d755743c Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 10:04:18 -0600 Subject: [PATCH 29/38] remove cibuildwheel tests --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 976a31cf..29f93bae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,9 +160,9 @@ skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" -test-extras = ["test"] -before-test="rm bazel-*" -test-command="pytest {project}" +#test-extras = ["test"] +#before-test="rm bazel-*" +#test-command="pytest {project}" [tool.cibuildwheel.macos] From 0dc5fafcd9804ce5aa85ea55e72da15939269763 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 10:33:54 -0600 Subject: [PATCH 30/38] try to fix tests by using testpypi for ajf-test-tfx-bsl in place of tfx-bsl --- pyproject.toml | 4 ++-- setup.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 29f93bae..bab1694d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,8 +161,8 @@ manylinux-x86_64-image = "manylinux2014" archs=["x86_64"] before-build = "yum install -y npm && npm install -g @bazel/bazelisk" #test-extras = ["test"] -#before-test="rm bazel-*" -#test-command="pytest {project}" +before-test="rm bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" +test-command="pytest {project}" [tool.cibuildwheel.macos] diff --git a/setup.py b/setup.py index ceb1bbcf..7bb9b107 100644 --- a/setup.py +++ b/setup.py @@ -230,12 +230,12 @@ def select_constraint(default, nightly=None, git_master=None): nightly=">=1.18.0.dev", git_master="@git+https://github.com/tensorflow/metadata@master", ), - "tfx-bsl" - + select_constraint( - default=">=1.17.1,<1.18", - nightly=">=1.18.0.dev", - git_master="@git+https://github.com/tensorflow/tfx-bsl@master", - ), + "ajf-test-tfx-bsl>=1.18.0.dev", + # + select_constraint( + # default=">=1.17.1,<1.18", + # nightly=">=1.18.0.dev", + # git_master="@git+https://github.com/tensorflow/tfx-bsl@master", + # ), ], extras_require={ "mutual-information": _make_mutual_information_requirements(), From 9bb136bd1a31b836cd6ba460c8f744909ce12b4e Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 10:46:56 -0600 Subject: [PATCH 31/38] precommit --- pyproject.toml | 3 ++- setup.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bab1694d..59dcf6aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ requires = [ # Required for using org_tensorflow bazel repository. "numpy~=1.22.0", ] -build-backend = "setuptools.build_meta" [tool.ruff] line-length = 88 @@ -152,6 +151,8 @@ build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] +before-test="rm bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" +test-command="pytest {project}" # test-command="python -m unittest discover {package} -p *_test.py" diff --git a/setup.py b/setup.py index 7bb9b107..7f8249c2 100644 --- a/setup.py +++ b/setup.py @@ -87,10 +87,12 @@ def finalize_options(self): self._additional_build_options = ["--macos_minimum_os=10.14"] def run(self): - check_call_call = ([self._bazel_cmd, "run", "-c", "opt"] + check_call_call = ( + [self._bazel_cmd, "run", "-c", "opt"] + self._additional_build_options - + ["//tensorflow_data_validation:move_generated_files"]) - print(check_call_call ) + + ["//tensorflow_data_validation:move_generated_files"] + ) + print(check_call_call) subprocess.check_call( check_call_call, # Bazel should be invoked in a directory containing bazel WORKSPACE @@ -105,8 +107,6 @@ def run(self): ) - - # TFDV is not a purelib. However because of the extension module is not built # by setuptools, it will be incorrectly treated as a purelib. The following # works around that bug. From a600f5b36681780202ddfa0f72015d707520c672 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 10:48:59 -0600 Subject: [PATCH 32/38] make sure test extras are installed --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 59dcf6aa..b8377e01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,10 +151,9 @@ build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] -before-test="rm bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" +before-test="rm {project}/bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" test-command="pytest {project}" - -# test-command="python -m unittest discover {package} -p *_test.py" +test-extras = ["test"] [tool.cibuildwheel.linux] #manylinux-x86_64-image = "manylinux_2_28" From 658168f48c3d5910d53f847c614b58761eec7c78 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Tue, 24 Jun 2025 17:35:15 -0600 Subject: [PATCH 33/38] try not building 311 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b8377e01..e843ee98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ requires = [ "setuptools", "wheel", # Required for using org_tensorflow bazel repository. - "numpy~=1.22.0", + "numpy>=1.22.0", ] [tool.ruff] @@ -150,7 +150,7 @@ ignore = [ build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] -skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] +skip = ["cp311-*", "cp312-*", "cp313-*", "*musllinux*", "pp*"] before-test="rm {project}/bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" test-command="pytest {project}" test-extras = ["test"] From bd7881bfa15d2cdb89daa564671b3527317034bb Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 25 Jun 2025 11:58:35 -0600 Subject: [PATCH 34/38] don't fail fast to see if tests fail with other setups --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4d156e81..ee9eba0a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,6 +29,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-14, macos-15] + fail-fast: false steps: - name: Checkout From 518e249e4b4405b8e3277e8d752002e4a6fb07ef Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 25 Jun 2025 15:54:56 -0600 Subject: [PATCH 35/38] update versioning --- .github/workflows/build.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ee9eba0a..006f19c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-14, macos-15] + os: [ubuntu-latest, macos-latest] fail-fast: false steps: diff --git a/pyproject.toml b/pyproject.toml index e843ee98..43bedcdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,7 +150,7 @@ ignore = [ build-frontend="build" environment = {USE_BAZEL_VERSION = "6.5.0"} # build = ["cp310-*"] -skip = ["cp311-*", "cp312-*", "cp313-*", "*musllinux*", "pp*"] +skip = ["cp312-*", "cp313-*", "*musllinux*", "pp*"] before-test="rm {project}/bazel-* && pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ajf-test-tfx-bsl" test-command="pytest {project}" test-extras = ["test"] From 05113e8fe97fc4861f756b87eed27e8a74a48ade Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 16 Jul 2025 12:48:07 -0600 Subject: [PATCH 36/38] skip failing tests on macos --- .github/workflows/test.yml | 2 +- .../statistics/generators/mutual_information_test.py | 1 + .../statistics/generators/partitioned_stats_generator_test.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 551c8db7..a83a4ad4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11"] - os: [ubuntu-latest, macos-14, macos-15] + os: [ubuntu-latest, macos-latest] steps: - name: Checkout diff --git a/tensorflow_data_validation/statistics/generators/mutual_information_test.py b/tensorflow_data_validation/statistics/generators/mutual_information_test.py index 1f708a79..587fc814 100644 --- a/tensorflow_data_validation/statistics/generators/mutual_information_test.py +++ b/tensorflow_data_validation/statistics/generators/mutual_information_test.py @@ -219,6 +219,7 @@ def test_encoder_multivalent_numeric_missing(self): batch, expected, set([types.FeaturePath(["fa"])]), EMPTY_SET ) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_encoder_multivalent_numeric_too_large_for_numpy_v1(self): # For NumPy version 1.x.x, np.histogram cannot handle values > 2**53 if the # min and max of the examples are the same. diff --git a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py index 27671554..d9f3c864 100644 --- a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py +++ b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py @@ -473,6 +473,7 @@ def test_sample_partition_combine( if num_compacts_metric: self.assertEqual(metric_num_compacts, num_compacts) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_sample_metrics(self): record_batch = pa.RecordBatch.from_arrays( [ From dfab0a52a056189f019da416bf4d7472e1a5b4d4 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 16 Jul 2025 12:56:10 -0600 Subject: [PATCH 37/38] import sys --- .../statistics/generators/mutual_information_test.py | 1 + .../statistics/generators/partitioned_stats_generator_test.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow_data_validation/statistics/generators/mutual_information_test.py b/tensorflow_data_validation/statistics/generators/mutual_information_test.py index 587fc814..47569b83 100644 --- a/tensorflow_data_validation/statistics/generators/mutual_information_test.py +++ b/tensorflow_data_validation/statistics/generators/mutual_information_test.py @@ -15,6 +15,7 @@ import apache_beam as beam import numpy as np +import sys import pyarrow as pa import pytest from absl.testing import absltest, parameterized diff --git a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py index d9f3c864..6a0b442d 100644 --- a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py +++ b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py @@ -15,6 +15,7 @@ import apache_beam as beam import numpy as np +import sys import pyarrow as pa import pytest from absl.testing import absltest, parameterized From ce82c99f5d0586ed6568634b005252017bfa00e1 Mon Sep 17 00:00:00 2001 From: andrewfulton9 Date: Wed, 16 Jul 2025 13:16:02 -0600 Subject: [PATCH 38/38] linting add skip --- .../statistics/generators/mutual_information_test.py | 4 +++- .../statistics/generators/partitioned_stats_generator_test.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow_data_validation/statistics/generators/mutual_information_test.py b/tensorflow_data_validation/statistics/generators/mutual_information_test.py index 47569b83..f5e8dde5 100644 --- a/tensorflow_data_validation/statistics/generators/mutual_information_test.py +++ b/tensorflow_data_validation/statistics/generators/mutual_information_test.py @@ -13,9 +13,10 @@ # limitations under the License. """Tests for mutual_information.""" +import sys + import apache_beam as beam import numpy as np -import sys import pyarrow as pa import pytest from absl.testing import absltest, parameterized @@ -1444,6 +1445,7 @@ def test_mi_with_no_schema_or_paths(self): TEST_MAX_ENCODING_LENGTH, ).compute(batch) + @pytest.mark.skipif(sys.platform == "darwin", reason="fails on macos") def test_mi_multivalent_too_large_int_value_for_numpy_v1(self): # For NumPy version 1.x.x, np.histogram cannot handle values > 2**53 if the # min and max of the examples are the same. diff --git a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py index 6a0b442d..0bf46cda 100644 --- a/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py +++ b/tensorflow_data_validation/statistics/generators/partitioned_stats_generator_test.py @@ -13,9 +13,10 @@ # limitations under the License. """Tests for partitioned_stats_generator.""" +import sys + import apache_beam as beam import numpy as np -import sys import pyarrow as pa import pytest from absl.testing import absltest, parameterized