From 2aceb2329f058a4380dad1d8cd896ec6eea43747 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 7 Sep 2024 17:22:19 +0200 Subject: [PATCH 1/7] Better lint --- .gitignore | 1 + _doc/examples/plot_constraint_kmeans.py | 8 ++--- .../plot_logistic_regression_clustering.py | 6 ++-- ...t_piecewise_linear_regression_criterion.py | 4 +-- _doc/examples/plot_search_images_torch.py | 6 ++-- .../plot_sklearn_transformed_target.py | 6 ++-- _doc/examples/plot_visualize_pipeline.py | 4 +-- _unittests/ut_helpers/test_debug.py | 1 - _unittests/ut_helpers/test_parameters.py | 1 - .../test_non_linear_correlations.py | 19 ++++++------ _unittests/ut_metrics/test_scoring_metrics.py | 1 - _unittests/ut_mlbatch/test_pipeline_cache.py | 1 - _unittests/ut_mlmodel/test_anmf_predictor.py | 1 - .../ut_mlmodel/test_classification_kmeans.py | 5 ++- .../test_decision_tree_logistic_regression.py | 1 - .../ut_mlmodel/test_direct_blas_lapack.py | 1 - .../ut_mlmodel/test_extended_features.py | 5 ++- .../ut_mlmodel/test_interval_regressor.py | 1 - _unittests/ut_mlmodel/test_kmeans_sklearn.py | 1 - .../ut_mlmodel/test_piecewise_classifier.py | 1 - ...test_piecewise_decision_tree_experiment.py | 1 - ...piecewise_decision_tree_experiment_fast.py | 1 - ...ecewise_decision_tree_experiment_linear.py | 1 - .../ut_mlmodel/test_piecewise_regressor.py | 1 - .../ut_mlmodel/test_quantile_mlpregression.py | 1 - .../ut_mlmodel/test_quantile_regression.py | 1 - _unittests/ut_mlmodel/test_sklearn_text.py | 1 - .../ut_mlmodel/test_sklearn_transform_inv.py | 1 - .../ut_mlmodel/test_target_predictors.py | 1 - .../ut_mlmodel/test_transfer_transformer.py | 1 - .../ut_mlmodel/test_tsne_predictable.py | 5 ++- _unittests/ut_mltree/test_tree_digitize.py | 1 - _unittests/ut_mltree/test_tree_structure.py | 1 - _unittests/ut_plotting/test_dot.py | 3 +- _unittests/ut_plotting/test_plot_gallery.py | 3 +- _unittests/ut_plotting/test_str.py | 1 - .../test_LONG_search_images_torch.py | 5 ++- 
.../ut_search_rank/test_search_predictions.py | 1 - .../ut_search_rank/test_search_vectors.py | 1 - .../ut_timeseries/test_plot_timeseries.py | 2 +- .../test_preprocessing_timeseries.py | 4 +-- mlinsights/__init__.py | 1 - mlinsights/ext_test_case.py | 23 ++++++++------ mlinsights/helpers/pipeline.py | 16 +++++----- mlinsights/metrics/correlations.py | 2 +- mlinsights/mlbatch/cache_model.py | 8 ++--- .../mlmodel/_extended_features_polynomial.py | 8 ++--- mlinsights/mlmodel/_kmeans_constraint_.py | 7 ++--- mlinsights/mlmodel/anmf_predictor.py | 2 +- mlinsights/mlmodel/categories_to_integers.py | 5 ++- mlinsights/mlmodel/extended_features.py | 10 +++--- mlinsights/mlmodel/interval_regressor.py | 2 +- mlinsights/mlmodel/kmeans_constraint.py | 1 - mlinsights/mlmodel/ml_featurizer.py | 2 -- mlinsights/mlmodel/piecewise_estimator.py | 2 +- .../mlmodel/piecewise_tree_regression.py | 3 +- mlinsights/mlmodel/quantile_mlpregressor.py | 3 +- mlinsights/mlmodel/quantile_regression.py | 3 +- mlinsights/mlmodel/sklearn_testing.py | 10 +++--- .../mlmodel/sklearn_transform_inv_fct.py | 2 +- mlinsights/mlmodel/target_predictors.py | 6 ++-- mlinsights/mltree/tree_structure.py | 2 +- mlinsights/plotting/visualize.py | 14 +++++---- mlinsights/sklapi/__init__.py | 1 - mlinsights/sklapi/sklearn_base.py | 4 +-- mlinsights/sklapi/sklearn_base_classifier.py | 1 - mlinsights/sklapi/sklearn_base_learner.py | 1 - mlinsights/sklapi/sklearn_base_regressor.py | 1 - mlinsights/sklapi/sklearn_base_transform.py | 1 - .../sklapi/sklearn_base_transform_learner.py | 1 - .../sklapi/sklearn_base_transform_stacking.py | 9 ++---- mlinsights/sklapi/sklearn_parameters.py | 3 -- mlinsights/timeseries/agg.py | 30 +++++++++--------- mlinsights/timeseries/dummies.py | 2 +- mlinsights/timeseries/patterns.py | 2 +- mlinsights/timeseries/plotting.py | 2 +- mlinsights/timeseries/preprocessing.py | 6 ++-- mlinsights/timeseries/utils.py | 8 ++--- pyproject.toml | 31 +++++++++++++++++-- setup.py | 10 +++--- 80 files 
changed, 166 insertions(+), 190 deletions(-) diff --git a/.gitignore b/.gitignore index b129d9e0..343d820a 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ _doc/examples/plot_*.onnx _doc/examples/plot_*.xlsx _doc/_static/require.js _doc/_static/viz.js +_doc/sg_execution_times.rst _unittests/ut__main/*.png _unittests/test_constants.h mlinsights/_config.py diff --git a/_doc/examples/plot_constraint_kmeans.py b/_doc/examples/plot_constraint_kmeans.py index 9922d5ba..bf84a905 100644 --- a/_doc/examples/plot_constraint_kmeans.py +++ b/_doc/examples/plot_constraint_kmeans.py @@ -62,7 +62,7 @@ colors = "brgy" fig, ax = plt.subplots(1, 1, figsize=(4, 4)) -for i in range(0, max(cl) + 1): +for i in range(max(cl) + 1): ax.plot(X[cl == i, 0], X[cl == i, 1], colors[i] + ".", label="cl%d" % i) x = [km.cluster_centers_[i, 0], km.cluster_centers_[i, 0]] y = [km.cluster_centers_[i, 1], km.cluster_centers_[i, 1]] @@ -97,7 +97,7 @@ # fig, ax = plt.subplots(1, 2, figsize=(10, 4)) -for i in range(0, max(cl1) + 1): +for i in range(max(cl1) + 1): ax[0].plot(X[cl1 == i, 0], X[cl1 == i, 1], colors[i] + ".", label="cl%d" % i) ax[1].plot(X[cl2 == i, 0], X[cl2 == i, 1], colors[i] + ".", label="cl%d" % i) x = [km1.cluster_centers_[i, 0], km1.cluster_centers_[i, 0]] @@ -135,7 +135,7 @@ def plot_delaunay(ax, edges, points): fig, ax = plt.subplots(1, 2, figsize=(10, 4)) -for i in range(0, max(cl) + 1): +for i in range(max(cl) + 1): ax[0].plot(X[cl == i, 0], X[cl == i, 1], colors[i] + ".", label="cl%d" % i) x = [km.cluster_centers_[i, 0], km.cluster_centers_[i, 0]] y = [km.cluster_centers_[i, 1], km.cluster_centers_[i, 1]] @@ -145,7 +145,7 @@ def plot_delaunay(ax, edges, points): cls = km.cluster_centers_iter_ ax[1].plot(X[:, 0], X[:, 1], ".", label="X", color="#AAAAAA", ms=3) -for i in range(0, max(cl) + 1): +for i in range(max(cl) + 1): ms = numpy.arange(cls.shape[-1]).astype(numpy.float64) / cls.shape[-1] * 50 + 1 ax[1].scatter(cls[i, 0, :], cls[i, 1, :], color=colors[i], s=ms, 
label="cl%d" % i) plot_delaunay(ax[1], edges, km.cluster_centers_) diff --git a/_doc/examples/plot_logistic_regression_clustering.py b/_doc/examples/plot_logistic_regression_clustering.py index b9c3f191..42baec9b 100644 --- a/_doc/examples/plot_logistic_regression_clustering.py +++ b/_doc/examples/plot_logistic_regression_clustering.py @@ -23,8 +23,8 @@ Xs = [] Ys = [] n = 20 -for i in range(0, 5): - for j in range(0, 4): +for i in range(5): + for j in range(4): x1 = numpy.random.rand(n) + i * 1.1 x2 = numpy.random.rand(n) + j * 1.1 Xs.append(numpy.vstack([x1, x2]).T) @@ -41,7 +41,7 @@ fig, ax = plt.subplots(1, 1, figsize=(6, 4)) for i in set(Y): ax.plot( - X[Y == i, 0], X[Y == i, 1], "o", label="cl%d" % i, color=plt.cm.tab20.colors[i] + X[i == Y, 0], X[i == Y, 1], "o", label="cl%d" % i, color=plt.cm.tab20.colors[i] ) ax.legend() ax.set_title("Classification not convex") diff --git a/_doc/examples/plot_piecewise_linear_regression_criterion.py b/_doc/examples/plot_piecewise_linear_regression_criterion.py index bb99c044..69a52ba0 100644 --- a/_doc/examples/plot_piecewise_linear_regression_criterion.py +++ b/_doc/examples/plot_piecewise_linear_regression_criterion.py @@ -1,9 +1,9 @@ """ Custom DecisionTreeRegressor adapted to a linear regression =========================================================== - + A :class:`sklearn.tree.DecisionTreeRegressor` -can be trained with a couple of possible criterions but it is possible +can be trained with a couple of possible criterions but it is possible to implement a custom one (see `hellinger_distance_criterion `_). 
See also tutorial diff --git a/_doc/examples/plot_search_images_torch.py b/_doc/examples/plot_search_images_torch.py index 75ed8fdd..3b002c16 100644 --- a/_doc/examples/plot_search_images_torch.py +++ b/_doc/examples/plot_search_images_torch.py @@ -147,7 +147,7 @@ imgs = datasets.ImageFolder("simages", trans) dataloader = DataLoader(imgs, batch_size=1, shuffle=True, num_workers=1) img_seq = iter(dataloader) -imgs = list(img[0] for i, img in zip(range(2), img_seq)) +imgs = [img[0] for i, img in zip(range(2), img_seq)] ####################################### # @@ -182,7 +182,7 @@ ConcatDataset([imgs1, imgs2]), batch_size=1, shuffle=True, num_workers=1 ) img_seq = iter(dataloader) -imgs = list(img[0] for i, img in zip(range(10), img_seq)) +imgs = [img[0] for i, img in zip(range(10), img_seq)] ####################################### # @@ -259,7 +259,7 @@ imgs = datasets.ImageFolder("simages", trans) dataloader = DataLoader(imgs, batch_size=1, shuffle=False, num_workers=1) img_seq = iter(dataloader) -imgs = list(img[0] for img in img_seq) +imgs = [img[0] for img in img_seq] all_outputs = [model.forward(img).detach().numpy().ravel() for img in imgs] diff --git a/_doc/examples/plot_sklearn_transformed_target.py b/_doc/examples/plot_sklearn_transformed_target.py index 65ba7a79..c0470aed 100644 --- a/_doc/examples/plot_sklearn_transformed_target.py +++ b/_doc/examples/plot_sklearn_transformed_target.py @@ -202,7 +202,7 @@ def evaluation(): rnd = [] perf_reg = [] perf_clr = [] - for rs in range(0, 200): + for rs in range(200): rnd.append(rs) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rs) reg = LinearRegression() @@ -247,7 +247,7 @@ def evaluation2(): perf_clr = [] acc_reg = [] acc_clr = [] - for rs in range(0, 50): + for rs in range(50): rnd.append(rs) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rs) reg = LinearRegression() @@ -368,7 +368,7 @@ def permute(y): rows = [] -for i in range(0, 10): +for _i in range(10): regpt 
= TransformedTargetRegressor2(LinearRegression(), transformer="permute") regpt.fit(X_train, y_train) logpt = TransformedTargetClassifier2( diff --git a/_doc/examples/plot_visualize_pipeline.py b/_doc/examples/plot_visualize_pipeline.py index bcfcf77b..750389fe 100644 --- a/_doc/examples/plot_visualize_pipeline.py +++ b/_doc/examples/plot_visualize_pipeline.py @@ -238,6 +238,6 @@ # Every piece behaves the same way. -for coor, model, vars in enumerate_pipeline_models(model): +for coor, m, _vars in enumerate_pipeline_models(model): print(coor) - print(model._debug) + print(m._debug) diff --git a/_unittests/ut_helpers/test_debug.py b/_unittests/ut_helpers/test_debug.py index ce264ef6..ce2a52bb 100644 --- a/_unittests/ut_helpers/test_debug.py +++ b/_unittests/ut_helpers/test_debug.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy.random from sklearn.linear_model import LinearRegression, LogisticRegression diff --git a/_unittests/ut_helpers/test_parameters.py b/_unittests/ut_helpers/test_parameters.py index cfef9490..b1917c79 100644 --- a/_unittests/ut_helpers/test_parameters.py +++ b/_unittests/ut_helpers/test_parameters.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest from mlinsights.ext_test_case import ExtTestCase from mlinsights.helpers.parameters import format_value diff --git a/_unittests/ut_metrics/test_non_linear_correlations.py b/_unittests/ut_metrics/test_non_linear_correlations.py index 7792089f..ea5ec8eb 100644 --- a/_unittests/ut_metrics/test_non_linear_correlations.py +++ b/_unittests/ut_metrics/test_non_linear_correlations.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import pandas from sklearn import datasets @@ -18,7 +17,7 @@ def test_non_linear_correlations_df(self): self.assertEqual(cor.shape, (4, 4)) self.assertEqual(list(cor.columns), ["X1", "X2", "X3", "X4"]) self.assertEqual(list(cor.index), ["X1", "X2", "X3", "X4"]) - self.assertEqual(list(cor.iloc[i, i] for i in range(0, 4)), [1, 1, 1, 1]) + 
self.assertEqual(list(cor.iloc[i, i] for i in range(4)), [1, 1, 1, 1]) self.assertGreater(cor.values.min(), 0) def test_non_linear_correlations_array(self): @@ -27,7 +26,7 @@ def test_non_linear_correlations_array(self): df = pandas.DataFrame(X).values cor = non_linear_correlations(df, LinearRegression(fit_intercept=False)) self.assertEqual(cor.shape, (4, 4)) - self.assertEqual(list(cor[i, i] for i in range(0, 4)), [1, 1, 1, 1]) + self.assertEqual(list(cor[i, i] for i in range(4)), [1, 1, 1, 1]) self.assertGreater(cor.min(), 0) def test_non_linear_correlations_df_tree(self): @@ -39,7 +38,7 @@ def test_non_linear_correlations_df_tree(self): self.assertEqual(cor.shape, (4, 4)) self.assertEqual(list(cor.columns), ["X1", "X2", "X3", "X4"]) self.assertEqual(list(cor.index), ["X1", "X2", "X3", "X4"]) - self.assertGreater(max(cor.iloc[i, i] for i in range(0, 4)), 0.98) + self.assertGreater(max(cor.iloc[i, i] for i in range(4)), 0.98) self.assertGreater(cor.values.min(), 0) def test_non_linear_correlations_df_minmax(self): @@ -53,9 +52,9 @@ def test_non_linear_correlations_df_minmax(self): self.assertEqual(cor.shape, (4, 4)) self.assertEqual(list(cor.columns), ["X1", "X2", "X3", "X4"]) self.assertEqual(list(cor.index), ["X1", "X2", "X3", "X4"]) - self.assertEqual(list(cor.iloc[i, i] for i in range(0, 4)), [1, 1, 1, 1]) - self.assertEqual(list(mini.iloc[i, i] for i in range(0, 4)), [1, 1, 1, 1]) - self.assertEqual(list(maxi.iloc[i, i] for i in range(0, 4)), [1, 1, 1, 1]) + self.assertEqual(list(cor.iloc[i, i] for i in range(4)), [1, 1, 1, 1]) + self.assertEqual(list(mini.iloc[i, i] for i in range(4)), [1, 1, 1, 1]) + self.assertEqual(list(maxi.iloc[i, i] for i in range(4)), [1, 1, 1, 1]) self.assertGreater(cor.values.min(), 0) self.assertEqual(list(mini.columns), ["X1", "X2", "X3", "X4"]) self.assertEqual(list(mini.index), ["X1", "X2", "X3", "X4"]) @@ -74,9 +73,9 @@ def test_non_linear_correlations_array_minmax(self): df, LinearRegression(fit_intercept=False), minmax=True ) 
self.assertEqual(cor.shape, (4, 4)) - self.assertEqual(list(cor[i, i] for i in range(0, 4)), [1, 1, 1, 1]) - self.assertEqual(list(mini[i, i] for i in range(0, 4)), [1, 1, 1, 1]) - self.assertEqual(list(maxi[i, i] for i in range(0, 4)), [1, 1, 1, 1]) + self.assertEqual(list(cor[i, i] for i in range(4)), [1, 1, 1, 1]) + self.assertEqual(list(mini[i, i] for i in range(4)), [1, 1, 1, 1]) + self.assertEqual(list(maxi[i, i] for i in range(4)), [1, 1, 1, 1]) self.assertGreater(cor.min(), 0) self.assertEqual(mini.shape, (4, 4)) self.assertLesser(mini.min(), cor.min()) diff --git a/_unittests/ut_metrics/test_scoring_metrics.py b/_unittests/ut_metrics/test_scoring_metrics.py index 0ed29d40..e124c7ba 100644 --- a/_unittests/ut_metrics/test_scoring_metrics.py +++ b/_unittests/ut_metrics/test_scoring_metrics.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import pandas import numpy diff --git a/_unittests/ut_mlbatch/test_pipeline_cache.py b/_unittests/ut_mlbatch/test_pipeline_cache.py index 46d601a1..1282689d 100644 --- a/_unittests/ut_mlbatch/test_pipeline_cache.py +++ b/_unittests/ut_mlbatch/test_pipeline_cache.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest from sklearn.datasets import make_classification from sklearn.decomposition import PCA, TruncatedSVD as SVD diff --git a/_unittests/ut_mlmodel/test_anmf_predictor.py b/_unittests/ut_mlmodel/test_anmf_predictor.py index 09166c40..a82a44e0 100644 --- a/_unittests/ut_mlmodel/test_anmf_predictor.py +++ b/_unittests/ut_mlmodel/test_anmf_predictor.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from scipy.sparse import csr_matrix diff --git a/_unittests/ut_mlmodel/test_classification_kmeans.py b/_unittests/ut_mlmodel/test_classification_kmeans.py index f2d7f197..921abe67 100644 --- a/_unittests/ut_mlmodel/test_classification_kmeans.py +++ b/_unittests/ut_mlmodel/test_classification_kmeans.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random 
import RandomState @@ -75,8 +74,8 @@ def test_classification_kmeans_relevance(self): Xs = [] Ys = [] n = 20 - for i in range(0, 5): - for j in range(0, 4): + for i in range(5): + for j in range(4): x1 = state.rand(n) + i * 1.1 x2 = state.rand(n) + j * 1.1 Xs.append(numpy.vstack([x1, x2]).T) diff --git a/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py b/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py index 0141fc0a..6b1bf2b9 100644 --- a/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py +++ b/_unittests/ut_mlmodel/test_decision_tree_logistic_regression.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import random diff --git a/_unittests/ut_mlmodel/test_direct_blas_lapack.py b/_unittests/ut_mlmodel/test_direct_blas_lapack.py index 28af6b57..56e99a48 100644 --- a/_unittests/ut_mlmodel/test_direct_blas_lapack.py +++ b/_unittests/ut_mlmodel/test_direct_blas_lapack.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from scipy.linalg.lapack import dgelss as scipy_dgelss diff --git a/_unittests/ut_mlmodel/test_extended_features.py b/_unittests/ut_mlmodel/test_extended_features.py index de3558a6..27dd1808 100644 --- a/_unittests/ut_mlmodel/test_extended_features.py +++ b/_unittests/ut_mlmodel/test_extended_features.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from scipy import sparse @@ -304,7 +303,7 @@ def test_polynomial_features_bigger(self): ext = ExtendedFeatures(poly_degree=deg) X_ext = ext.fit_transform(X) - inames = ["x%d" % i for i in range(0, X.shape[1])] + inames = ["x%d" % i for i in range(X.shape[1])] names_ext = ext.get_feature_names_out(inames) self.assertEqual(len(names_sk), len(names_ext)) @@ -329,7 +328,7 @@ def test_polynomial_features_bigger_ionly(self): ) X_ext = ext.fit_transform(X) - inames = ["x%d" % i for i in range(0, X.shape[1])] + inames = ["x%d" % i for i in range(X.shape[1])] names_ext = 
ext.get_feature_names_out(inames) self.assertEqual(len(names_sk), len(names_ext)) diff --git a/_unittests/ut_mlmodel/test_interval_regressor.py b/_unittests/ut_mlmodel/test_interval_regressor.py index 07e5eed2..d949d45b 100644 --- a/_unittests/ut_mlmodel/test_interval_regressor.py +++ b/_unittests/ut_mlmodel/test_interval_regressor.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from sklearn.linear_model import LinearRegression diff --git a/_unittests/ut_mlmodel/test_kmeans_sklearn.py b/_unittests/ut_mlmodel/test_kmeans_sklearn.py index 159033fb..974b024f 100644 --- a/_unittests/ut_mlmodel/test_kmeans_sklearn.py +++ b/_unittests/ut_mlmodel/test_kmeans_sklearn.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy as np from scipy import sparse as sp diff --git a/_unittests/ut_mlmodel/test_piecewise_classifier.py b/_unittests/ut_mlmodel/test_piecewise_classifier.py index d5c2aac3..8f73099f 100644 --- a/_unittests/ut_mlmodel/test_piecewise_classifier.py +++ b/_unittests/ut_mlmodel/test_piecewise_classifier.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import random diff --git a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py index e072d12c..97a1f84c 100644 --- a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py +++ b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import warnings import packaging.version as pv diff --git a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py index ed73c52a..dbbf7393 100644 --- a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py +++ b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_fast.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import 
warnings import numpy diff --git a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py index 9a82cb86..607f9bcd 100644 --- a/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py +++ b/_unittests/ut_mlmodel/test_piecewise_decision_tree_experiment_linear.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import warnings import numpy diff --git a/_unittests/ut_mlmodel/test_piecewise_regressor.py b/_unittests/ut_mlmodel/test_piecewise_regressor.py index 41e664db..765d8f54 100644 --- a/_unittests/ut_mlmodel/test_piecewise_regressor.py +++ b/_unittests/ut_mlmodel/test_piecewise_regressor.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import random diff --git a/_unittests/ut_mlmodel/test_quantile_mlpregression.py b/_unittests/ut_mlmodel/test_quantile_mlpregression.py index 00306c0f..a2dc04c7 100644 --- a/_unittests/ut_mlmodel/test_quantile_mlpregression.py +++ b/_unittests/ut_mlmodel/test_quantile_mlpregression.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import random diff --git a/_unittests/ut_mlmodel/test_quantile_regression.py b/_unittests/ut_mlmodel/test_quantile_regression.py index 0ecc65f2..54b24711 100644 --- a/_unittests/ut_mlmodel/test_quantile_regression.py +++ b/_unittests/ut_mlmodel/test_quantile_regression.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import random diff --git a/_unittests/ut_mlmodel/test_sklearn_text.py b/_unittests/ut_mlmodel/test_sklearn_text.py index 5691eedd..f35d8ea9 100644 --- a/_unittests/ut_mlmodel/test_sklearn_text.py +++ b/_unittests/ut_mlmodel/test_sklearn_text.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer diff --git a/_unittests/ut_mlmodel/test_sklearn_transform_inv.py 
b/_unittests/ut_mlmodel/test_sklearn_transform_inv.py index 2cadf5ba..84ad8a2e 100644 --- a/_unittests/ut_mlmodel/test_sklearn_transform_inv.py +++ b/_unittests/ut_mlmodel/test_sklearn_transform_inv.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from mlinsights.ext_test_case import ExtTestCase diff --git a/_unittests/ut_mlmodel/test_target_predictors.py b/_unittests/ut_mlmodel/test_target_predictors.py index 610fb75b..81e67605 100644 --- a/_unittests/ut_mlmodel/test_target_predictors.py +++ b/_unittests/ut_mlmodel/test_target_predictors.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from sklearn.datasets import load_iris diff --git a/_unittests/ut_mlmodel/test_transfer_transformer.py b/_unittests/ut_mlmodel/test_transfer_transformer.py index d370c12a..60f33b84 100644 --- a/_unittests/ut_mlmodel/test_transfer_transformer.py +++ b/_unittests/ut_mlmodel/test_transfer_transformer.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from sklearn.linear_model import LinearRegression, LogisticRegression diff --git a/_unittests/ut_mlmodel/test_tsne_predictable.py b/_unittests/ut_mlmodel/test_tsne_predictable.py index f5e769ff..09b28c66 100644 --- a/_unittests/ut_mlmodel/test_tsne_predictable.py +++ b/_unittests/ut_mlmodel/test_tsne_predictable.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from numpy.random import RandomState @@ -63,8 +62,8 @@ def test_predictable_tsne_relevance(self): Xs = [] Ys = [] n = 20 - for i in range(0, 5): - for j in range(0, 4): + for i in range(5): + for j in range(4): x1 = state.rand(n) + i * 1.1 x2 = state.rand(n) + j * 1.1 Xs.append(numpy.vstack([x1, x2]).T) diff --git a/_unittests/ut_mltree/test_tree_digitize.py b/_unittests/ut_mltree/test_tree_digitize.py index d09c8fd1..a2b6ae5a 100644 --- a/_unittests/ut_mltree/test_tree_digitize.py +++ b/_unittests/ut_mltree/test_tree_digitize.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest 
import numpy from sklearn.tree import DecisionTreeRegressor diff --git a/_unittests/ut_mltree/test_tree_structure.py b/_unittests/ut_mltree/test_tree_structure.py index 9e598c61..0768e012 100644 --- a/_unittests/ut_mltree/test_tree_structure.py +++ b/_unittests/ut_mltree/test_tree_structure.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import numpy from sklearn import datasets diff --git a/_unittests/ut_plotting/test_dot.py b/_unittests/ut_plotting/test_dot.py index abf0413d..5f4a156e 100644 --- a/_unittests/ut_plotting/test_dot.py +++ b/_unittests/ut_plotting/test_dot.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest from io import StringIO from textwrap import dedent @@ -168,7 +167,7 @@ def test_onehotencoder_dot(self): ), LinearRegression(), ) - train_cols = cols + ["weekday"] + train_cols = [*cols, "weekday"] model.fit(df, df[train_cols]) dot = pipeline2dot(model, df) self.assertIn('label="Identity"', dot) diff --git a/_unittests/ut_plotting/test_plot_gallery.py b/_unittests/ut_plotting/test_plot_gallery.py index b5fae3fd..5852e4b3 100644 --- a/_unittests/ut_plotting/test_plot_gallery.py +++ b/_unittests/ut_plotting/test_plot_gallery.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os import tempfile import unittest @@ -73,7 +72,7 @@ def test_plot_gallery_url(self): ax = plot_gallery_images(files, return_figure=False, ax=ax) self.assertNotEmpty(ax) except http.client.RemoteDisconnected as e: - warnings.warn(f"Unable to fetch image {e}'") + warnings.warn(f"Unable to fetch image {e}'", stacklevel=0) plt.close("all") return plt.close("all") diff --git a/_unittests/ut_plotting/test_str.py b/_unittests/ut_plotting/test_str.py index 0fe28fac..1abaf781 100644 --- a/_unittests/ut_plotting/test_str.py +++ b/_unittests/ut_plotting/test_str.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import StandardScaler, MinMaxScaler diff --git 
a/_unittests/ut_search_rank/test_LONG_search_images_torch.py b/_unittests/ut_search_rank/test_LONG_search_images_torch.py index 2fc61090..8515e652 100644 --- a/_unittests/ut_search_rank/test_LONG_search_images_torch.py +++ b/_unittests/ut_search_rank/test_LONG_search_images_torch.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os import tempfile import unittest @@ -19,7 +18,7 @@ def test_search_predictions_torch(self): try: import torchvision.models as tmodels except (SyntaxError, ModuleNotFoundError) as e: - warnings.warn(f"torch is not available: {e}") + warnings.warn(f"torch is not available: {e}", stacklevel=0) return from torchvision import datasets, transforms from torch.utils.data import DataLoader @@ -55,7 +54,7 @@ def test_search_predictions_torch(self): imgs_ = datasets.ImageFolder(temp, trans) dataloader = DataLoader(imgs_, batch_size=1, shuffle=False, num_workers=1) img_seq = iter(dataloader) - imgs = list(img[0] for img in img_seq) + imgs = [img[0] for img in img_seq] # search se = SearchEnginePredictionImages(model, n_neighbors=5) diff --git a/_unittests/ut_search_rank/test_search_predictions.py b/_unittests/ut_search_rank/test_search_predictions.py index 0df67c14..bc420379 100644 --- a/_unittests/ut_search_rank/test_search_predictions.py +++ b/_unittests/ut_search_rank/test_search_predictions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import unittest import pandas import numpy diff --git a/_unittests/ut_search_rank/test_search_vectors.py b/_unittests/ut_search_rank/test_search_vectors.py index c710e8ee..9a90aa0b 100644 --- a/_unittests/ut_search_rank/test_search_vectors.py +++ b/_unittests/ut_search_rank/test_search_vectors.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import os import tempfile import unittest diff --git a/_unittests/ut_timeseries/test_plot_timeseries.py b/_unittests/ut_timeseries/test_plot_timeseries.py index 0dc9105a..eb6076fa 100644 --- a/_unittests/ut_timeseries/test_plot_timeseries.py +++ 
b/_unittests/ut_timeseries/test_plot_timeseries.py @@ -19,7 +19,7 @@ def test_plot_data(self): import matplotlib.pyplot as plt except Exception as e: if "generated new fontManager" in str(e): - warnings.warn(e) + warnings.warn(e, stacklevel=0) return raise e dt1 = datetime.datetime(2019, 8, 1) diff --git a/_unittests/ut_timeseries/test_preprocessing_timeseries.py b/_unittests/ut_timeseries/test_preprocessing_timeseries.py index 9da2cf7a..7b4045bb 100644 --- a/_unittests/ut_timeseries/test_preprocessing_timeseries.py +++ b/_unittests/ut_timeseries/test_preprocessing_timeseries.py @@ -12,7 +12,7 @@ def test_base_parameters_split0(self): y = numpy.arange(10) * 100 bs = BaseTimeSeries(past=2) nx, ny, _ = build_ts_X_y(bs, X, y) - for d in range(0, 5): + for d in range(5): proc = TimeSeriesDifference(d) proc.fit(nx, ny) px, py = proc.transform(nx, ny) @@ -28,7 +28,7 @@ def test_base_parameters_split0_weight(self): bs = BaseTimeSeries(past=2) nx, ny, _ = build_ts_X_y(bs, X, y) weights = numpy.ones((nx.shape[0],), dtype=nx.dtype) - for d in range(0, 5): + for d in range(5): proc = TimeSeriesDifference(d) proc.fit(nx, ny, weights) px, py = proc.transform(nx, ny) diff --git a/mlinsights/__init__.py b/mlinsights/__init__.py index aa2cdee8..425f16b0 100644 --- a/mlinsights/__init__.py +++ b/mlinsights/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- __version__ = "0.5.0" __author__ = "Xavier Dupré" __github__ = "https://github.com/sdpython/mlinsights" diff --git a/mlinsights/ext_test_case.py b/mlinsights/ext_test_case.py index bcd40109..909482eb 100644 --- a/mlinsights/ext_test_case.py +++ b/mlinsights/ext_test_case.py @@ -270,7 +270,7 @@ def assertRaise( fct() except exc_type as e: if not isinstance(e, exc_type): - raise AssertionError(f"Unexpected exception {type(e)!r}.") + raise AssertionError(f"Unexpected exception {type(e)!r}.") # noqa: B904 if msg is None: return self.assertIn(msg, str(e)) @@ -315,13 +315,13 @@ def assertEqualFloat(self, a, b, precision=1e-5): try: 
self.assertLesser(d, precision) except AssertionError: - raise AssertionError(f"{a} != {b} (p={precision})") + raise AssertionError(f"{a} != {b} (p={precision})") # noqa: B904 else: r = float(abs(a - b)) / mi try: self.assertLesser(r, precision) except AssertionError: - raise AssertionError(f"{a} != {b} (p={precision})") + raise AssertionError(f"{a} != {b} (p={precision})") # noqa: B904 def assertEndsWith(self, suffix: str, text: str): if not text.endswith(suffix): @@ -338,7 +338,7 @@ def assertVersionGreaterOrEqual(self, v1, v2): @classmethod def tearDownClass(cls): for name, line, w in cls._warns: - warnings.warn(f"\n{name}:{line}: {type(w)}\n {str(w)}") + warnings.warn(f"\n{name}:{line}: {type(w)}\n {str(w)}", stacklevel=0) def capture(self, fct: Callable): """ @@ -349,9 +349,8 @@ def capture(self, fct: Callable): """ sout = StringIO() serr = StringIO() - with redirect_stdout(sout): - with redirect_stderr(serr): - res = fct() + with redirect_stdout(sout), redirect_stderr(serr): + res = fct() return res, sout.getvalue(), serr.getvalue() @@ -496,7 +495,7 @@ def unzip_files( except zipfile.BadZipFile as e: if isinstance(zipf, BytesIO): raise e - raise IOError(f"Unable to read file '{zipf}'") from e + raise OSError(f"Unable to read file '{zipf}'") from e files: List[Union[str, Tuple[str, Any]]] = [] with zipfile.ZipFile(zipf, "r") as file: @@ -512,7 +511,9 @@ def unzip_files( f"Unable to extract {info.filename!r} due to {e}" ) from e warnings.warn( - f"Unable to extract {info.filename!r} due to {e}", UserWarning + f"Unable to extract {info.filename!r} due to {e}", + UserWarning, + stacklevel=0, ) continue files.append((info.filename, content)) @@ -533,7 +534,9 @@ def unzip_files( f"Unable to extract {info.filename!r} due to {e}" ) from e warnings.warn( - f"Unable to extract {info.filename!r} due to {e}", UserWarning + f"Unable to extract {info.filename!r} due to {e}", + UserWarning, + stacklevel=0, ) continue if verbose > 0: diff --git 
a/mlinsights/helpers/pipeline.py b/mlinsights/helpers/pipeline.py index 492603df..a4caec80 100644 --- a/mlinsights/helpers/pipeline.py +++ b/mlinsights/helpers/pipeline.py @@ -28,7 +28,6 @@ def enumerate_pipeline_models(pipe, coor=None, vs=None): class PassThrough: "dummy class to help display" - pass yield coor, PassThrough(), vs else: @@ -40,7 +39,7 @@ class PassThrough: raise NotImplementedError("Unable to handle this specific case.") elif hasattr(pipe, "mapper") and pipe.mapper: # azureml DataTransformer - for couple in enumerate_pipeline_models(pipe.mapper, coor + (0,)): + for couple in enumerate_pipeline_models(pipe.mapper, (*coor, 0)): yield couple elif hasattr(pipe, "built_features"): # sklearn_pandas.dataframe_mapper.DataFrameMapper @@ -48,25 +47,25 @@ class PassThrough: if isinstance(columns, str): columns = (columns,) if transformers is None: - yield (coor + (i,)), None, columns + yield (*coor, i), None, columns else: for couple in enumerate_pipeline_models( - transformers, coor + (i,), columns + transformers, (*coor, i), columns ): yield couple elif isinstance(pipe, Pipeline): for i, (_, model) in enumerate(pipe.steps): - for couple in enumerate_pipeline_models(model, coor + (i,)): + for couple in enumerate_pipeline_models(model, (*coor, i)): yield couple elif isinstance(pipe, ColumnTransformer): for i, (_, fitted_transformer, column) in enumerate(pipe.transformers): for couple in enumerate_pipeline_models( - fitted_transformer, coor + (i,), column + fitted_transformer, (*coor, i), column ): yield couple elif isinstance(pipe, FeatureUnion): for i, (_, model) in enumerate(pipe.transformer_list): - for couple in enumerate_pipeline_models(model, coor + (i,)): + for couple in enumerate_pipeline_models(model, (*coor, i)): yield couple elif isinstance(pipe, TransformedTargetRegressor): raise NotImplementedError( @@ -205,5 +204,6 @@ def decision_function(self, X, *args, **kwargs): setattr(model, k, MethodType(new_methods[k], model)) except AttributeError: 
warnings.warn( - f"Unable to overwrite method {k!r} for class {type(model)!r}." + f"Unable to overwrite method {k!r} for class {type(model)!r}.", + stacklevel=0, ) diff --git a/mlinsights/metrics/correlations.py b/mlinsights/metrics/correlations.py index 9b0333ec..e298b9f6 100644 --- a/mlinsights/metrics/correlations.py +++ b/mlinsights/metrics/correlations.py @@ -111,7 +111,7 @@ def non_linear_correlations(df, model, draws=5, minmax=False): maxi = cor.copy() df = scale(df) - for k in range(0, draws): + for k in range(draws): df_train, df_test = train_test_split(df, test_size=0.5) for i in range(cor.shape[0]): xi_train = df_train[:, i : i + 1] diff --git a/mlinsights/mlbatch/cache_model.py b/mlinsights/mlbatch/cache_model.py index 4d976444..f7d4cde2 100644 --- a/mlinsights/mlbatch/cache_model.py +++ b/mlinsights/mlbatch/cache_model.py @@ -70,7 +70,7 @@ def as_key(params): sv = str(v) elif isinstance(v, tuple): assert all( - map(lambda e: isinstance(e, (int, float, str)), v) + isinstance(e, (int, float, str)) for e in v ), f"Unable to create a key with value {k!r}:{v}" return str(v) elif isinstance(v, numpy.ndarray): @@ -94,15 +94,13 @@ def items(self): """ Enumerates all cached items. """ - for item in self.cached.items(): - yield item + yield from self.cached.items() def keys(self): """ Enumerates all cached keys. 
""" - for k in self.cached.keys(): - yield k + yield from self.cached.keys() @staticmethod def create_cache(name): diff --git a/mlinsights/mlmodel/_extended_features_polynomial.py b/mlinsights/mlmodel/_extended_features_polynomial.py index c81e2dc9..2e1d4c3e 100644 --- a/mlinsights/mlmodel/_extended_features_polynomial.py +++ b/mlinsights/mlmodel/_extended_features_polynomial.py @@ -11,7 +11,7 @@ def _transform_iall(degree, bias, XP, X, multiply, final): pos = 0 n = X.shape[1] - for d in range(0, degree): + for d in range(degree): if d == 0: XP[:, pos : pos + n] = X index = list(range(pos, pos + n)) @@ -20,7 +20,7 @@ def _transform_iall(degree, bias, XP, X, multiply, final): else: new_index = [] end = index[-1] - for i in range(0, n): + for i in range(n): a = index[i] new_index.append(pos) new_pos = pos + end - a @@ -42,7 +42,7 @@ def _transform_ionly(degree, bias, XP, X, multiply, final): pos = 0 n = X.shape[1] - for d in range(0, degree): + for d in range(degree): if d == 0: XP[:, pos : pos + n] = X index = list(range(pos, pos + n)) @@ -51,7 +51,7 @@ def _transform_ionly(degree, bias, XP, X, multiply, final): else: new_index = [] end = index[-1] - for i in range(0, n): + for i in range(n): a = index[i] new_index.append(pos) dec = index[i + 1] - index[i] diff --git a/mlinsights/mlmodel/_kmeans_constraint_.py b/mlinsights/mlmodel/_kmeans_constraint_.py index 49930db4..265545c6 100644 --- a/mlinsights/mlmodel/_kmeans_constraint_.py +++ b/mlinsights/mlmodel/_kmeans_constraint_.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import bisect from collections import Counter from pandas import DataFrame @@ -49,7 +48,7 @@ def linearize_matrix(mat, *adds): c = mat.shape[1] ic = numpy.arange(mat.shape[1]) res = numpy.empty((n * c, 3 + len(adds)), dtype=mat.dtype) - for i in range(0, n): + for i in range(n): a = i * c b = (i + 1) * c res[a:b, 1] = i @@ -534,7 +533,7 @@ def loopf(h, sumi): transfer = {} - for i in range(0, sorted_distances.shape[0]): + for i in 
range(sorted_distances.shape[0]): gain = sorted_distances[i, 3] ind = int(sorted_distances[i, 1]) dest = int(sorted_distances[i, 2]) @@ -812,7 +811,7 @@ def _adjust_weights(X, sw, weights, labels, lr): """ www, exp, N = _compute_balance(X, sw, labels, weights.shape[0]) - for i in range(0, weights.shape[0]): + for i in range(weights.shape[0]): nw = (www[i] - exp) / exp delta = nw * lr weights[i] += delta diff --git a/mlinsights/mlmodel/anmf_predictor.py b/mlinsights/mlmodel/anmf_predictor.py index 47479937..2879b4cc 100644 --- a/mlinsights/mlmodel/anmf_predictor.py +++ b/mlinsights/mlmodel/anmf_predictor.py @@ -57,7 +57,7 @@ def _get_param_names(cls): of the estimator. """ res = NMF._get_param_names() - res = res + ["force_positive"] + res = [*res, "force_positive"] return res def get_params(self, deep=True): diff --git a/mlinsights/mlmodel/categories_to_integers.py b/mlinsights/mlmodel/categories_to_integers.py index 37c325f5..cf1597b8 100644 --- a/mlinsights/mlmodel/categories_to_integers.py +++ b/mlinsights/mlmodel/categories_to_integers.py @@ -86,9 +86,8 @@ def fit(self, X, y=None, **fit_params): raise ValueError( f"Too many categories ({nb}) for one column '{c}' max_cat={max_cat}" ) - self._categories[c] = dict( - (c, i) for i, c in enumerate(list(sorted(distinct))) - ) + self._categories[c] = dict(enumerate(list(sorted(distinct)))) + self._schema = self._build_schema() return self diff --git a/mlinsights/mlmodel/extended_features.py b/mlinsights/mlmodel/extended_features.py index a059e99c..bbff2988 100644 --- a/mlinsights/mlmodel/extended_features.py +++ b/mlinsights/mlmodel/extended_features.py @@ -72,7 +72,7 @@ def _get_feature_names_poly(self, input_features=None): the polynomial features. 
""" if input_features is None: - input_features = ["x%d" % i for i in range(0, self.n_input_features_)] + input_features = ["x%d" % i for i in range(self.n_input_features_)] elif len(input_features) != self.n_input_features_: raise ValueError( f"input_features should contain {self.n_input_features_} strings." @@ -81,7 +81,7 @@ def _get_feature_names_poly(self, input_features=None): names = ["1"] if self.poly_include_bias else [] n = self.n_input_features_ interaction_only = self.poly_interaction_only - for d in range(0, self.poly_degree): + for d in range(self.poly_degree): if d == 0: pos = len(names) names.extend(input_features) @@ -90,7 +90,7 @@ def _get_feature_names_poly(self, input_features=None): else: new_index = [] end = index[-1] - for i in range(0, n): + for i in range(n): a = index[i] new_index.append(len(names)) start = a + (index[i + 1] - index[i] if interaction_only else 0) @@ -195,6 +195,6 @@ def _transform_poly_slow(self, X): XP = numpy.empty( (X.shape[0], self.n_output_features_), dtype=X.dtype, order=order ) - for i, comb in enumerate(comb): - XP[:, i] = X[:, comb].prod(1) + for i, cc in enumerate(comb): + XP[:, i] = X[:, cc].prod(1) return XP diff --git a/mlinsights/mlmodel/interval_regressor.py b/mlinsights/mlmodel/interval_regressor.py index 5b8b2940..48c44adc 100644 --- a/mlinsights/mlmodel/interval_regressor.py +++ b/mlinsights/mlmodel/interval_regressor.py @@ -3,7 +3,7 @@ from sklearn.base import RegressorMixin, clone, BaseEstimator from sklearn.utils._joblib import Parallel, delayed -try: +try: # noqa: SIM105 from tqdm import tqdm except ImportError: pass diff --git a/mlinsights/mlmodel/kmeans_constraint.py b/mlinsights/mlmodel/kmeans_constraint.py index d8ac24df..e3c11a4d 100644 --- a/mlinsights/mlmodel/kmeans_constraint.py +++ b/mlinsights/mlmodel/kmeans_constraint.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy from scipy.spatial import Delaunay from sklearn.cluster import KMeans diff --git a/mlinsights/mlmodel/ml_featurizer.py 
b/mlinsights/mlmodel/ml_featurizer.py index 68a2ef44..40fd12fa 100644 --- a/mlinsights/mlmodel/ml_featurizer.py +++ b/mlinsights/mlmodel/ml_featurizer.py @@ -9,8 +9,6 @@ class FeaturizerTypeError(TypeError): Unable to process a type. """ - pass - def model_featurizer(model, **params): """ diff --git a/mlinsights/mlmodel/piecewise_estimator.py b/mlinsights/mlmodel/piecewise_estimator.py index a1d6641d..184e7d91 100644 --- a/mlinsights/mlmodel/piecewise_estimator.py +++ b/mlinsights/mlmodel/piecewise_estimator.py @@ -7,7 +7,7 @@ from sklearn.preprocessing import KBinsDiscretizer from sklearn.utils._joblib import Parallel, delayed -try: +try: # noqa: SIM105 from tqdm import tqdm except ImportError: pass diff --git a/mlinsights/mlmodel/piecewise_tree_regression.py b/mlinsights/mlmodel/piecewise_tree_regression.py index bd274417..f2aaf6cc 100644 --- a/mlinsights/mlmodel/piecewise_tree_regression.py +++ b/mlinsights/mlmodel/piecewise_tree_regression.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy from sklearn.tree import DecisionTreeRegressor @@ -190,7 +189,7 @@ def _predict_reglin(self, X, check_input=True): leaves = self.predict_leaves(X) pred = numpy.ones((X.shape[0], 1)) Xone = numpy.hstack([X, pred]) - for i in range(0, X.shape[0]): + for i in range(X.shape[0]): li = leaves[i] pred[i] = numpy.dot(Xone[i, :], self.betas_[li, :]) return pred.ravel() diff --git a/mlinsights/mlmodel/quantile_mlpregressor.py b/mlinsights/mlmodel/quantile_mlpregressor.py index fbffa92f..082e320d 100644 --- a/mlinsights/mlmodel/quantile_mlpregressor.py +++ b/mlinsights/mlmodel/quantile_mlpregressor.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import inspect import numpy as np from sklearn.base import RegressorMixin @@ -388,7 +387,7 @@ def __init__( """ See :epkg:`sklearn:neural_networks:MLPRegressor` """ - sup = super(QuantileMLPRegressor, self) + sup = super(QuantileMLPRegressor, self) # noqa: UP008 if "max_fun" not in kwargs: sig = inspect.signature(sup.__init__) if "max_fun" in 
sig.parameters: diff --git a/mlinsights/mlmodel/quantile_regression.py b/mlinsights/mlmodel/quantile_regression.py index 0a4c8909..ec5ceb52 100644 --- a/mlinsights/mlmodel/quantile_regression.py +++ b/mlinsights/mlmodel/quantile_regression.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import numpy from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_absolute_error @@ -142,7 +141,7 @@ def compute_z(Xm, beta, Y, W, delta=0.0001): W = numpy.ones(X.shape[0]) if sample_weight is None else sample_weight self.n_iter_ = 0 lastE = None - for i in range(0, self.max_iter): + for i in range(self.max_iter): clr.fit(Xm, y, W) beta = clr.coef_ W, epsilon = compute_z(Xm, beta, y, W, delta=self.delta) diff --git a/mlinsights/mlmodel/sklearn_testing.py b/mlinsights/mlmodel/sklearn_testing.py index 94323457..ac91d1ca 100644 --- a/mlinsights/mlmodel/sklearn_testing.py +++ b/mlinsights/mlmodel/sklearn_testing.py @@ -129,10 +129,10 @@ def run_test_sklearn_clone(fct_model, ext=None, copy_fitted=False): else: try: ext.assertEqual(p1[k], p2[k]) - except AssertionError: + except AssertionError as e: raise AssertionError( f"Difference for key '{k}'\n==1 {p1[k]}\n==2 {p2[k]}" - ) + ) from e return conv, cloned @@ -159,13 +159,13 @@ def _assert_dict_equal(a, b, ext): else: if a[key] != b[key]: rows.append( - "** Value != for key '{0}': != id({1}) != id({2})\n==1 " + "** Value != for key '{0}': != id({1}) != id({2})\n==1 " # noqa: UP030 "{3}\n==2 {4}".format(key, id(a[key]), id(b[key]), a[key], b[key]) ) for key in sorted(a): if key not in b: rows.append(f"** Removed key '{key}' in a") - assert not rows, "Dictionaries are different\n{0}".format("\n".join(rows)) + assert not rows, "Dictionaries are different\n{}".format("\n".join(rows)) def _assert_tuple_equal(t1, t2, ext): @@ -310,7 +310,7 @@ def adjust(obj1, obj2): adjust(est, cloned) res = cloned elif isinstance(est, list): - res = list(clone_with_fitted_parameters(o) for o in est) + res = 
[clone_with_fitted_parameters(o) for o in est] elif isinstance(est, tuple): res = tuple(clone_with_fitted_parameters(o) for o in est) elif isinstance(est, dict): diff --git a/mlinsights/mlmodel/sklearn_transform_inv_fct.py b/mlinsights/mlmodel/sklearn_transform_inv_fct.py index 9aa264d5..b964687a 100644 --- a/mlinsights/mlmodel/sklearn_transform_inv_fct.py +++ b/mlinsights/mlmodel/sklearn_transform_inv_fct.py @@ -201,7 +201,7 @@ def transform(self, X, y): cl = [(v, k) for k, v in self.permutation_.items()] cl.sort() new_perm = {} - for cl, current in cl: + for _cl, current in cl: new_perm[current] = len(new_perm) yp = y.copy() for i in range(y.shape[1]): diff --git a/mlinsights/mlmodel/target_predictors.py b/mlinsights/mlmodel/target_predictors.py index 481a83a4..d303b2ce 100644 --- a/mlinsights/mlmodel/target_predictors.py +++ b/mlinsights/mlmodel/target_predictors.py @@ -224,9 +224,9 @@ def _apply(self, X, method): Predicted values. """ self._check_is_fitted() - assert hasattr(self.classifier_, method), ( - f"Unable to find method {method!r} in model " f"{type(self.classifier_)}." - ) + assert hasattr( + self.classifier_, method + ), f"Unable to find method {method!r} in model {type(self.classifier_)}." 
meth = getattr(self.classifier_, method) X_trans, _ = self.transformer_.transform(X, None) pred = meth(X_trans) diff --git a/mlinsights/mltree/tree_structure.py b/mlinsights/mltree/tree_structure.py index bd085cff..fff29bd9 100644 --- a/mlinsights/mltree/tree_structure.py +++ b/mlinsights/mltree/tree_structure.py @@ -223,7 +223,7 @@ def tree_leave_neighbors(model): features_keys = features.keys() for fe in features_keys: features[fe] = list(sorted(set(features[fe]))) - for fe, v in features.items(): + for _fe, v in features.items(): if len(v) == 1: d = abs(v[0]) / 10 if d == v[0]: diff --git a/mlinsights/plotting/visualize.py b/mlinsights/plotting/visualize.py index 43b1e4f4..3ab455e2 100644 --- a/mlinsights/plotting/visualize.py +++ b/mlinsights/plotting/visualize.py @@ -50,7 +50,7 @@ def _get_name_simple(name, data): infos = [] outputs = [] for _, model, vs in pipe.transformers: - if all(map(lambda o: isinstance(o, int), vs)): + if all(isinstance(o, int) for o in vs): new_data = [] if isinstance(data, OrderedDict): new_data = [_[1] for _ in data.items()] @@ -265,22 +265,24 @@ def pipeline2dot(pipe, data, **params): for c, col in enumerate(schema): columns[col] = f"sch0:f{c}" labs.append(f" {col}") - node = ' sch0[label="{0}",shape=record,fontsize={1}];'.format( - "|".join(labs), params.get("fontsize", fontsize) + node = ( + ' sch0[label="{0}",shape=record,fontsize={1}];'.format( # noqa: UP030 + "|".join(labs), params.get("fontsize", fontsize) + ) ) exp.append(node) else: exp.append("") if line["type"] == "transform": node = ( - ' node{0}[label="{1}",shape=box,style="filled' + ' node{0}[label="{1}",shape=box,style="filled' # noqa: UP030 ',rounded",color=cyan,fontsize={2}];'.format( i, line["name"], int(params.get("fontsize", fontsize) * 1.5) ) ) else: node = ( - ' node{0}[label="{1}",shape=box,style="filled,' + ' node{0}[label="{1}",shape=box,style="filled,' # noqa: UP030 'rounded",color=yellow,fontsize={2}];'.format( i, line["name"], int(params.get("fontsize", 
fontsize) * 1.5) ) @@ -301,7 +303,7 @@ def pipeline2dot(pipe, data, **params): for c, out in enumerate(line["outputs"]): columns[out] = f"sch{i}:f{c}" labs.append(f" {out}") - node = ' sch{0}[label="{1}",shape=record,fontsize={2}];'.format( + node = ' sch{0}[label="{1}",shape=record,fontsize={2}];'.format( # noqa: UP030 i, "|".join(labs), params.get("fontsize", fontsize) ) exp.append(node) diff --git a/mlinsights/sklapi/__init__.py b/mlinsights/sklapi/__init__.py index 896c78d2..f426ea3e 100644 --- a/mlinsights/sklapi/__init__.py +++ b/mlinsights/sklapi/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from .sklearn_base_classifier import SkBaseClassifier from .sklearn_base_learner import SkBaseLearner from .sklearn_base_regressor import SkBaseRegressor diff --git a/mlinsights/sklapi/sklearn_base.py b/mlinsights/sklapi/sklearn_base.py index 6d0a926f..e3584eba 100644 --- a/mlinsights/sklapi/sklearn_base.py +++ b/mlinsights/sklapi/sklearn_base.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from typing import Any, Dict import textwrap import warnings @@ -106,7 +105,8 @@ def compare_params( if exc and v1 is not v2: warnings.warn( f"v2 is a clone of v1 not v1 itself for key " - f"{k!r} and class {type(v1)}." 
+ f"{k!r} and class {type(v1)}.", + stacklevel=0, ) elif isinstance(v1, list) and isinstance(v2, list) and len(v1) == len(v2): b = True diff --git a/mlinsights/sklapi/sklearn_base_classifier.py b/mlinsights/sklapi/sklearn_base_classifier.py index cba630b9..ccd8f052 100644 --- a/mlinsights/sklapi/sklearn_base_classifier.py +++ b/mlinsights/sklapi/sklearn_base_classifier.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from sklearn.metrics import accuracy_score from .sklearn_base_learner import SkBaseLearner diff --git a/mlinsights/sklapi/sklearn_base_learner.py b/mlinsights/sklapi/sklearn_base_learner.py index d0dd604b..b6c4818c 100644 --- a/mlinsights/sklapi/sklearn_base_learner.py +++ b/mlinsights/sklapi/sklearn_base_learner.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from .sklearn_base import SkBase diff --git a/mlinsights/sklapi/sklearn_base_regressor.py b/mlinsights/sklapi/sklearn_base_regressor.py index 7acf03cf..7ecff446 100644 --- a/mlinsights/sklapi/sklearn_base_regressor.py +++ b/mlinsights/sklapi/sklearn_base_regressor.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from sklearn.metrics import r2_score from .sklearn_base_learner import SkBaseLearner diff --git a/mlinsights/sklapi/sklearn_base_transform.py b/mlinsights/sklapi/sklearn_base_transform.py index 73daaa90..0c2ad82d 100644 --- a/mlinsights/sklapi/sklearn_base_transform.py +++ b/mlinsights/sklapi/sklearn_base_transform.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from .sklearn_base import SkBase diff --git a/mlinsights/sklapi/sklearn_base_transform_learner.py b/mlinsights/sklapi/sklearn_base_transform_learner.py index c08f591a..9fe9dc0b 100644 --- a/mlinsights/sklapi/sklearn_base_transform_learner.py +++ b/mlinsights/sklapi/sklearn_base_transform_learner.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import textwrap import numpy from .sklearn_base_transform import SkBaseTransform diff --git a/mlinsights/sklapi/sklearn_base_transform_stacking.py b/mlinsights/sklapi/sklearn_base_transform_stacking.py index 
388fda94..664987b7 100644 --- a/mlinsights/sklapi/sklearn_base_transform_stacking.py +++ b/mlinsights/sklapi/sklearn_base_transform_stacking.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import textwrap import numpy from .sklearn_base_transform import SkBaseTransform @@ -94,9 +93,7 @@ def convert2transform(c, new_learners): return res new_learners = [] - res = list( - map(lambda c: convert2transform(c, new_learners), zip(models, method)) - ) + res = [convert2transform(c, new_learners) for c in zip(models, method)] if not new_learners: # We need to do that to avoid creating new objects # when it is not necessary. This behavior is not @@ -164,7 +161,7 @@ def set_params(self, **values): if "method" in values: self.method = values["method"] del values["method"] - for k, v in values.items(): + for k, _v in values.items(): if not k.startswith("models_"): raise ValueError(f"Parameter '{k}' must start with 'models_'.") d = len("models_") @@ -186,7 +183,7 @@ def __repr__(self): usual """ rps = repr(self.P) - res = "{0}([{1}], [{2}], {3})".format( + res = "{0}([{1}], [{2}], {3})".format( # noqa: UP030 self.__class__.__name__, ", ".join(repr(m.model if hasattr(m, "model") else m) for m in self.models), ", ".join( diff --git a/mlinsights/sklapi/sklearn_parameters.py b/mlinsights/sklapi/sklearn_parameters.py index f77bdbeb..d2ea373e 100644 --- a/mlinsights/sklapi/sklearn_parameters.py +++ b/mlinsights/sklapi/sklearn_parameters.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import textwrap @@ -7,8 +6,6 @@ class SkException(Exception): custom exception """ - pass - class SkLearnParameters: """ diff --git a/mlinsights/timeseries/agg.py b/mlinsights/timeseries/agg.py index 17f628ba..be837473 100644 --- a/mlinsights/timeseries/agg.py +++ b/mlinsights/timeseries/agg.py @@ -49,23 +49,21 @@ def round_(serie, freq, per): if per == "week": pyres = res.to_pydatetime() return pandas.to_timedelta( - map( - lambda t: datetime.timedelta( - days=t.weekday(), hours=t.hour, minutes=t.minute - ), - 
pyres, - ) + [ + datetime.timedelta(days=t.weekday(), hours=t.hour, minutes=t.minute) + for t in pyres + ] ) + if per == "month": pyres = res.to_pydatetime() return pandas.to_timedelta( - map( - lambda t: datetime.timedelta( - days=t.day, hours=t.hour, minutes=t.minute - ), - pyres, - ) + [ + datetime.timedelta(days=t.day, hours=t.hour, minutes=t.minute) + for t in pyres + ] ) + raise ValueError(f"Unknown frequency '{per}'.") agg_name = _get_column_name(df) @@ -78,14 +76,14 @@ def round_(serie, freq, per): if not isinstance(values, list): values = [values] if agg == "sum": - gr = df[[agg_name] + values].groupby(agg_name, as_index=False).sum() + gr = df[[agg_name, *values]].groupby(agg_name, as_index=False).sum() agg_name = _get_column_name(gr, "week" + index) - gr.columns = [agg_name] + list(gr.columns[1:]) + gr.columns = [agg_name, *gr.columns[1:]] elif agg == "norm": - gr = df[[agg_name] + values].groupby(agg_name, as_index=False).sum() + gr = df[[agg_name, *values]].groupby(agg_name, as_index=False).sum() agg_name = _get_column_name(gr, "week" + index) agg_cols = list(gr.columns[1:]) - gr.columns = [agg_name] + agg_cols + gr.columns = [agg_name, *agg_cols] for c in agg_cols: su = gr[c].sum() if su != 0: diff --git a/mlinsights/timeseries/dummies.py b/mlinsights/timeseries/dummies.py index cb974b33..60b69497 100644 --- a/mlinsights/timeseries/dummies.py +++ b/mlinsights/timeseries/dummies.py @@ -69,6 +69,6 @@ def predict(self, X, y): pred = numpy.empty((nbrow, nb), dtype=X.dtype) first = nbrow - X.shape[0] pred[:first] = numpy.nan - for i in range(0, nb): + for i in range(nb): pred[first:, i] = X[:, -1] return pred diff --git a/mlinsights/timeseries/patterns.py b/mlinsights/timeseries/patterns.py index d92cd0ab..ceb1910b 100644 --- a/mlinsights/timeseries/patterns.py +++ b/mlinsights/timeseries/patterns.py @@ -58,7 +58,7 @@ def find_ts_group_pattern( all_merged.fillna(0, inplace=True) ncol = all_merged.shape[1] // len(gr_names) gr_feats = [] - for i, name in 
enumerate(gr_names): + for i, _name in enumerate(gr_names): feats = all_merged.iloc[:, i * ncol : (i + 1) * ncol].values.ravel() gr_feats.append(feats) diff --git a/mlinsights/timeseries/plotting.py b/mlinsights/timeseries/plotting.py index b22e20ee..0b305ea5 100644 --- a/mlinsights/timeseries/plotting.py +++ b/mlinsights/timeseries/plotting.py @@ -154,7 +154,7 @@ def coor(ti): xs = ax.get_xticks() xticks = [] xlabels = [] - for i in range(0, len(xs) - 1): + for i in range(len(xs) - 1): if xs[i] < 0: continue dx = xs[i] - int(xs[i] / input_maxy) * input_maxy diff --git a/mlinsights/timeseries/preprocessing.py b/mlinsights/timeseries/preprocessing.py index fbec369f..39a97abe 100644 --- a/mlinsights/timeseries/preprocessing.py +++ b/mlinsights/timeseries/preprocessing.py @@ -58,9 +58,9 @@ class TimeSeriesDifferenceInv(BaseReciprocalTimeSeriesTransformer): def __init__(self, estimator): BaseReciprocalTimeSeriesTransformer.__init__(self, estimator.context_length) - assert isinstance(estimator, TimeSeriesDifference), ( - f"estimator must be of type TimeSeriesDifference not " f"{type(estimator)}." - ) + assert isinstance( + estimator, TimeSeriesDifference + ), f"estimator must be of type TimeSeriesDifference not {type(estimator)}." 
self.estimator = estimator def fit(self, X=None, y=None, sample_weight=None): diff --git a/mlinsights/timeseries/utils.py b/mlinsights/timeseries/utils.py index 70f056ba..5f73190e 100644 --- a/mlinsights/timeseries/utils.py +++ b/mlinsights/timeseries/utils.py @@ -68,11 +68,11 @@ def build_ts_X_y(model, X, y, weights=None, same_rows=False): ) first = y.shape[0] - nrow if X is not None: - for i in range(0, model.past): + for i in range(model.past): begin = i * ncol end = begin + ncol new_X[i:, begin:end] = X[i:] - for i in range(0, model.past): + for i in range(model.past): end = y.shape[0] + i + model.delay1 - 1 - model.delay2 new_X[first - i : first - i + end - i, i + ncol * model.past] = y[i:end] @@ -113,11 +113,11 @@ def build_ts_X_y(model, X, y, weights=None, same_rows=False): new_X = numpy.empty((nrow, ncol * model.past + model.past), dtype=y.dtype) if X is not None: - for i in range(0, model.past): + for i in range(model.past): begin = i * ncol end = begin + ncol new_X[:, begin:end] = X[i : i + nrow] - for i in range(0, model.past): + for i in range(model.past): end = y.shape[0] + i + model.delay1 - 1 - model.delay2 new_X[:, i + ncol * model.past] = y[i:end] diff --git a/pyproject.toml b/pyproject.toml index f79888ec..ee2f8f07 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,10 +146,37 @@ disable_error_code = ["override", "index"] exclude = [".eggs", ".git", "build", "dist"] line-length = 88 -[tool.ruff.lint.mccabe] -max-complexity = 10 +[tool.ruff.lint] +select = [ + "B", # flake8-bugbear + "C4", # flake8-comprehensions + #"D", # pydocstyle + "E", # pycodestyle + "F", # Pyflakes + "G", # flake8-logging-format + #"I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + #"N", # pep8-naming + #"NPY", # modern numpy + #"PERF", # Perflint + "PIE", # flake8-pie + "PYI", # flake8-pyi + "RUF", # Ruff-specific rules + "SIM", # flake8-simplify + "SLOT", # flake8-slot + "T10", # flake8-debugger + #"TID", # Disallow relative imports + 
#"TRY", # flake8-try-except-raise + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] [tool.ruff.lint.per-file-ignores] +"**" = ["B905", "C401", "C408", "C413", "RUF012", "RUF100", "RUF010", "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103", "UP015", "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP038"] +"**/plot*.py" = ["B018"] +"_unittests/**.py" = ["B904", "RUF015", "C400"] "_unittests/ut_plotting/test_dot.py" = ["E501"] "mlinsights/mlbatch/__init__.py" = ["F401"] "mlinsights/metrics/__init__.py" = ["F401"] diff --git a/setup.py b/setup.py index 8727a80b..6bb630d7 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import distutils import os import platform @@ -250,7 +249,7 @@ class cmake_build_class_extension(Command): ( "manylinux=", None, - "Enforces the compilation with manylinux, " "default is set to 0.", + "Enforces the compilation with manylinux, default is set to 0.", ), ] @@ -435,7 +434,8 @@ def build_cmake(self, cfg: str, cmake_args: List[str]) -> Tuple[str, str]: # Builds the project. 
build_path = os.path.abspath(build_temp) build_lib = getattr(self, "build_lib", build_path) - cmake_args = cmake_args + [ + cmake_args = [ + *cmake_args, f"-DSETUP_BUILD_PATH={os.path.abspath(build_path)}", f"-DSETUP_BUILD_LIB={os.path.abspath(build_lib)}", ] @@ -518,8 +518,8 @@ def run_cmake(self): # Ensure that CMake is present and working try: subprocess.check_output(["cmake", "--version"]) - except OSError: - raise RuntimeError("Cannot find CMake executable") + except OSError as e: + raise RuntimeError("Cannot find CMake executable") from e cfg = "Release" cmake_args = self.get_cmake_args(cfg) From 69e9ef52572fcda47e0fbeb134d5232b61bbabf8 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 7 Sep 2024 17:38:02 +0200 Subject: [PATCH 2/7] fix one unit test --- mlinsights/mlmodel/categories_to_integers.py | 11 +++++++---- pyproject.toml | 10 +++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/mlinsights/mlmodel/categories_to_integers.py b/mlinsights/mlmodel/categories_to_integers.py index cf1597b8..6055cd46 100644 --- a/mlinsights/mlmodel/categories_to_integers.py +++ b/mlinsights/mlmodel/categories_to_integers.py @@ -145,9 +145,10 @@ def transform(v, vec): if len(lv) > 20: lv = lv[:20] lv.append("...") + m = "\n".join(map(str, lv)) raise ValueError( - "Unable to find category value %r type(v)=%r " - "among\n%s" % (v, type(v), "\n".join(lv)) + f"Unable to find category value {v} " + f"type(v)={type(v)} among\n{m}" ) return numpy.nan @@ -178,10 +179,12 @@ def transform(v, vec): if len(lv) > 20: lv = lv[:20] lv.append("...") + m = "\n".join(map(str, lv)) raise ValueError( - "Unable to find category value %r: %r " - "type(v)=%r among\n%s" % (k, v, type(v), "\n".join(lv)) + f"Unable to find category value {k}: {v} " + f"type(v)={type(v)} among\n{m}" ) + p = pos[k] else: p = pos[k] + vec[k][v] res[i, p] = 1.0 diff --git a/pyproject.toml b/pyproject.toml index ee2f8f07..f86a67eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,14 +35,12 @@ 
changelog = "https://sdpython.github.io/doc/mlinsights/dev/CHANGELOGS.html" [project.optional-dependencies] dev = [ - "autopep8", "black", "clang-format", "cmakelang", "coverage", "cython", "cython-lint", - "flake8", "furo", "isort", "joblib", @@ -174,7 +172,13 @@ select = [ ] [tool.ruff.lint.per-file-ignores] -"**" = ["B905", "C401", "C408", "C413", "RUF012", "RUF100", "RUF010", "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103", "UP015", "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP038"] +"**" = [ + "B905", + "C401", "C408", "C413", + "RUF012", "RUF100", "RUF010", + "SIM108", "SIM910", "SIM110", "SIM102", "SIM114", "SIM103", "UP015", + "UP027", "UP031", "UP034", "UP032", "UP006", "UP035", "UP007", "UP038" +] "**/plot*.py" = ["B018"] "_unittests/**.py" = ["B904", "RUF015", "C400"] "_unittests/ut_plotting/test_dot.py" = ["E501"] From 169e9dfe198f3278ce79fcec0db7a8e46ef6d362 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sat, 7 Sep 2024 18:58:22 +0200 Subject: [PATCH 3/7] fix wrong change --- mlinsights/helpers/pipeline.py | 2 +- mlinsights/mlbatch/cache_model.py | 2 +- mlinsights/mlmodel/categories_to_integers.py | 13 +++++++------ mlinsights/mlmodel/classification_kmeans.py | 2 +- mlinsights/mlmodel/kmeans_l1.py | 2 +- mlinsights/mlmodel/sklearn_testing.py | 4 ++-- mlinsights/sklapi/sklearn_base.py | 6 +++--- mlinsights/sklapi/sklearn_base_transform_learner.py | 2 +- .../sklapi/sklearn_base_transform_stacking.py | 2 +- 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/mlinsights/helpers/pipeline.py b/mlinsights/helpers/pipeline.py index a4caec80..ec4d8333 100644 --- a/mlinsights/helpers/pipeline.py +++ b/mlinsights/helpers/pipeline.py @@ -120,7 +120,7 @@ def to_str(self, nrows=5): """ rows = [f"BaseEstimatorDebugInformation({self.model.__class__.__name__})"] for k in sorted(self.inputs): - assert k in self.outputs, f"Unable to find output for method '{k}'." 
+ assert k in self.outputs, f"Unable to find output for method {k!r}." rows.append(" " + k + "(") self.display(self.inputs[k], nrows) rows.append(textwrap.indent(self.display(self.inputs[k], nrows), " ")) diff --git a/mlinsights/mlbatch/cache_model.py b/mlinsights/mlbatch/cache_model.py index f7d4cde2..98387ca2 100644 --- a/mlinsights/mlbatch/cache_model.py +++ b/mlinsights/mlbatch/cache_model.py @@ -80,7 +80,7 @@ def as_key(params): elif v is None: sv = "" else: - raise TypeError(f"Unable to create a key with value '{k}':{v}") + raise TypeError(f"Unable to create a key with value {k!r}:{v!r}") els.append((k, sv)) return str(els) diff --git a/mlinsights/mlmodel/categories_to_integers.py b/mlinsights/mlmodel/categories_to_integers.py index 6055cd46..7dabf299 100644 --- a/mlinsights/mlmodel/categories_to_integers.py +++ b/mlinsights/mlmodel/categories_to_integers.py @@ -70,10 +70,11 @@ def fit(self, X, y=None, **fit_params): """ if not isinstance(X, pandas.DataFrame): raise TypeError(f"this transformer only accept Dataframes, not {type(X)}") - if self.columns: - columns = self.columns - else: - columns = [c for c, d in zip(X.columns, X.dtypes) if d in (object,)] + columns = ( + self.columns + if self.columns + else [c for c, d in zip(X.columns, X.dtypes) if d in (object, str)] + ) self._fit_columns = columns max_cat = max(len(X) // 2 + 1, 10000) @@ -86,7 +87,7 @@ def fit(self, X, y=None, **fit_params): raise ValueError( f"Too many categories ({nb}) for one column '{c}' max_cat={max_cat}" ) - self._categories[c] = dict(enumerate(list(sorted(distinct)))) + self._categories[c] = {c: i for i, c in enumerate(list(sorted(distinct)))} self._schema = self._build_schema() return self @@ -181,7 +182,7 @@ def transform(v, vec): lv.append("...") m = "\n".join(map(str, lv)) raise ValueError( - f"Unable to find category value {k}: {v} " + f"Unable to find category value {k!r}: {v!r} " f"type(v)={type(v)} among\n{m}" ) p = pos[k] diff --git 
a/mlinsights/mlmodel/classification_kmeans.py b/mlinsights/mlmodel/classification_kmeans.py index 3ff238ef..de3e5568 100644 --- a/mlinsights/mlmodel/classification_kmeans.py +++ b/mlinsights/mlmodel/classification_kmeans.py @@ -147,7 +147,7 @@ def set_params(self, **values): elif k.startswith("c_"): pc[k[2:]] = v else: - raise ValueError(f"Unexpected parameter name '{k}'") + raise ValueError(f"Unexpected parameter name {k!r}") self.clus.set_params(**pc) self.estimator.set_params(**pe) diff --git a/mlinsights/mlmodel/kmeans_l1.py b/mlinsights/mlmodel/kmeans_l1.py index 9e50907a..8ac28d29 100644 --- a/mlinsights/mlmodel/kmeans_l1.py +++ b/mlinsights/mlmodel/kmeans_l1.py @@ -173,7 +173,7 @@ def _validate_center_shape(X, k, centers): """Check if centers is compatible with X and n_clusters""" assert centers.shape[0] == k, ( f"The shape of the initial centers {centers.shape} does not " - f"match the number of clusters {k}." + f"match the number of clusters {k!r}." ) assert centers.shape[1] == X.shape[1], ( f"The shape of the initial centers {centers.shape} does not " diff --git a/mlinsights/mlmodel/sklearn_testing.py b/mlinsights/mlmodel/sklearn_testing.py index ac91d1ca..0e8f0ac2 100644 --- a/mlinsights/mlmodel/sklearn_testing.py +++ b/mlinsights/mlmodel/sklearn_testing.py @@ -131,7 +131,7 @@ def run_test_sklearn_clone(fct_model, ext=None, copy_fitted=False): ext.assertEqual(p1[k], p2[k]) except AssertionError as e: raise AssertionError( - f"Difference for key '{k}'\n==1 {p1[k]}\n==2 {p2[k]}" + f"Difference for key {k!r}\n==1 {p1[k]}\n==2 {p2[k]}" ) from e return conv, cloned @@ -303,7 +303,7 @@ def adjust(obj1, obj2): v1 = getattr(obj1, k) setattr(obj2, k, clone_with_fitted_parameters(v1)) else: - raise RuntimeError(f"Cloned object is missing '{k}' in {obj2}.") + raise RuntimeError(f"Cloned object is missing {k!r} in {obj2}.") if isinstance(est, BaseEstimator): cloned = clone(est) diff --git a/mlinsights/sklapi/sklearn_base.py b/mlinsights/sklapi/sklearn_base.py index 
e3584eba..318b0e76 100644 --- a/mlinsights/sklapi/sklearn_base.py +++ b/mlinsights/sklapi/sklearn_base.py @@ -91,12 +91,12 @@ def compare_params( for k in p1: if k not in p2: if exc: - raise KeyError(f"Key '{k}' was removed.") + raise KeyError(f"Key {k!r} was removed.") return False for k in p2: if k not in p1: if exc: - raise KeyError(f"Key '{k}' was added.") + raise KeyError(f"Key {k!r} was added.") return False for k in sorted(p1): v1, v2 = p1[k], p2[k] @@ -133,7 +133,7 @@ def compare_params( if not b: if exc: raise ValueError( - f"Values for key '{k}' are different.\n---\n{v1}\n---\n{v2}" + f"Values for key {k!r} are different.\n---\n{v1}\n---\n{v2}" ) return False return True diff --git a/mlinsights/sklapi/sklearn_base_transform_learner.py b/mlinsights/sklapi/sklearn_base_transform_learner.py index 9fe9dc0b..f5570743 100644 --- a/mlinsights/sklapi/sklearn_base_transform_learner.py +++ b/mlinsights/sklapi/sklearn_base_transform_learner.py @@ -166,7 +166,7 @@ def set_params(self, **values): del values["method"] for k in values: if not k.startswith("model__"): - raise ValueError(f"Parameter '{k}' must start with 'model__'.") + raise ValueError(f"Parameter {k!r} must start with 'model__'.") d = len("model__") pars = {k[d:]: v for k, v in values.items()} self.model.set_params(**pars) diff --git a/mlinsights/sklapi/sklearn_base_transform_stacking.py b/mlinsights/sklapi/sklearn_base_transform_stacking.py index 664987b7..c3f8aa72 100644 --- a/mlinsights/sklapi/sklearn_base_transform_stacking.py +++ b/mlinsights/sklapi/sklearn_base_transform_stacking.py @@ -163,7 +163,7 @@ def set_params(self, **values): del values["method"] for k, _v in values.items(): if not k.startswith("models_"): - raise ValueError(f"Parameter '{k}' must start with 'models_'.") + raise ValueError(f"Parameter {k!r} must start with 'models_'.") d = len("models_") pars = [{} for m in self.models] for k, v in values.items(): From 6636d277fccf99c900fefd3754c9bf16fe4d2f2c Mon Sep 17 00:00:00 2001 From: 
xadupre Date: Sat, 7 Sep 2024 19:07:33 +0200 Subject: [PATCH 4/7] update cp --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f86a67eb..5cdec8bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,7 @@ manylinux-x86_64-image = "manylinux2014" [tool.cibuildwheel.linux] archs = ["x86_64"] build = "cp*" -skip = "cp36-* cp37-* cp38-* cp39-* cp312-* pypy* *musllinux*" +skip = "cp36-* cp37-* cp38-* cp39-* cp313-* pypy* *musllinux*" manylinux-x86_64-image = "manylinux2014" before-build = "pip install auditwheel-symbols abi3audit" build-verbosity = 1 @@ -116,12 +116,12 @@ repair-wheel-command = "auditwheel-symbols --manylinux 2014 {wheel} ; abi3audit [tool.cibuildwheel.macos] archs = ["x86_64"] build = "cp*" -skip = "cp36-* cp37-* cp38-* cp39-* cp312-* pypy* pp*" +skip = "cp36-* cp37-* cp38-* cp39-* cp313-* pypy* pp*" [tool.cibuildwheel.windows] archs = ["AMD64"] build = "cp*" -skip = "cp36-* cp37-* cp38-* cp39-* cp312-* pypy*" +skip = "cp36-* cp37-* cp38-* cp39-* cp313-* pypy*" [tool.cython-lint] max-line-length = 88 From bdb302be6435175863704eabaee55566ac643285 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Sep 2024 09:41:23 +0200 Subject: [PATCH 5/7] warning --- .../test_LONG_search_images_torch.py | 9 ++++- mlinsights/ext_test_case.py | 33 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/_unittests/ut_search_rank/test_LONG_search_images_torch.py b/_unittests/ut_search_rank/test_LONG_search_images_torch.py index 8515e652..d37433ab 100644 --- a/_unittests/ut_search_rank/test_LONG_search_images_torch.py +++ b/_unittests/ut_search_rank/test_LONG_search_images_torch.py @@ -6,10 +6,17 @@ from io import StringIO import pandas import numpy -from mlinsights.ext_test_case import ExtTestCase, unzip_files +from mlinsights.ext_test_case import ( + ExtTestCase, + unzip_files, + skipif_ci_apple, + ignore_warnings, +) class 
TestSearchPredictionsImagesTorch(ExtTestCase): + @skipif_ci_apple("crash") + @ignore_warnings(UserWarning) def test_search_predictions_torch(self): from mlinsights.search_rank import SearchEnginePredictionImages diff --git a/mlinsights/ext_test_case.py b/mlinsights/ext_test_case.py index 909482eb..b5fdf4ba 100644 --- a/mlinsights/ext_test_case.py +++ b/mlinsights/ext_test_case.py @@ -547,3 +547,36 @@ def unzip_files( elif not info.filename.endswith("/"): files.append(tos) return files + + +def is_azure() -> bool: + "Tells if the job is running on Azure DevOps." + return os.environ.get("AZURE_HTTP_USER_AGENT", "undefined") != "undefined" + + +def is_windows() -> bool: + return sys.platform == "win32" + + +def is_apple() -> bool: + return sys.platform == "darwin" + + +def skipif_ci_windows(msg) -> Callable: + """ + Skips a unit test if it runs on :epkg:`azure pipeline` on :epkg:`Windows`. + """ + if is_windows() and is_azure(): + msg = f"Test does not work on azure pipeline (Windows). {msg}" + return unittest.skip(msg) + return lambda x: x + + +def skipif_ci_apple(msg) -> Callable: + """ + Skips a unit test if it runs on :epkg:`azure pipeline` on macOS. + """ + if is_apple() and is_azure(): + msg = f"Test does not work on azure pipeline (Apple). 
{msg}" + return unittest.skip(msg) + return lambda x: x From 541708f49284507268fc79d61a6a5d3b7dd55723 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Sep 2024 10:10:01 +0200 Subject: [PATCH 6/7] mac --- CHANGELOGS.rst | 6 +++++- mlinsights/__init__.py | 2 +- pyproject.toml | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index 4fbdf493..2f982492 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -1,8 +1,12 @@ - =========== Change Logs =========== +0.5.1 +===== + +* :pr:`132` builds against scikit-learn==1.5.0, python 3.12 + 0.5.0 ===== diff --git a/mlinsights/__init__.py b/mlinsights/__init__.py index 425f16b0..ed33ac63 100644 --- a/mlinsights/__init__.py +++ b/mlinsights/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.5.0" +__version__ = "0.5.1" __author__ = "Xavier Dupré" __github__ = "https://github.com/sdpython/mlinsights" __url__ = "https://sdpython.github.io/doc/dev/mlinsights/" diff --git a/pyproject.toml b/pyproject.toml index 5cdec8bf..608a32f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,6 +117,7 @@ repair-wheel-command = "auditwheel-symbols --manylinux 2014 {wheel} ; abi3audit archs = ["x86_64"] build = "cp*" skip = "cp36-* cp37-* cp38-* cp39-* cp313-* pypy* pp*" +before-build = "brew install libomp llvm" [tool.cibuildwheel.windows] archs = ["AMD64"] From 4145ed76faaee4ea9132b28cfb31fa1aba8456e7 Mon Sep 17 00:00:00 2001 From: xadupre Date: Sun, 8 Sep 2024 10:18:33 +0200 Subject: [PATCH 7/7] update mac --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 608a32f9..0ee0257f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ repair-wheel-command = "auditwheel-symbols --manylinux 2014 {wheel} ; abi3audit archs = ["x86_64"] build = "cp*" skip = "cp36-* cp37-* cp38-* cp39-* cp313-* pypy* pp*" -before-build = "brew install libomp llvm" +before-build = "brew install libomp llvm&&echo 'export 
PATH=\"/opt/homebrew/opt/llvm/bin:$PATH\"' >> /Users/runner/.bash_profile" [tool.cibuildwheel.windows] archs = ["AMD64"]