Skip to content

Commit 108f1b6

Browse files
committed
BLD: restrict code_checks.sh to tracked repo files
Previously, some of the checks in code_checks.sh ran unrestricted on all the contents of the repository root (recursively), so that if any files extraneous to the repo were present (e.g. a virtual environment directory), they were checked too, potentially causing many false positives when a developer runs ./ci/code_checks.sh . The checker invocations that were already scoped (i.e. they were already restricted, in one way or another, to the actual pandas code, e.g. by restricting the search to the `pandas` subfolder) have been left as-is, while those that weren't are now given an explicit list of files that are tracked in the repo.
1 parent 3c07fff commit 108f1b6

File tree

1 file changed

+39
-16
lines changed

1 file changed

+39
-16
lines changed

ci/code_checks.sh

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,24 @@ BASE_DIR="$(dirname $0)/.."
2525
RET=0
2626
CHECK=$1
2727

28+
29+
# Get lists of files tracked by git:
30+
31+
# Wrap every stdin line that contains whitespace in double quotes so that a
# downstream `xargs` keeps it as a single argument; lines without whitespace
# pass through unchanged.
#   Input:  newline-separated paths on stdin
#   Output: the same paths on stdout, double-quoted where needed
function quote_if_needed {
    # An explicit bracket list replaces the GNU-only `\s` escape so the filter
    # behaves the same under gawk, mawk and BSD awk.
    awk '/[ \t\r\f\v]/ { print "\"" $0 "\""; next } { print }'
}
34+
35+
# List the files tracked in HEAD, one path per line, optionally filtered.
#   $1 - (optional) file extension including its leading dot, e.g. ".py";
#        when empty or omitted every tracked file is listed
#   $2 - (optional) sub-directory to restrict the listing to
# Output: matching paths on stdout, passed through quote_if_needed.
function git_tracked_files {
    local ext=$1
    local subdir=$2
    local patt

    if [[ -n "$ext" ]]; then
        # Escape the first character (expected to be the dot) and anchor the
        # match at end of line: ".py" -> "\.py$"
        patt="\\${ext}\$"
    else
        # A bare end-of-line anchor matches every path
        patt='$'
    fi

    # ${subdir:+...} adds the path argument only when a sub-directory was
    # given, and keeps it as a single word even if it contains spaces.
    git ls-tree --name-only -r HEAD ${subdir:+"$subdir"} \
        | grep -e "$patt" \
        | quote_if_needed
}
40+
41+
GIT_TRACKED_ALL=$(git_tracked_files)
42+
GIT_TRACKED_PY_FILES=$(git_tracked_files .py)
43+
GIT_TRACKED_RST_FILES=$(git_tracked_files .rst doc/source)
44+
45+
2846
function invgrep {
2947
# grep with inverse exit status and formatting for azure-pipelines
3048
#
@@ -38,6 +56,8 @@ function invgrep {
3856
return $((! $EXIT_STATUS))
3957
}
4058

59+
export -f invgrep; # needed because of the use of xargs to pass in $GIT_TRACKED_ALL as args
60+
4161
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
4262
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
4363
INVGREP_PREPEND="##[error]"
@@ -52,7 +72,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5272
black --version
5373

5474
MSG='Checking black formatting' ; echo $MSG
55-
black . --check
75+
echo $GIT_TRACKED_PY_FILES | xargs black --check
5676
RET=$(($RET + $?)) ; echo $MSG "DONE"
5777

5878
# `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -62,7 +82,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6282

6383
# pandas/_libs/src is C code, so no need to search there.
6484
MSG='Linting .py code' ; echo $MSG
65-
flake8 --format="$FLAKE8_FORMAT" .
85+
echo $GIT_TRACKED_PY_FILES | xargs flake8 --format="$FLAKE8_FORMAT"
6686
RET=$(($RET + $?)) ; echo $MSG "DONE"
6787

6888
MSG='Linting .pyx and .pxd code' ; echo $MSG
@@ -77,7 +97,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
7797
flake8-rst --version
7898

7999
MSG='Linting code-blocks in .rst documentation' ; echo $MSG
80-
flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT"
100+
echo $GIT_TRACKED_RST_FILES | xargs flake8-rst --format="$FLAKE8_FORMAT"
81101
RET=$(($RET + $?)) ; echo $MSG "DONE"
82102

83103
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
@@ -100,35 +120,38 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
100120
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
101121
RET=$(($RET + $?)) ; echo $MSG "DONE"
102122

123+
124+
VALIDATE_CMD=$BASE_DIR/scripts/validate_unwanted_patterns.py
125+
103126
MSG='Check for use of not concatenated strings' ; echo $MSG
104127
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
105-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" .
128+
echo $GIT_TRACKED_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" --no-override
106129
else
107-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" .
130+
echo $GIT_TRACKED_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --no-override
108131
fi
109132
RET=$(($RET + $?)) ; echo $MSG "DONE"
110133

111134
MSG='Check for strings with wrong placed spaces' ; echo $MSG
112135
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
113-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" .
136+
echo $GIT_TRACKED_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" --no-override
114137
else
115-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" .
138+
echo $GIT_TRACKED_PY_FILES | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --no-override
116139
fi
117140
RET=$(($RET + $?)) ; echo $MSG "DONE"
118141

119142
MSG='Check for import of private attributes across modules' ; echo $MSG
120143
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
144+
$VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122145
else
123-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
146+
$VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124147
fi
125148
RET=$(($RET + $?)) ; echo $MSG "DONE"
126149

127150
MSG='Check for use of private functions across modules' ; echo $MSG
128151
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
129-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
152+
$VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
130153
else
131-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
154+
$VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
132155
fi
133156
RET=$(($RET + $?)) ; echo $MSG "DONE"
134157

@@ -137,11 +160,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
137160

138161
# Imports - Check formatting using isort see setup.cfg for settings
139162
MSG='Check import format using isort' ; echo $MSG
140-
ISORT_CMD="isort --quiet --check-only pandas asv_bench scripts web"
163+
ISORT_OPTIONS="--quiet --check-only pandas asv_bench scripts web"
141164
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
142-
eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
165+
# Feed the tracked .py files to isort. isort runs under xargs as the SECOND pipeline stage (echo | xargs isort | awk), so its exit status is PIPESTATUS[1], not [0].
echo "$GIT_TRACKED_PY_FILES" | xargs isort $ISORT_OPTIONS | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[1]}))
143166
else
144-
eval $ISORT_CMD
167+
# Expand the variable (the `$` was missing) so isort receives the tracked .py files rather than the literal word.
echo "$GIT_TRACKED_PY_FILES" | xargs isort $ISORT_OPTIONS
145168
fi
146169
RET=$(($RET + $?)) ; echo $MSG "DONE"
147170

@@ -239,7 +262,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
239262
RET=$(($RET + $?)) ; echo $MSG "DONE"
240263

241264
MSG='Check for extra blank lines after the class definition' ; echo $MSG
242-
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
265+
invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' pandas
243266
RET=$(($RET + $?)) ; echo $MSG "DONE"
244267

245268
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
@@ -272,7 +295,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
272295

273296
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
274297
INVGREP_APPEND=" <- trailing whitespaces found"
275-
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
298+
echo $GIT_TRACKED_ALL | xargs bash -c 'invgrep -RI "\s$" "$@"' _
276299
RET=$(($RET + $?)) ; echo $MSG "DONE"
277300
unset INVGREP_APPEND
278301
fi

0 commit comments

Comments
 (0)