diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 54aa830379c07..762c162858824 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -25,6 +25,26 @@ BASE_DIR="$(dirname $0)/.."
 RET=0
 CHECK=$1
 
+
+# Get lists of files tracked by git:
+
+function quote_if_needed {
+    awk '{ print $0 ~ /.*\s+.*/ ? "\""$0"\"" : $0 }'
+}
+
+function git_tracked_files {
+    [[ ! -z "$1" ]] && local patt="\\${1}$" || local patt="$"
+    local subdir=$2
+    git ls-tree --name-only -r HEAD $subdir | grep -e $patt | quote_if_needed
+}
+
+GIT_TRACKED_ALL=$(git_tracked_files)
+GIT_TRACKED_ALL_PY_FILES=$(git_tracked_files .py)
+GIT_TRACKED_DOCSOURCE_RST_FILES=$(git_tracked_files .rst doc/source)
+GIT_TRACKED_REFERENCE_RST_FILES=$(git_tracked_files .rst doc/source/reference)
+GIT_TRACKED_DEVELOPMENT_RST_FILES=$(git_tracked_files .rst doc/source/development)
+
+
 function invgrep {
     # grep with inverse exist status and formatting for azure-pipelines
     #
@@ -38,6 +58,8 @@ function invgrep {
     return $((! $EXIT_STATUS))
 }
 
+export -f invgrep; # needed because of the use of xargs to pass in $GIT_TRACKED_ALL as args
+
 if [[ "$GITHUB_ACTIONS" == "true" ]]; then
     FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
     INVGREP_PREPEND="##[error]"
@@ -45,6 +67,20 @@ else
     FLAKE8_FORMAT="default"
 fi
 
+
+function if_gh_actions {
+    # If this is running on GitHub Actions, echo the argument list, otherwise
+    # echo the empty string.
+    # Used to conditionally pass command-line arguments as in
+    # $(if_gh_actions --baz spam) | xargs foo --bar
+    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
+        for arg in "$@"; do echo $arg; done | quote_if_needed
+    else
+        echo ""
+    fi
+}
+
+
 ### LINTING ###
 if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
 
@@ -52,7 +88,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     black --version
 
     MSG='Checking black formatting' ; echo $MSG
-    black . --check
+    echo $GIT_TRACKED_ALL_PY_FILES | xargs black --check
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     # `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -62,7 +98,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     # pandas/_libs/src is C code, so no need to search there.
 
     MSG='Linting .py code' ; echo $MSG
-    flake8 --format="$FLAKE8_FORMAT" .
+    echo $GIT_TRACKED_ALL_PY_FILES | xargs flake8 --format="$FLAKE8_FORMAT"
    RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Linting .pyx and .pxd code' ; echo $MSG
@@ -77,7 +113,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     flake8-rst --version
 
     MSG='Linting code-blocks in .rst documentation' ; echo $MSG
-    flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT"
+    echo $GIT_TRACKED_DOCSOURCE_RST_FILES | xargs flake8-rst --format="$FLAKE8_FORMAT"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     # Check that cython casting is of the form `obj` as opposed to ` obj`;
@@ -100,36 +136,27 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+
+    VALIDATE_CMD=$BASE_DIR/scripts/validate_unwanted_patterns.py
+
     MSG='Check for use of not concatenated strings' ; echo $MSG
-    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --format="##[error]{source_path}:{line_number}:{msg}" .
-    else
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" .
-    fi
+    ARGS=$({ if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}"; echo $GIT_TRACKED_ALL_PY_FILES; })
+    echo $ARGS | xargs $VALIDATE_CMD --validation-type="strings_to_concatenate" --no-override
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for strings with wrong placed spaces' ; echo $MSG
-    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" --format="##[error]{source_path}:{line_number}:{msg}" .
-    else
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" .
-    fi
+    ARGS=$({ if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}"; echo $GIT_TRACKED_ALL_PY_FILES; })
+    echo $ARGS | xargs $VALIDATE_CMD --validation-type="strings_with_wrong_placed_whitespace" --no-override
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for import of private attributes across modules' ; echo $MSG
-    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
-    else
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
-    fi
+    ARGS=$(if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}")
+    echo $ARGS | xargs $VALIDATE_CMD --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for use of private functions across modules' ; echo $MSG
-    if [[ "$GITHUB_ACTIONS" == "true" ]]; then
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
-    else
-        $BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
-    fi
+    ARGS=$(if_gh_actions --format="##[error]{source_path}:{line_number}:{msg}")
+    echo $ARGS | xargs $VALIDATE_CMD --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     echo "isort --version-number"
@@ -239,7 +266,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for extra blank lines after the class definition' ; echo $MSG
-    invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
+    invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
@@ -272,7 +299,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
 
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
-    invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
+    echo $GIT_TRACKED_ALL | xargs bash -c 'invgrep -RI "\s$" "$@"' _
     RET=$(($RET + $?)) ; echo $MSG "DONE"
     unset INVGREP_APPEND
 fi
@@ -390,7 +417,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Validate correct capitalization among titles in documentation' ; echo $MSG
-    $BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development $BASE_DIR/doc/source/reference
+    echo "${GIT_TRACKED_REFERENCE_RST_FILES} ${GIT_TRACKED_DEVELOPMENT_RST_FILES}" | xargs $BASE_DIR/scripts/validate_rst_title_capitalization.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi
diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
index 5343999c369f7..2af10a5b72d33 100644
--- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c
+++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
@@ -1134,7 +1134,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name,
       }
       break;
-      
+
     }
   }
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c
index f647098140528..8eb995dee645b 100644
--- a/pandas/_libs/tslibs/src/datetime/np_datetime.c
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c
@@ -312,7 +312,7 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
  * object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
  * to convert to UTC time.
  *
- * The following implementation just asks for attributes, and thus 
+ * The following implementation just asks for attributes, and thus
  * supports datetime duck typing. The tzinfo time zone conversion
  * requires this style of access as well.
 *
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
index b6ffab1482bbc..7b5b00159fcd5 100755
--- a/scripts/validate_unwanted_patterns.py
+++ b/scripts/validate_unwanted_patterns.py
@@ -16,7 +16,7 @@
 import sys
 import token
 import tokenize
-from typing import IO, Callable, FrozenSet, Iterable, List, Set, Tuple
+from typing import IO, Callable, FrozenSet, Iterable, List, Sequence, Set, Tuple
 
 PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
     "_extension_array_shared_docs",
@@ -403,10 +403,12 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool:
 
 def main(
     function: Callable[[IO[str]], Iterable[Tuple[int, str]]],
-    source_path: str,
+    source_paths: Sequence[str],
     output_format: str,
     file_extensions_to_check: str,
     excluded_file_paths: str,
+    override: bool,
+    verbose: int,
 ) -> bool:
     """
     Main entry point of the script.
@@ -415,14 +417,19 @@ def main(
     ----------
     function : Callable
         Function to execute for the specified validation type.
-    source_path : str
-        Source path representing path to a file/directory.
+    source_paths : list of str
+        File paths of files and directories to check.
     output_format : str
         Output format of the error message.
     file_extensions_to_check : str
         Comma separated values of what file extensions to check.
     excluded_file_paths : str
         Comma separated values of what file paths to exclude during the check.
+    override : bool
+        Whether individual files mentioned in ``source_paths`` should override
+        ``excluded_file_paths``.
+    verbose : int
+        Verbosity level (currently only distinguishes between zero and nonzero).
 
     Returns
     -------
@@ -434,46 +441,59 @@ def main(
     ValueError
         If the `source_path` is not pointing to existing file/directory.
     """
-    if not os.path.exists(source_path):
+    if not all(os.path.exists(path) for path in source_paths):
         raise ValueError("Please enter a valid path, pointing to a file/directory.")
 
-    is_failed: bool = False
-    file_path: str = ""
-
     FILE_EXTENSIONS_TO_CHECK: FrozenSet[str] = frozenset(
         file_extensions_to_check.split(",")
     )
-    PATHS_TO_IGNORE = frozenset(excluded_file_paths.split(","))
+    PATHS_TO_IGNORE = frozenset(
+        os.path.abspath(os.path.normpath(path))
+        for path in excluded_file_paths.split(",")
+    )
+
+    is_failed: bool = False
 
-    if os.path.isfile(source_path):
-        file_path = source_path
+    def check_file(file_path: str):
+        nonlocal is_failed
+        local_is_failed = False
+        if verbose:
+            print(f"Checking {file_path}...", file=sys.stderr, end="")
         with open(file_path) as file_obj:
             for line_number, msg in function(file_obj):
-                is_failed = True
+                local_is_failed = True
                 print(
                     output_format.format(
                         source_path=file_path, line_number=line_number, msg=msg
                     )
                 )
+        if not local_is_failed:
+            if verbose:
+                print(" OK", file=sys.stderr)
+        else:
+            is_failed = True
 
-    for subdir, _, files in os.walk(source_path):
-        if any(path in subdir for path in PATHS_TO_IGNORE):
-            continue
-        for file_name in files:
-            if not any(
-                file_name.endswith(extension) for extension in FILE_EXTENSIONS_TO_CHECK
-            ):
-                continue
+    def is_ignored(path: str):
+        path = os.path.abspath(os.path.normpath(path))
+        return any(path.startswith(ignored_path) for ignored_path in PATHS_TO_IGNORE)
 
-            file_path = os.path.join(subdir, file_name)
-            with open(file_path) as file_obj:
-                for line_number, msg in function(file_obj):
-                    is_failed = True
-                    print(
-                        output_format.format(
-                            source_path=file_path, line_number=line_number, msg=msg
-                        )
-                    )
+    for source_path in source_paths:
+        if os.path.isfile(source_path):
+            if override or not is_ignored(source_path):
+                check_file(source_path)
+        else:
+            for subdir, _, files in os.walk(source_path):
+                if is_ignored(subdir):
+                    continue
+                for file_name in files:
+                    file_path = os.path.join(subdir, file_name)
+                    if is_ignored(file_path) or not any(
+                        file_name.endswith(extension)
+                        for extension in FILE_EXTENSIONS_TO_CHECK
+                    ):
+                        continue
+
+                    check_file(file_path)
 
     return is_failed
 
@@ -490,7 +510,13 @@ def main(
     parser = argparse.ArgumentParser(description="Unwanted patterns checker.")
 
     parser.add_argument(
-        "path", nargs="?", default=".", help="Source path of file/directory to check."
+        "paths",
+        nargs="*",
+        default=["."],
+        help=(
+            "Source path(s) of files and directories to check. If a directory is "
+            "specified, all its contents are checked recursively."
+        ),
     )
     parser.add_argument(
         "--format",
@@ -513,7 +539,26 @@ def main(
     parser.add_argument(
         "--excluded-file-paths",
         default="asv_bench/env",
-        help="Comma separated file paths to exclude.",
+        help=(
+            "Comma separated file paths to exclude. If an individual file is "
+            "explicitly passed in `paths`, it overrides this setting, unless the "
+            "--no-override flag is used."
+        ),
+    )
+    parser.add_argument(
+        "--no-override",
+        dest="override",
+        action="store_false",
+        help=(
+            "Don't allow individual files explicitly mentioned in `paths` to override "
+            "the excluded file paths (see --excluded-file-paths)."
+        ),
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        help="Set the verbosity level to the number of times this flag is used.",
     )
 
     args = parser.parse_args()
@@ -521,9 +566,11 @@ def main(
     sys.exit(
         main(
             function=globals().get(args.validation_type),  # type: ignore
-            source_path=args.path,
+            source_paths=args.paths,
             output_format=args.format,
             file_extensions_to_check=args.included_file_extensions,
             excluded_file_paths=args.excluded_file_paths,
+            override=args.override,
+            verbose=args.verbose,
         )
     )
diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html
index 023bfe9e26b78..700c2dbab6dc9 100644
--- a/web/pandas/_templates/layout.html
+++ b/web/pandas/_templates/layout.html
@@ -33,7 +33,7 @@
         {% if static.logo %}{% endif %}
-
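
Usage sketch (illustrative only, not part of the patch): the commands below assume they are run from the root of a pandas checkout with this patch applied, and that the file names involved contain no whitespace (inside ci/code_checks.sh, quote_if_needed handles names that do). They show how the reworked validate_unwanted_patterns.py interface is meant to be driven, both with explicit paths and with the git-tracked file lists.

    # Check two explicitly named files; explicitly listed files bypass the default
    # exclude list (asv_bench/env) unless --no-override is given, and -v reports
    # per-file progress on stderr.
    scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" -v \
        pandas/core/frame.py pandas/core/series.py

    # Check every git-tracked .py file, mirroring how ci/code_checks.sh now builds
    # its file lists, while still honoring the exclude list (--no-override).
    git ls-tree --name-only -r HEAD | grep -e '\.py$' | xargs \
        scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" --no-override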