diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 944575dcc8659..3c222c7701833 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -69,8 +69,10 @@ } ALL_ERRORS = set(NUMPYDOC_ERROR_MSGS).union(set(ERROR_MSGS)) duplicated_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS)) -assert not duplicated_errors, (f"Errors {duplicated_errors} exist in both pandas " - "and numpydoc, should they be removed from pandas?") +assert not duplicated_errors, ( + f"Errors {duplicated_errors} exist in both pandas " + "and numpydoc, should they be removed from pandas?" +) def pandas_error(code, **kwargs): @@ -245,7 +247,15 @@ def pandas_validate(func_name: str): # Some objects are instances, e.g. IndexSlice, which numpydoc can't validate doc_obj = get_doc_object(func_obj, doc=func_obj.__doc__) doc = PandasDocstring(func_name, doc_obj) - result = validate(doc_obj) + if func_obj.__doc__ is not None: + result = validate(doc_obj) + else: + result = { + "docstring": "", + "file": None, + "file_line": None, + "errors": [("GL08", "The object does not have a docstring")], + } mentioned_errs = doc.mentioned_private_classes if mentioned_errs: result["errors"].append( @@ -257,7 +267,7 @@ def pandas_validate(func_name: str): pandas_error( "SA05", reference_name=rel_name, - right_reference=rel_name[len("pandas."):], + right_reference=rel_name[len("pandas.") :], ) for rel_name in doc.see_also if rel_name.startswith("pandas.") @@ -365,12 +375,13 @@ def print_validate_all_results( for func_name, res in result.items(): error_messages = dict(res["errors"]) actual_failures = set(error_messages) - expected_failures = (ignore_errors.get(func_name, set()) - | ignore_errors.get(None, set())) + expected_failures = ignore_errors.get(func_name, set()) | ignore_errors.get( + None, set() + ) for err_code in actual_failures - expected_failures: sys.stdout.write( f'{prefix}{res["file"]}:{res["file_line"]}:' - f'{err_code}:{func_name}:{error_messages[err_code]}\n' + f"{err_code}:{func_name}:{error_messages[err_code]}\n" ) exit_status += 1 for err_code in ignore_errors.get(func_name, set()) - actual_failures: @@ -384,8 +395,9 @@ def print_validate_all_results( return exit_status -def print_validate_one_results(func_name: str, - ignore_errors: dict[str, set[str]]) -> int: +def print_validate_one_results( + func_name: str, ignore_errors: dict[str, set[str]] +) -> int: def header(title, width=80, char="#") -> str: full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -396,8 +408,11 @@ def header(title, width=80, char="#") -> str: result = pandas_validate(func_name) - result["errors"] = [(code, message) for code, message in result["errors"] - if code not in ignore_errors.get(None, set())] + result["errors"] = [ + (code, message) + for code, message in result["errors"] + if code not in ignore_errors.get(None, set()) + ] sys.stderr.write(header(f"Docstring ({func_name})")) sys.stderr.write(f"{result['docstring']}\n") @@ -431,14 +446,16 @@ def _format_ignore_errors(raw_ignore_errors): raise ValueError( f"Object `{obj_name}` is present in more than one " "--ignore_errors argument. Please use it once and specify " - "the errors separated by commas.") + "the errors separated by commas." + ) ignore_errors[obj_name] = set(error_codes.split(",")) unknown_errors = ignore_errors[obj_name] - ALL_ERRORS if unknown_errors: raise ValueError( f"Object `{obj_name}` is ignoring errors {unknown_errors} " - f"which are not known. Known errors are: {ALL_ERRORS}") + f"which are not known. Known errors are: {ALL_ERRORS}" + ) # global errors "PR02,ES01" else: @@ -448,27 +465,19 @@ def _format_ignore_errors(raw_ignore_errors): if unknown_errors: raise ValueError( f"Unknown errors {unknown_errors} specified using --ignore_errors " - "Known errors are: {ALL_ERRORS}") + "Known errors are: {ALL_ERRORS}" + ) return ignore_errors -def main( - func_name, - output_format, - prefix, - ignore_deprecated, - ignore_errors -): +def main(func_name, output_format, prefix, ignore_deprecated, ignore_errors): """ Main entry point. Call the validation for one or for all docstrings. """ if func_name is None: return print_validate_all_results( - output_format, - prefix, - ignore_deprecated, - ignore_errors + output_format, prefix, ignore_deprecated, ignore_errors ) else: return print_validate_one_results(func_name, ignore_errors) @@ -524,10 +533,11 @@ def main( args = argparser.parse_args(sys.argv[1:]) sys.exit( - main(args.function, - args.format, - args.prefix, - args.ignore_deprecated, - _format_ignore_errors(args.ignore_errors), - ) + main( + args.function, + args.format, + args.prefix, + args.ignore_deprecated, + _format_ignore_errors(args.ignore_errors), + ) )