Skip to content

Add script and test to check for any unused Diagnostics in the codebase #68902

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions test/diagnostics/check-unused-diagnostics.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// REQUIRES: OS=macosx

// RUN: %{python} %utils/check-unused-diagnostics.py
94 changes: 94 additions & 0 deletions utils/check-unused-diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import os
import re
import sys
import ahocorasick

def extract_diagnostics_from_file(file_path):
    """
    Extract diagnostics patterns from a given file.

    A diagnostic is recognised as a line that starts (after optional
    whitespace) with WARNING(, NOTE(, ERROR( or REMARK(; the identifier is
    everything up to the first comma.

    Args:
    - file_path (str): Path to the file.

    Returns:
    - list: List of extracted diagnostics with the "diag::" prefix, in the
      order they appear in the file.
    """
    # Decode explicitly as UTF-8 and drop undecodable bytes, matching how
    # check_strings_in_files reads files, so the result does not depend on
    # the platform's default encoding and a stray byte cannot abort the run.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()

    pattern = r'^\s*(WARNING|NOTE|ERROR|REMARK)\(([^,]+),'
    return [
        "diag::" + match.group(2).strip()
        for match in re.finditer(pattern, content, re.MULTILINE)
    ]

def extract_all_diagnostics(root='.'):
    """
    Recursively extract diagnostics from all diagnostics definition files.

    A definition file is any file whose name starts with "diagnostic"
    (case-insensitive) and ends with ".def".

    Args:
    - root (str): Directory to search. Defaults to the current working
      directory.

    Returns:
    - list: List of all extracted diagnostics.
    """
    file_pattern = re.compile(r'diagnostic.*\.def$', re.IGNORECASE)
    identifiers = []

    for dirpath, dirnames, filenames in os.walk(root):
        # os.walk makes no ordering promise; sorting in place fixes both the
        # traversal order and the order of the returned list so that the
        # script's output is reproducible between runs and machines.
        dirnames.sort()
        for filename in sorted(filenames):
            # match() anchors at the start of the name only; the trailing
            # '$' in the pattern anchors the ".def" suffix.
            if file_pattern.match(filename):
                file_path = os.path.join(dirpath, filename)
                identifiers.extend(extract_diagnostics_from_file(file_path))

    return identifiers

def build_automaton(patterns):
    """
    Create an Aho-Corasick automaton that recognises every given pattern.

    Each pattern is registered with the payload (index, pattern), where
    index is the pattern's position in the input list.

    Args:
    - patterns (list): List of patterns.

    Returns:
    - Automaton: Aho-Corasick automaton.
    """
    automaton = ahocorasick.Automaton()
    position = 0
    for word in patterns:
        automaton.add_word(word, (position, word))
        position += 1
    # Called once after all words have been added.
    automaton.make_automaton()
    return automaton

def check_strings_in_files(strings, folder_path):
    """
    Check if all strings appear in the files within the given folder (including subfolders).

    A string only counts as found when the match is not immediately followed
    by another identifier character, so "diag::foo" is not satisfied by an
    occurrence of "diag::foo_bar".

    Args:
    - strings (list): List of strings to search for.
    - folder_path (str): Path to the folder.

    Returns:
    - list: List of strings that haven't been found, in the order they were
      first given (duplicates removed).
    """
    pattern_map = {s: False for s in strings}
    if not pattern_map:
        # Nothing to look for; skip building an automaton with no words.
        return []

    automaton = build_automaton(strings)
    remaining = len(pattern_map)  # Number of patterns not yet seen.

    for dirpath, _dirnames, filenames in os.walk(folder_path):
        if remaining == 0:  # End early if all patterns are found
            break
        for file_name in filenames:
            file_path = os.path.join(dirpath, file_name)
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
            # iter() yields the index of the LAST character of each match.
            for end_index, (_, found) in automaton.iter(text):
                if pattern_map[found]:
                    continue
                # Reject matches that are merely a prefix of a longer
                # identifier (e.g. "diag::foo" inside "diag::foo_bar").
                following = text[end_index + 1:end_index + 2]
                if following.isalnum() or following == '_':
                    continue
                pattern_map[found] = True
                remaining -= 1

    return [key for key, found in pattern_map.items() if not found]

def check_for_unused_diagnostics():
    """
    Report diagnostics that are defined but never referenced.

    Diagnostic definitions are collected from the current working directory
    and each one is searched for (as "diag::<name>") in ./lib and ./include.

    Returns:
    - int: 0 if every diagnostic is referenced at least once; 1 if some are
      unreferenced or if no diagnostic definitions could be found at all.
    """
    diagnostics = extract_all_diagnostics()
    if not diagnostics:
        # With nothing to check, the search below would trivially succeed;
        # treat this as an error instead of reporting a false "all used".
        print("No diagnostic definitions were found under the current "
              "directory.", file=sys.stderr)
        return 1

    # Review feedback: diagnostics can also be emitted from headers, so
    # ./include is searched in addition to ./lib. Each pass only looks for
    # the diagnostics the previous pass did not find.
    unused_diagnostics = diagnostics
    for folder in ('./lib', './include'):
        if not unused_diagnostics:
            break
        unused_diagnostics = check_strings_in_files(unused_diagnostics, folder)

    if not unused_diagnostics:
        print("All diagnostics appear at least once in the codebase!")
        return 0

    print("The following diagnostics did not appear in the codebase:")
    for diag in unused_diagnostics:
        print(diag)
    return 1

if __name__ == '__main__':
    # Use the check's return value as the process exit status.
    exit_status = check_for_unused_diagnostics()
    sys.exit(exit_status)