From fa69e010c61a5007c00a29f84f133fca96840129 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Fri, 9 Dec 2022 15:18:45 +0200 Subject: [PATCH 1/5] Group linkcheck configs together --- conf.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/conf.py b/conf.py index df7dfbaff..0c1d7cdf8 100644 --- a/conf.py +++ b/conf.py @@ -51,6 +51,18 @@ # match any anchor that starts with a '/' since this is an invalid HTML anchor r'\/.*', ] + +linkcheck_ignore = [ + # The voters repo is private and appears as a 404 + 'https://github.com/python/voters/', + # The python-core team link is private, redirects to login + 'https://github.com/orgs/python/teams/python-core', + # The Discourse groups are private unless you are logged in + 'https://discuss.python.org/groups/staff', + 'https://discuss.python.org/groups/moderators', + 'https://discuss.python.org/groups/admins', +] + rediraffe_redirects = { "clang.rst": "advanced-tools/clang.rst", "coverity.rst": "advanced-tools/coverity.rst", @@ -90,17 +102,6 @@ "triaging.rst": "triage/triaging.rst", } -linkcheck_ignore = [ - # The voters repo is private and appears as a 404 - 'https://github.com/python/voters/', - # The python-core team link is private, redirects to login - 'https://github.com/orgs/python/teams/python-core', - # The Discourse groups are private unless you are logged in - 'https://discuss.python.org/groups/staff', - 'https://discuss.python.org/groups/moderators', - 'https://discuss.python.org/groups/admins', -] - intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), } From d241ac95949cc14c893ca19b9f28037b5d663d56 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Fri, 9 Dec 2022 15:44:33 +0200 Subject: [PATCH 2/5] Update linkcheck_ignore rules --- conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf.py b/conf.py index 0c1d7cdf8..cf29b2463 100644 --- a/conf.py +++ b/conf.py @@ -61,6 +61,12 @@ 'https://discuss.python.org/groups/staff', 'https://discuss.python.org/groups/moderators', 'https://discuss.python.org/groups/admins', + # Anchor not found + r'https://github.com.+?#L\d+', + r'https://github.com/cli/cli#installation', + r'https://github.com/github/renaming#renaming-existing-branches', + # 403 Client Error: Forbidden + r'https://support.discord.com/hc/en-us/articles/219070107-Server-Nicknames', ] rediraffe_redirects = { From 7922958e8c61a2b33a9c31cbb225edf5853a57a2 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Fri, 9 Dec 2022 17:55:18 +0200 Subject: [PATCH 3/5] Add linkcheck_allowed_redirects rules --- conf.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/conf.py b/conf.py index cf29b2463..ff6a0602e 100644 --- a/conf.py +++ b/conf.py @@ -44,6 +44,30 @@ # Set to '' to prevent appending "documentation" to the site title html_title = "" +linkcheck_allowed_redirects = { + # Edit page + r"https://docs.google.com/document/d/.*/": r"https://docs.google.com/document/d/.*/edit", + # Canonical + r"https://docs.python.org/": r"https://docs.python.org/3/", + # Translations with country codes + r"https://docs.python.org/[a-z-]+/": r"https://docs.python.org/[a-z-]+/3/", + # Personal /my/ links redirect to login page + r"https://discuss.python.org/my/.*": r"https://discuss.python.org/login-preferences", + # Login page + r"https://github.com/python/core-workflow/issues/new.*": r"https://github.com/login.*", + # Archive redirect + r"https://github.com/python/cpython/archive/main.zip": r"https://codeload.github.com/python/cpython/zip/refs/heads/main", + # Blob to tree + r"https://github.com/python/cpython/blob/.*": r"https://github.com/python/cpython/tree/.*", + # HackMD shortcuts + r"https://hackmd.io/s/.*": r"https://hackmd.io/@.*", + # Read the Docs + r"https://virtualenv.pypa.io/": r"https://virtualenv.pypa.io/en/latest/", + r"https://www.sphinx-doc.org/": r"https://www.sphinx-doc.org/en/master/", + # Cookie consent + r"https://www.youtube.com/playlist.*": r"https://consent.youtube.com/ml.*", +} + # ignore linkcheck anchors for /#/$ANCHOR since it is used for # dynamic pages such as http://buildbot.python.org/all/#/console # http://www.sphinx-doc.org/en/stable/config.html?highlight=linkcheck#confval-linkcheck_anchors_ignore From bdcc03cad56331e59b1264e8ea82c3b76854d62e Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Fri, 9 Dec 2022 18:14:15 +0200 Subject: [PATCH 4/5] Update PSF link redirect --- getting-started/pull-request-lifecycle.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/getting-started/pull-request-lifecycle.rst b/getting-started/pull-request-lifecycle.rst index bb685e21b..e6be00c29 100644 --- a/getting-started/pull-request-lifecycle.rst +++ b/getting-started/pull-request-lifecycle.rst @@ -343,7 +343,7 @@ Here are the steps needed in order to sign the CLA: .. _PSF license: https://docs.python.org/dev/license.html#terms-and-conditions-for-accessing-or-otherwise-using-python .. _contributor agreement: https://www.python.org/psf/contrib/ .. _contributor form: https://www.python.org/psf/contrib/contrib-form/ -.. _Python Software Foundation: https://www.python.org/psf/ +.. _Python Software Foundation: https://www.python.org/psf-landing/ Submitting From 134664ac77eefa949866ca6ee53664453bb42eb6 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sat, 10 Dec 2022 22:58:49 +0200 Subject: [PATCH 5/5] Summarise reasons for linkcheck_allowed_redirects entries --- conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf.py b/conf.py index ff6a0602e..c899822f8 100644 --- a/conf.py +++ b/conf.py @@ -85,11 +85,11 @@ 'https://discuss.python.org/groups/staff', 'https://discuss.python.org/groups/moderators', 'https://discuss.python.org/groups/admins', - # Anchor not found + # The crawler gets "Anchor not found" for GitHub anchors r'https://github.com.+?#L\d+', r'https://github.com/cli/cli#installation', r'https://github.com/github/renaming#renaming-existing-branches', - # 403 Client Error: Forbidden + # Discord doesn't allow robot crawlers: "403 Client Error: Forbidden" r'https://support.discord.com/hc/en-us/articles/219070107-Server-Nicknames', ]