From 7129f00d952ace3634bc08a2931a00aa59d47815 Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Wed, 22 Jan 2025 17:55:21 +0100
Subject: [PATCH 01/12] generate_token.py: Only generate docs for 'literal' tokens; check the rest

---
 Doc/library/token-list.inc | 37 ------------------------
 Doc/library/token.rst | 51 +++++++++++++++++++++++++--------
 Tools/build/generate_token.py | 53 +++++++++++++++++++++++++++--------
 3 files changed, 82 insertions(+), 59 deletions(-)

diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc
index 39df2927a0b7f2..128bb218dfb02c 100644
--- a/Doc/library/token-list.inc
+++ b/Doc/library/token-list.inc
@@ -1,18 +1,4 @@
 .. Auto-generated by Tools/build/generate_token.py
-.. data:: ENDMARKER
-
-.. data:: NAME
-
-.. data:: NUMBER
-
-.. data:: STRING
-
-.. data:: NEWLINE
-
-.. data:: INDENT
-
-.. data:: DEDENT
-
 .. data:: LPAR
 
    Token value for ``"("``.
@@ -205,26 +191,3 @@
    Token value for ``"!"``.
 
-.. data:: OP
-
-.. data:: TYPE_IGNORE
-
-.. data:: TYPE_COMMENT
-
-.. data:: SOFT_KEYWORD
-
-.. data:: FSTRING_START
-
-.. data:: FSTRING_MIDDLE
-
-.. data:: FSTRING_END
-
-.. data:: COMMENT
-
-.. data:: NL
-
-.. data:: ERRORTOKEN
-
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
diff --git a/Doc/library/token.rst b/Doc/library/token.rst
index 40982f32b4beee..73a5fff3f04ce8 100644
--- a/Doc/library/token.rst
+++ b/Doc/library/token.rst
@@ -44,25 +44,51 @@ functions. The functions mirror definitions in the Python C header files.
 
 The token constants are:
 
-.. include:: token-list.inc
+.. data:: ENDMARKER
+
+.. data:: NAME
+
+.. data:: NUMBER
+
+.. data:: STRING
+
+.. data:: NEWLINE
+
+.. data:: INDENT
+
+.. data:: DEDENT
+
+.. data:: OP
+
+.. data:: TYPE_IGNORE
+
+.. data:: TYPE_COMMENT
+
+   Token value indicating that a type comment was recognized. Such
+   tokens are only produced when :func:`ast.parse` is invoked with
+   ``type_comments=True``.
+
+.. data:: SOFT_KEYWORD
+
+.. data:: FSTRING_START
 
-The following token type values aren't used by the C tokenizer but are needed for
-the :mod:`tokenize` module.
+.. data:: FSTRING_MIDDLE
+
+.. data:: FSTRING_END
 
 .. data:: COMMENT
-   :noindex:
 
    Token value used to indicate a comment.
 
-
 .. data:: NL
-   :noindex:
 
    Token value used to indicate a non-terminating newline. The
    :data:`NEWLINE` token indicates the end of a logical line of Python code;
    ``NL`` tokens are generated when a logical line of code is continued over
    multiple physical lines.
 
+.. data:: ERRORTOKEN
+
 
 .. data:: ENCODING
 
@@ -70,14 +96,17 @@ the :mod:`tokenize` module.
    into text. The first token returned by :func:`tokenize.tokenize` will
    always be an ``ENCODING`` token.
 
+   This token type isn't used by the C tokenizer but is needed for
+   the :mod:`tokenize` module.
 
-.. data:: TYPE_COMMENT
-   :noindex:
 
-   Token value indicating that a type comment was recognized. Such
-   tokens are only produced when :func:`ast.parse` is invoked with
-   ``type_comments=True``.
+The remaining tokens represent literal text; most are :ref:`operators`
+and :ref:`delimiters`:
 
+.. include:: token-list.inc
+
+.. data:: N_TOKENS
 
+.. data:: NT_OFFSET
 
 .. data:: EXACT_TOKEN_TYPES
diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py
index d32747f19945d8..2672c9054d3518 100755
--- a/Tools/build/generate_token.py
+++ b/Tools/build/generate_token.py
@@ -1,10 +1,17 @@
 #!/usr/bin/env python3
 # This script generates token related files from Grammar/Tokens:
 #
-# Doc/library/token-list.inc
-# Include/token.h
-# Parser/token.c
-# Lib/token.py
+# make_rst:
+# Doc/library/token-list.inc
+# Doc/library/token.rst (checked, not generated)
+# make_h:
+# Include/token.h
+# make_c:
+# Parser/token.c
+# make_py:
+# Lib/token.py
+
+import re
 
 SCRIPT_NAME = 'Tools/build/generate_token.py'
@@ -200,22 +207,46 @@ def make_c(infile, outfile='Parser/token.c'):
 
 token_inc_template = f"""\
 .. {AUTO_GENERATED_BY_SCRIPT}
 
 %s
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
 """
 
-def make_rst(infile, outfile='Doc/library/token-list.inc'):
+def make_rst(infile, outfile='Doc/library/token-list.inc',
+             rstfile='Doc/library/token.rst'):
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
     tok_to_string = {value: s for s, value in string_to_tok.items()}
 
+    needs_handwritten_doc = set()
+
     names = []
-    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
-        names.append('.. data:: %s' % (name,))
+    for value, name in enumerate(tok_names):
         if value in tok_to_string:
+            assert name.isupper()
+            names.append('.. data:: %s' % (name,))
             names.append('')
             names.append('   Token value for ``"%s"``.' % tok_to_string[value])
-            names.append('')
+            names.append('')
+        else:
+            needs_handwritten_doc.add(name)
+
+    has_handwritten_doc = set()
+    with open(rstfile) as fileobj:
+        tokendef_re = re.compile(r'.. data:: (\w+)')
+        for line in fileobj:
+            if match := tokendef_re.fullmatch(line.strip()):
+                if match[1].isupper():
+                    has_handwritten_doc.add(match[1])
+
+    # Exclude non-token constants in token.py
+    has_handwritten_doc -= {'N_TOKENS', 'NT_OFFSET', 'EXACT_TOKEN_TYPES'}
+
+    if needs_handwritten_doc != has_handwritten_doc:
+        message_parts = [f'ERROR: {rstfile} does not document all tokens!']
+        undocumented = needs_handwritten_doc - has_handwritten_doc
+        extra = has_handwritten_doc - needs_handwritten_doc
+        if undocumented:
+            message_parts.append(f'Undocumented tokens: {undocumented}')
+        if extra:
+            message_parts.append(f'Documented nonexistent tokens: {extra}')
+        exit('\n'.join(message_parts))
 
     if update_file(outfile, token_inc_template % '\n'.join(names)):
         print("%s regenerated from %s" % (outfile, infile))

From d400ae7c62ff17bc92f190a7a083e80ef6030acf Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Wed, 22 Jan 2025 18:01:54 +0100
Subject: [PATCH 02/12] Change docs for the "literal" tokens to a list-table

---
 Doc/library/token-list.inc | 292 ++++++++++++----------------------
 Tools/build/generate_token.py | 12 +-
 2 files changed, 109 insertions(+), 195 deletions(-)

diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc
index 128bb218dfb02c..357638aed2714a 100644
--- a/Doc/library/token-list.inc
+++ b/Doc/library/token-list.inc
@@ -1,193 +1,103 @@
 .. Auto-generated by Tools/build/generate_token.py
 
-.. data:: LPAR
-
-   Token value for ``"("``.
-
-.. data:: RPAR
-
-   Token value for ``")"``.
-
-.. data:: LSQB
-
-   Token value for ``"["``.
-
-.. data:: RSQB
-
-   Token value for ``"]"``.
-
-.. data:: COLON
-
-   Token value for ``":"``.
-
-.. data:: COMMA
-
-   Token value for ``","``.
-
-.. data:: SEMI
-
-   Token value for ``";"``.
-
-.. data:: PLUS
-
-   Token value for ``"+"``.
-
-.. data:: MINUS
-
-   Token value for ``"-"``.
-
-.. data:: STAR
-
-   Token value for ``"*"``.
-
-.. data:: SLASH
-
-   Token value for ``"/"``.
-
-.. data:: VBAR
-
-   Token value for ``"|"``.
-
-.. data:: AMPER
-
-   Token value for ``"&"``.
-
-.. data:: LESS
-
-   Token value for ``"<"``.
-
-.. data:: GREATER
-
-   Token value for ``">"``.
- -.. data:: EQUAL - - Token value for ``"="``. - -.. data:: DOT - - Token value for ``"."``. - -.. data:: PERCENT - - Token value for ``"%"``. - -.. data:: LBRACE - - Token value for ``"{"``. - -.. data:: RBRACE - - Token value for ``"}"``. - -.. data:: EQEQUAL - - Token value for ``"=="``. - -.. data:: NOTEQUAL - - Token value for ``"!="``. - -.. data:: LESSEQUAL - - Token value for ``"<="``. - -.. data:: GREATEREQUAL - - Token value for ``">="``. - -.. data:: TILDE - - Token value for ``"~"``. - -.. data:: CIRCUMFLEX - - Token value for ``"^"``. - -.. data:: LEFTSHIFT - - Token value for ``"<<"``. - -.. data:: RIGHTSHIFT - - Token value for ``">>"``. - -.. data:: DOUBLESTAR - - Token value for ``"**"``. - -.. data:: PLUSEQUAL - - Token value for ``"+="``. - -.. data:: MINEQUAL - - Token value for ``"-="``. - -.. data:: STAREQUAL - - Token value for ``"*="``. - -.. data:: SLASHEQUAL - - Token value for ``"/="``. - -.. data:: PERCENTEQUAL - - Token value for ``"%="``. - -.. data:: AMPEREQUAL - - Token value for ``"&="``. - -.. data:: VBAREQUAL - - Token value for ``"|="``. - -.. data:: CIRCUMFLEXEQUAL - - Token value for ``"^="``. - -.. data:: LEFTSHIFTEQUAL - - Token value for ``"<<="``. - -.. data:: RIGHTSHIFTEQUAL - - Token value for ``">>="``. - -.. data:: DOUBLESTAREQUAL - - Token value for ``"**="``. - -.. data:: DOUBLESLASH - - Token value for ``"//"``. - -.. data:: DOUBLESLASHEQUAL - - Token value for ``"//="``. - -.. data:: AT - - Token value for ``"@"``. - -.. data:: ATEQUAL - - Token value for ``"@="``. - -.. data:: RARROW - - Token value for ``"->"``. - -.. data:: ELLIPSIS - - Token value for ``"..."``. - -.. data:: COLONEQUAL - - Token value for ``":="``. - -.. data:: EXCLAMATION - - Token value for ``"!"``. +.. list-table:: + :header-rows: 1 + + * - Token + - Value + * - .. data:: LPAR + - ``"("`` + * - .. data:: RPAR + - ``")"`` + * - .. data:: LSQB + - ``"["`` + * - .. data:: RSQB + - ``"]"`` + * - .. data:: COLON + - ``":"`` + * - .. data:: COMMA + - ``","`` + * - .. data:: SEMI + - ``";"`` + * - .. data:: PLUS + - ``"+"`` + * - .. data:: MINUS + - ``"-"`` + * - .. data:: STAR + - ``"*"`` + * - .. data:: SLASH + - ``"/"`` + * - .. data:: VBAR + - ``"|"`` + * - .. data:: AMPER + - ``"&"`` + * - .. data:: LESS + - ``"<"`` + * - .. data:: GREATER + - ``">"`` + * - .. data:: EQUAL + - ``"="`` + * - .. data:: DOT + - ``"."`` + * - .. data:: PERCENT + - ``"%"`` + * - .. data:: LBRACE + - ``"{"`` + * - .. data:: RBRACE + - ``"}"`` + * - .. data:: EQEQUAL + - ``"=="`` + * - .. data:: NOTEQUAL + - ``"!="`` + * - .. data:: LESSEQUAL + - ``"<="`` + * - .. data:: GREATEREQUAL + - ``">="`` + * - .. data:: TILDE + - ``"~"`` + * - .. data:: CIRCUMFLEX + - ``"^"`` + * - .. data:: LEFTSHIFT + - ``"<<"`` + * - .. data:: RIGHTSHIFT + - ``">>"`` + * - .. data:: DOUBLESTAR + - ``"**"`` + * - .. data:: PLUSEQUAL + - ``"+="`` + * - .. data:: MINEQUAL + - ``"-="`` + * - .. data:: STAREQUAL + - ``"*="`` + * - .. data:: SLASHEQUAL + - ``"/="`` + * - .. data:: PERCENTEQUAL + - ``"%="`` + * - .. data:: AMPEREQUAL + - ``"&="`` + * - .. data:: VBAREQUAL + - ``"|="`` + * - .. data:: CIRCUMFLEXEQUAL + - ``"^="`` + * - .. data:: LEFTSHIFTEQUAL + - ``"<<="`` + * - .. data:: RIGHTSHIFTEQUAL + - ``">>="`` + * - .. data:: DOUBLESTAREQUAL + - ``"**="`` + * - .. data:: DOUBLESLASH + - ``"//"`` + * - .. data:: DOUBLESLASHEQUAL + - ``"//="`` + * - .. data:: AT + - ``"@"`` + * - .. data:: ATEQUAL + - ``"@="`` + * - .. data:: RARROW + - ``"->"`` + * - .. data:: ELLIPSIS + - ``"..."`` + * - .. 
data:: COLONEQUAL
+     - ``":="``
+   * - .. data:: EXCLAMATION
+     - ``"!"``
diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py
index 2672c9054d3518..e25837eb715bc0 100755
--- a/Tools/build/generate_token.py
+++ b/Tools/build/generate_token.py
@@ -206,6 +206,12 @@ def make_c(infile, outfile='Parser/token.c'):
 
 token_inc_template = f"""\
 .. {AUTO_GENERATED_BY_SCRIPT}
+
+.. list-table::
+   :header-rows: 1
+
+   * - Token
+     - Value
 %s
 """
 
@@ -220,10 +226,8 @@ def make_rst(infile, outfile='Doc/library/token-list.inc',
     for value, name in enumerate(tok_names):
         if value in tok_to_string:
             assert name.isupper()
-            names.append('.. data:: %s' % (name,))
-            names.append('')
-            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
-            names.append('')
+            names.append(f'   * - .. data:: {name}')
+            names.append(f'     - ``"{tok_to_string[value]}"``')
         else:
             needs_handwritten_doc.add(name)

From 4411c370437cc7c929c69f872d635687ce4e9bcd Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Wed, 12 Feb 2025 18:04:12 +0100
Subject: [PATCH 03/12] Document most of the tokens; improve top-level grammar docs; link ENDMARKER to that

---
 Doc/library/token.rst | 75 ++++++++++++++++++++++++---
 Doc/reference/toplevel_components.rst | 11 ++--
 2 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/Doc/library/token.rst b/Doc/library/token.rst
index 73a5fff3f04ce8..cdba4652d725e6 100644
--- a/Doc/library/token.rst
+++ b/Doc/library/token.rst
@@ -46,22 +46,58 @@ The token constants are:
 
 .. data:: ENDMARKER
 
+   Token value that indicates the end of input.
+   Used in :ref:`top-level grammar rules `.
+
 .. data:: NAME
 
+   Token value that indicates an :ref:`identifier `.
+   Note that keywords are also identifiers.
+
 .. data:: NUMBER
 
+   Token value that indicates a :ref:`numeric literal `.
+
 .. data:: STRING
 
+   Token value that indicates a :ref:`string or byte literal `.
+   The token string is not interpreted: it includes the prefix (if any)
+   and the quote characters; escape sequences are included with their
+   initial backslash.
+
 .. data:: NEWLINE
 
+   Token value that indicates the end of a :ref:`logical line `
+   of Python code.
+
+.. data:: NL
+
+   Token value used to indicate a non-terminating newline.
+   :data:`!NL` tokens are generated when a logical line of code is continued
+   over multiple physical lines. The parser ignores :data:`!NL` tokens.
+
 .. data:: INDENT
 
+   Token value used at the beginning of a :ref:`logical line `
+   to indicate the start of an :ref:`indented block `.
+
 .. data:: DEDENT
 
+   Token value used at the beginning of a :ref:`logical line `
+   to indicate the end of an :ref:`indented block `.
+
 .. data:: OP
 
+   A generic token value returned by the :mod:`tokenize` module for
+   :ref:`operator ` and :ref:`delimiter `.
+   See the :mod:`tokenize` module documentation for details.
+
 .. data:: TYPE_IGNORE
 
+   Token value indicating that a ``type: ignore`` comment was recognized.
+   Such tokens are only produced when :func:`ast.parse` is invoked with
+   ``type_comments=True``.
+
 .. data:: TYPE_COMMENT
 
    Token value indicating that a type comment was recognized. Such
@@ -72,23 +108,44 @@ The token constants are:
 
 .. data:: FSTRING_START
 
+   .. impl-detail::
+
+      Token value used to indicate the beginning of a
+      :ref:`f-string `.
+      The token string includes the prefix and the opening quote, but none
+      of the contents of the literal.
+
 .. data:: FSTRING_MIDDLE
 
+   .. impl-detail::
+
+      Token value used for literal text inside an :ref:`f-string `,
+      including format specifications.
+ + Replacement fields (that is, the non-literal parts of f-strings) use + the same tokens as other expressions, and are delimited by :data:`LBRACE` + and :data:`RBRACE` tokens. + .. data:: FSTRING_END + .. impl-detail:: + + Token value used to indicate the end of a :ref:`f-string `. + The token string contains the closing quote. + .. data:: COMMENT Token value used to indicate a comment. + The parser ignores :data:`!COMMENT` tokens. -.. data:: NL +.. data:: ERRORTOKEN - Token value used to indicate a non-terminating newline. The - :data:`NEWLINE` token indicates the end of a logical line of Python code; - ``NL`` tokens are generated when a logical line of code is continued over - multiple physical lines. + Token value used to indicate wrong input. -.. data:: ERRORTOKEN + .. impl-detail:: + The :mod:`tokenize` module generally indicates errors by + raising exceptions instead of emitting this token. .. data:: ENCODING @@ -96,8 +153,10 @@ The token constants are: into text. The first token returned by :func:`tokenize.tokenize` will always be an ``ENCODING`` token. - This token type isn't used by the C tokenizer but is needed for - the :mod:`tokenize` module. + .. impl-detail:: + + This token type isn't used by the C tokenizer but is needed for + the :mod:`tokenize` module. The remaining tokens represent literal text; most are :ref:`operators` and :ref:`delimiters`: diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst index f155fafbe4d738..a5fd320c2cb308 100644 --- a/Doc/reference/toplevel_components.rst +++ b/Doc/reference/toplevel_components.rst @@ -69,7 +69,7 @@ All input read from non-interactive files has the same form: .. grammar-snippet:: :group: python-grammar - file_input: (NEWLINE | `statement`)* + file: `statement`* ENDMARKER This syntax is used in the following situations: @@ -90,7 +90,7 @@ Input in interactive mode is parsed using the following grammar: .. grammar-snippet:: :group: python-grammar - interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE + interactive: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE | ENDMARKER Note that a (top-level) compound statement must be followed by a blank line in interactive mode; this is needed to help the parser detect the end of the input. @@ -107,5 +107,8 @@ Expression input :func:`eval` is used for expression input. It ignores leading whitespace. The string argument to :func:`eval` must have the following form: -.. productionlist:: python-grammar - eval_input: `expression_list` NEWLINE* +.. productionlist:: + :group: python-grammar + + eval: `expressions` NEWLINE* ENDMARKER + expressions: ','.`expression`+ [','] From 1bf55112465c60053faa143ae0f6fae137d46c84 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 19 Feb 2025 17:25:13 +0100 Subject: [PATCH 04/12] Write prose; reorganize the token list --- Doc/library/token.rst | 101 +++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 40 deletions(-) diff --git a/Doc/library/token.rst b/Doc/library/token.rst index cdba4652d725e6..5fe3bec4e9b1f8 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -44,11 +44,6 @@ functions. The functions mirror definitions in the Python C header files. The token constants are: -.. data:: ENDMARKER - - Token value that indicates the end of input. - Used in :ref:`top-level grammar rules `. - .. data:: NAME Token value that indicates an :ref:`identifier `. 
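
.. editor's note (not part of the patch): The NEWLINE/NL and COMMENT
   behaviour documented in this patch is easy to verify with the stdlib
   ``tokenize`` module. A minimal sketch; the sample source string and
   variable names below are illustrative only, not taken from the patch::

       import io
       import tokenize

       # One logical line continued over two physical lines, plus a comment.
       # The continuation inside the parentheses yields an NL token; the end
       # of the whole statement yields a NEWLINE token.
       source = "total = (1 +\n         2)  # sum\n"

       for tok in tokenize.generate_tokens(io.StringIO(source).readline):
           print(tokenize.tok_name[tok.type], repr(tok.string))

       # Prints, in order: NAME, OP, OP, NUMBER, OP, NL, NUMBER, OP,
       # COMMENT, NEWLINE, ENDMARKER (each with its matched string).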
@@ -65,6 +60,17 @@ The token constants are: and the quote characters; escape sequences are included with their initial backslash. +.. data:: OP + + A generic token value returned by the :mod:`tokenize` module for + :ref:`operators ` and :ref:`delimiters `. + See the :mod:`tokenize` module documentation for details. + +.. data:: COMMENT + + Token value used to indicate a comment. + The parser ignores :data:`!COMMENT` tokens. + .. data:: NEWLINE Token value that indicates the end of a :ref:`logical line ` @@ -86,26 +92,6 @@ The token constants are: Token value used at the beginning of a :ref:`logical line ` to indicate the end of an :ref:`indented block `. -.. data:: OP - - A generic token value returned by the :mod:`tokenize` module for - :ref:`operator ` and :ref:`delimiter `. - See the :mod:`tokenize` module documentation for details. - -.. data:: TYPE_IGNORE - - Token value indicating that a ``type: ignore`` comment was recognized. - Such tokens are only produced when :func:`ast.parse` is invoked with - ``type_comments=True``. - -.. data:: TYPE_COMMENT - - Token value indicating that a type comment was recognized. Such - tokens are only produced when :func:`ast.parse` is invoked with - ``type_comments=True``. - -.. data:: SOFT_KEYWORD - .. data:: FSTRING_START .. impl-detail:: @@ -133,19 +119,10 @@ The token constants are: Token value used to indicate the end of a :ref:`f-string `. The token string contains the closing quote. -.. data:: COMMENT - - Token value used to indicate a comment. - The parser ignores :data:`!COMMENT` tokens. - -.. data:: ERRORTOKEN - - Token value used to indicate wrong input. - - .. impl-detail:: +.. data:: ENDMARKER - The :mod:`tokenize` module generally indicates errors by - raising exceptions instead of emitting this token. + Token value that indicates the end of input. + Used in :ref:`top-level grammar rules `. .. data:: ENCODING @@ -158,14 +135,55 @@ The token constants are: This token type isn't used by the C tokenizer but is needed for the :mod:`tokenize` module. -The remaining tokens represent literal text; most are :ref:`operators` -and :ref:`delimiters`: + +The following token types are not produced by the :mod:`tokenize` module, +and are defined for special uses in the tokenizer or parser: + +.. data:: TYPE_IGNORE + + Token value indicating that a ``type: ignore`` comment was recognized. + Such tokens are produced instead of regular :data:`COMMENT` tokens only when + :func:`ast.parse` is invoked with ``type_comments=True``. + +.. data:: TYPE_COMMENT + + Token value indicating that a type comment was recognized. + Such tokens are produced instead of regular :data:`COMMENT` tokens only when + :func:`ast.parse` is invoked with ``type_comments=True``. + +.. data:: SOFT_KEYWORD + + Token value indicating a :ref:`soft keyword `. + + The tokenizer never produces this value. + To check for a soft keyword, pass a :data:`NAME` token's string to + :func:`keyword.issoftkeyword`. + +.. data:: ERRORTOKEN + + Token value used to indicate wrong input. + + The :mod:`tokenize` module generally indicates errors by + raising exceptions instead of emitting this token. + It can also emit tokens such as :data:`OP` or :data:`NAME` with strings that + are later rejected by the parser. + + +The remaining tokens represent specific :ref:`operators` and :ref:`delimiters`. +(The :mod:`tokenize` module reports these as :data:`OP`; see ``exact_type`` +in the :mod:`tokenize` documentation for details.) .. include:: token-list.inc + +The following are non-token constants: + .. 
data:: N_TOKENS -.. data:: NT_OFFSET + The number of token types defined in this module. + +.. NT_OFFSET is deliberately undocumented; if you need it you should be + reading the source .. data:: EXACT_TOKEN_TYPES @@ -190,6 +208,9 @@ and :ref:`delimiters`: to support parsing older Python versions for :func:`ast.parse` with ``feature_version`` set to 6 or lower). +.. versionchanged:: 3.12 + Added :data:`EXCLAMATION`. + .. versionchanged:: 3.13 Removed :data:`!AWAIT` and :data:`!ASYNC` tokens again. From e1e498b0b4c0810821321ceeab7cd156d68849b2 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 19 Feb 2025 17:38:57 +0100 Subject: [PATCH 05/12] Fixups --- Doc/library/token.rst | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Doc/library/token.rst b/Doc/library/token.rst index 5fe3bec4e9b1f8..de01fc020197c1 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -55,7 +55,8 @@ The token constants are: .. data:: STRING - Token value that indicates a :ref:`string or byte literal `. + Token value that indicates a :ref:`string or byte literal `, + excluding :ref:`f-strings `. The token string is not interpreted: it includes the prefix (if any) and the quote characters; escape sequences are included with their initial backslash. @@ -94,29 +95,32 @@ The token constants are: .. data:: FSTRING_START + Token value used to indicate the beginning of a + :ref:`f-string `. + .. impl-detail:: - Token value used to indicate the beginning of a - :ref:`f-string `. The token string includes the prefix and the opening quote, but none of the contents of the literal. .. data:: FSTRING_MIDDLE - .. impl-detail:: + Token value used for literal text inside an :ref:`f-string `, + including format specifications. - Token value used for literal text inside an :ref:`f-string `, - including format specifications. + .. impl-detail:: Replacement fields (that is, the non-literal parts of f-strings) use - the same tokens as other expressions, and are delimited by :data:`LBRACE` - and :data:`RBRACE` tokens. + the same tokens as other expressions, and are delimited by + :data:`LBRACE`, :data:`RBRACE`, :data:`EXCLAMATION` and :data:`COLON` + tokens. .. data:: FSTRING_END + Token value used to indicate the end of a :ref:`f-string `. + .. impl-detail:: - Token value used to indicate the end of a :ref:`f-string `. The token string contains the closing quote. .. data:: ENDMARKER @@ -169,14 +173,15 @@ and are defined for special uses in the tokenizer or parser: are later rejected by the parser. -The remaining tokens represent specific :ref:`operators` and :ref:`delimiters`. +The remaining tokens represent specific :ref:`operators ` and +:ref:`delimiters `. (The :mod:`tokenize` module reports these as :data:`OP`; see ``exact_type`` in the :mod:`tokenize` documentation for details.) .. include:: token-list.inc -The following are non-token constants: +The following non-token constants are provided: .. 
data:: N_TOKENS From 8bbbb0f8e5c2f802e3d3c1cb7aac38d039c3ae01 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 19 Feb 2025 17:42:39 +0100 Subject: [PATCH 06/12] Correct directive name Co-authored-by: Blaise Pabon --- Doc/reference/toplevel_components.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst index a5fd320c2cb308..48fd84e559e66d 100644 --- a/Doc/reference/toplevel_components.rst +++ b/Doc/reference/toplevel_components.rst @@ -107,7 +107,7 @@ Expression input :func:`eval` is used for expression input. It ignores leading whitespace. The string argument to :func:`eval` must have the following form: -.. productionlist:: +.. grammar-snippet:: :group: python-grammar eval: `expressions` NEWLINE* ENDMARKER From eed407e00df75f5c8eac4072551612a619e9d81a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 19 Feb 2025 18:03:37 +0100 Subject: [PATCH 07/12] Don't use the Gather syntax Co-authored-by: Blaise Pabon --- Doc/reference/toplevel_components.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst index 48fd84e559e66d..786eacbd013df9 100644 --- a/Doc/reference/toplevel_components.rst +++ b/Doc/reference/toplevel_components.rst @@ -111,4 +111,4 @@ string argument to :func:`eval` must have the following form: :group: python-grammar eval: `expressions` NEWLINE* ENDMARKER - expressions: ','.`expression`+ [','] + expressions: `expression` (',' `expression` )* [','] From 0dd236f25a3c27a1eccd99fe31fe27257254925d Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 26 Feb 2025 16:10:08 +0100 Subject: [PATCH 08/12] Revert some changes to the toplevel_components --- Doc/reference/toplevel_components.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst index 786eacbd013df9..9e8579b1bda69f 100644 --- a/Doc/reference/toplevel_components.rst +++ b/Doc/reference/toplevel_components.rst @@ -69,7 +69,7 @@ All input read from non-interactive files has the same form: .. grammar-snippet:: :group: python-grammar - file: `statement`* ENDMARKER + file_input: (`statement` | NEWLINE)* ENDMARKER This syntax is used in the following situations: @@ -90,7 +90,7 @@ Input in interactive mode is parsed using the following grammar: .. grammar-snippet:: :group: python-grammar - interactive: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE | ENDMARKER + interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE | ENDMARKER Note that a (top-level) compound statement must be followed by a blank line in interactive mode; this is needed to help the parser detect the end of the input. @@ -110,5 +110,4 @@ string argument to :func:`eval` must have the following form: .. 
grammar-snippet::
   :group: python-grammar
 
-   eval: `expressions` NEWLINE* ENDMARKER
-   expressions: `expression` (',' `expression` )* [',']
+   eval_input: `expression_list` NEWLINE* ENDMARKER

From dca268ed98cbc8ae1278d6dfede0efb86cbc8c5f Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Wed, 26 Feb 2025 16:11:13 +0100
Subject: [PATCH 09/12] Revert an order change

---
 Doc/reference/toplevel_components.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst
index 9e8579b1bda69f..bd64b1c08bd1ff 100644
--- a/Doc/reference/toplevel_components.rst
+++ b/Doc/reference/toplevel_components.rst
@@ -69,7 +69,7 @@ All input read from non-interactive files has the same form:
 .. grammar-snippet::
    :group: python-grammar
 
-   file_input: (`statement` | NEWLINE)* ENDMARKER
+   file_input: (NEWLINE | `statement`)* ENDMARKER
 
 This syntax is used in the following situations:

From e02ced8eadfa4edad70130c76c7597acc2723b14 Mon Sep 17 00:00:00 2001
From: Petr Viktorin
Date: Thu, 27 Feb 2025 11:59:42 +0100
Subject: [PATCH 10/12] Apply suggestions from code review

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Lysandros Nikolaou
---
 Doc/library/token-list.inc | 1 +
 Doc/library/token.rst | 31 +++++++++++++++----------------
 Tools/build/generate_token.py | 1 +
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/Doc/library/token-list.inc b/Doc/library/token-list.inc
index 357638aed2714a..655758c4a400cc 100644
--- a/Doc/library/token-list.inc
+++ b/Doc/library/token-list.inc
@@ -1,6 +1,7 @@
 .. Auto-generated by Tools/build/generate_token.py
 
 .. list-table::
+   :align: left
    :header-rows: 1
 
    * - Token
     - Value

diff --git a/Doc/library/token.rst b/Doc/library/token.rst
index de01fc020197c1..a94d45c5b8b36a 100644
--- a/Doc/library/token.rst
+++ b/Doc/library/token.rst
@@ -47,7 +47,7 @@ The token constants are:
 .. data:: NAME
 
    Token value that indicates an :ref:`identifier `.
-   Note that keywords are also identifiers.
+   Note that keywords are also initially tokenized as ``NAME`` tokens.
 
 .. data:: NUMBER
 
@@ -56,10 +56,10 @@ The token constants are:
 .. data:: STRING
 
    Token value that indicates a :ref:`string or byte literal `,
-   excluding :ref:`f-strings `.
-   The token string is not interpreted: it includes the prefix (if any)
-   and the quote characters; escape sequences are included with their
-   initial backslash.
+   excluding :ref:`formatted string literals `.
+   The token string is not interpreted:
+   it includes the surrounding quotation marks and the prefix (if given);
+   backslashes are included literally, without processing escape sequences.
 
 .. data:: OP
 
@@ -94,17 +94,17 @@ The token constants are:
 
 .. data:: FSTRING_START
 
-   Token value used to indicate the beginning of a
-   :ref:`f-string `.
+   Token value used to indicate the beginning of an
+   :ref:`f-string literal `.
 
    .. impl-detail::
 
-      The token string includes the prefix and the opening quote, but none
+      The token string includes the prefix and the opening quote(s), but none
       of the contents of the literal.
 
 .. data:: FSTRING_MIDDLE
 
-   Token value used for literal text inside an :ref:`f-string `,
+   Token value used for literal text inside an :ref:`f-string literal `,
    including format specifications.
 
   ..
impl-detail:: @@ -121,7 +120,7 @@ The token constants are: .. impl-detail:: - The token string contains the closing quote. + The token string contains the closing quote(s). .. data:: ENDMARKER @@ -146,14 +145,14 @@ and are defined for special uses in the tokenizer or parser: .. data:: TYPE_IGNORE Token value indicating that a ``type: ignore`` comment was recognized. - Such tokens are produced instead of regular :data:`COMMENT` tokens only when - :func:`ast.parse` is invoked with ``type_comments=True``. + Such tokens are produced instead of regular :data:`COMMENT` tokens only + with the :data:`~ast.PyCF_TYPE_COMMENTS` flag. .. data:: TYPE_COMMENT Token value indicating that a type comment was recognized. - Such tokens are produced instead of regular :data:`COMMENT` tokens only when - :func:`ast.parse` is invoked with ``type_comments=True``. + Such tokens are produced instead of regular :data:`COMMENT` tokens only + with the :data:`~ast.PyCF_TYPE_COMMENTS` flag. .. data:: SOFT_KEYWORD diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index e25837eb715bc0..5577998029e3cb 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -208,6 +208,7 @@ def make_c(infile, outfile='Parser/token.c'): .. {AUTO_GENERATED_BY_SCRIPT} .. list-table:: + :align: left :header-rows: 1 * - Token From 02fff75da34c460c58ea0b6cba96590192bc9792 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 5 Mar 2025 16:31:55 +0100 Subject: [PATCH 11/12] Add a note and reword OP docs --- Doc/library/token.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Doc/library/token.rst b/Doc/library/token.rst index a94d45c5b8b36a..24455b1ef77893 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -19,6 +19,10 @@ change between Python versions. The module also provides a mapping from numeric codes to names and some functions. The functions mirror definitions in the Python C header files. +Note that a token's value may depend on tokenizer options. For example, a +``"+"`` token may be reported as either :data:`PLUS` or :data:`OP`, or +a ``"match"`` token may be either :data:`NAME` or :data:`SOFT_KEYWORD`. + .. data:: tok_name @@ -63,9 +67,14 @@ The token constants are: .. data:: OP - A generic token value returned by the :mod:`tokenize` module for - :ref:`operators ` and :ref:`delimiters `. - See the :mod:`tokenize` module documentation for details. + A generic token value that indicates an + :ref:`operator ` or :ref:`delimiter `. + + .. impl-detail:: + + This value is only reported by the :mod:`tokenize` module. + Internally, the tokenizer uses + :ref:`exact token types ` instead. .. data:: COMMENT @@ -172,6 +181,8 @@ and are defined for special uses in the tokenizer or parser: are later rejected by the parser. +.. _token_operators_delimiters: + The remaining tokens represent specific :ref:`operators ` and :ref:`delimiters `. 
(The :mod:`tokenize` module reports these as :data:`OP`; see ``exact_type`` From 9b6eb2abd69d1281717390c6e8431c125ffb6b3c Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 5 Mar 2025 18:07:26 +0100 Subject: [PATCH 12/12] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Tools/build/generate_token.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Tools/build/generate_token.py b/Tools/build/generate_token.py index 5577998029e3cb..a5f9828c466eda 100755 --- a/Tools/build/generate_token.py +++ b/Tools/build/generate_token.py @@ -234,11 +234,10 @@ def make_rst(infile, outfile='Doc/library/token-list.inc', has_handwritten_doc = set() with open(rstfile) as fileobj: - tokendef_re = re.compile(r'.. data:: (\w+)') + tokendef_re = re.compile(r'.. data:: ([0-9A-Z_]+)\s*') for line in fileobj: - if match := tokendef_re.fullmatch(line.strip()): - if match[1].isupper(): - has_handwritten_doc.add(match[1]) + if match := tokendef_re.fullmatch(line): + has_handwritten_doc.add(match[1]) # Exclude non-token constants in token.py has_handwritten_doc -= {'N_TOKENS', 'NT_OFFSET', 'EXACT_TOKEN_TYPES'}
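
.. editor's note (not part of the patches): The OP/``exact_type`` split and
   the soft-keyword handling documented earlier in this series can be
   observed with documented APIs only. A minimal sketch; the sample source
   line is made up for illustration::

       import io
       import keyword
       import token
       import tokenize

       source = "match = 1 + 2\n"

       for tok in tokenize.generate_tokens(io.StringIO(source).readline):
           if tok.type == token.OP:
               # tokenize reports a generic OP; exact_type recovers the
               # specific constant (EQUAL, PLUS, ...) via EXACT_TOKEN_TYPES.
               print("OP ->", token.tok_name[tok.exact_type], repr(tok.string))
           elif tok.type == token.NAME:
               # The tokenizer emits NAME even for soft keywords like "match";
               # keyword.issoftkeyword() tells them apart after the fact.
               print("NAME", repr(tok.string),
                     "soft keyword:", keyword.issoftkeyword(tok.string))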