Skip to content

Commit 72cabb2

Browse files
bpo-40939: Use the new grammar for the grammar specification documentation (GH-19969)
(We censor the heck out of actions and some other stuff using a custom "highlighter".) Co-authored-by: Guido van Rossum <[email protected]>
1 parent 67987ac commit 72cabb2

File tree

4 files changed

+91
-210
lines changed

4 files changed

+91
-210
lines changed

Doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
1717
'pyspecific', 'c_annotations', 'escape4chm',
18-
'asdl_highlight']
18+
'asdl_highlight', 'peg_highlight']
1919

2020

2121
doctest_global_setup = '''

Doc/reference/grammar.rst

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
11
Full Grammar specification
22
==========================
33

4-
This is the full Python grammar, as it is read by the parser generator and used
5-
to parse Python source files:
4+
This is the full Python grammar, derived directly from the grammar
5+
used to generate the CPython parser (see :source:`Grammar/python.gram`).
6+
The version here omits details related to code generation and
7+
error recovery.
68

7-
.. literalinclude:: ../../Grammar/Grammar
9+
The notation is a mixture of `EBNF
10+
<https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form>`_
11+
and `PEG <https://en.wikipedia.org/wiki/Parsing_expression_grammar>`_.
12+
In particular, ``&`` followed by a symbol, token or parenthesized
13+
group indicates a positive lookahead (i.e., is required to match but
14+
not consumed), while ``!`` indicates a negative lookahead (i.e., is
15+
required *not* to match). We use the ``|`` separator to mean PEG's
16+
"ordered choice" (written as ``/`` in traditional PEG grammars).
17+
18+
.. literalinclude:: ../../Grammar/python.gram
19+
:language: peg

Doc/tools/extensions/peg_highlight.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from pygments.lexer import RegexLexer, bygroups, include
2+
from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text
3+
4+
from sphinx.highlighting import lexers
5+
6+
7+
class PEGLexer(RegexLexer):
    """Pygments Lexer for PEG grammar (.gram) files

    This lexer strips the following elements from the grammar:

        - Meta-tags
        - Variable assignments
        - Actions
        - Lookaheads
        - Rule types
        - Rule options
        - Rules named `invalid_*` or `incorrect_*`

    Elements are stripped by mapping their regex groups to ``None`` in
    ``bygroups``, so no token is produced for the matched text.
    """

    name = "PEG"
    aliases = ["peg"]
    filenames = ["*.gram"]

    # Regex fragments reused by the rules below: an identifier (a word that
    # does not start with a digit) and an optional run of whitespace.
    _name = r"([^\W\d]\w*)"
    _text_ws = r"(\s*)"

    tokens = {
        # Whitespace and `#` comments running to the end of the line.
        "ws": [
            (r"\n", Text),
            (r"\s+", Text),
            (r"#.*$", Comment.Singleline),
        ],
        # `&` lookaheads that directly follow an alternative's "| ": on a
        # name, a single- or double-quoted string, or a parenthesized group.
        "lookaheads": [
            (r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
            (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)),
            (r'(?<=\|\s)(&".+"\s?)', bygroups(None)),
            (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
        ],
        # Meta-tags: `@name '''...'''` blocks and whole lines starting with `@`.
        "metas": [
            (r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
            (r"^(@.*)$", bygroups(None)),
        ],
        # Brace-delimited actions, matched non-greedily up to the first `}`.
        "actions": [
            (r"{(.|\n)+?}", bygroups(None)),
        ],
        # Quoted runs of word characters are highlighted as keywords; quoted
        # runs of non-word characters are emitted as plain text.
        "strings": [
            (r"'\w+?'", Keyword),
            (r'"\w+?"', Keyword),
            (r"'\W+?'", Text),
            (r'"\W+?"', Text),
        ],
        # `name =` prefixes that bind a variable inside an alternative.
        "variables": [
            (_name + _text_ws + "(=)", bygroups(None, None, None)),
        ],
        # Alternatives that consist only of an `invalid_*` / `incorrect_*`
        # rule reference, plus everything from a comment line mentioning
        # "invalid syntax" through the end of the input.
        "invalids": [
            (r"^(\s+\|\s+invalid_\w+\s*\n)", bygroups(None)),
            (r"^(\s+\|\s+incorrect_\w+\s*\n)", bygroups(None)),
            (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None)),
        ],
        # Rules are tried in the order listed, so the stripping states are
        # included ahead of the generic name/operator/punctuation fallbacks.
        "root": [
            include("invalids"),
            include("ws"),
            include("lookaheads"),
            include("metas"),
            include("actions"),
            include("strings"),
            include("variables"),
            # Upper-case names (other than those starting with NULL or EXTRA)
            # that are not followed by "(" are emitted as plain text.
            (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text),
            # Rule header: name, optional `[type]`, optional `(options)`, then
            # ":". Only the name and the colon are kept. Every fragment is a
            # raw string so the backslashes reach the regex engine without
            # triggering Python's invalid-escape-sequence warning.
            (
                r"^\s*"
                + _name
                + r"\s*"
                + r"(\[.*\])?"
                + r"\s*"
                + r"(\(.+\))?"
                + r"\s*(:)",
                bygroups(Name.Function, None, None, Punctuation),
            ),
            (_name, Name.Function),
            (r"[\||\.|\+|\*|\?]", Operator),
            (r"{|}|\(|\)|\[|\]", Punctuation),
            (r".", Text),
        ],
    }
71+
72+
73+
def setup(app):
    """Register the PEG lexer with Sphinx's highlighter.

    Stores a ``PEGLexer`` instance under the ``"peg"`` key of
    ``sphinx.highlighting.lexers``. The ``app`` argument is accepted but
    not used here.

    Returns the extension metadata dictionary (version string and the
    ``parallel_read_safe`` flag).
    """
    peg_lexer = PEGLexer()
    lexers["peg"] = peg_lexer

    extension_metadata = {"version": "1.0", "parallel_read_safe": True}
    return extension_metadata

Grammar/Grammar

Lines changed: 0 additions & 206 deletions
This file was deleted.

0 commit comments

Comments
 (0)