diff --git a/pep_sphinx_extensions/__init__.py b/pep_sphinx_extensions/__init__.py
index 6a878eaf8ca..ad9d16d5da2 100644
--- a/pep_sphinx_extensions/__init__.py
+++ b/pep_sphinx_extensions/__init__.py
@@ -9,11 +9,9 @@
 from docutils.writers.html5_polyglot import HTMLTranslator
 from sphinx import environment
 
-from pep_sphinx_extensions.generate_rss import (
-    create_rss_feed,
-    get_from_doctree,
-    pep_abstract,
-)
+from pep_sphinx_extensions.doctree import get_from_doctree
+from pep_sphinx_extensions.generate_bibtex import create_bibtex_files
+from pep_sphinx_extensions.generate_rss import create_rss_feed
 from pep_sphinx_extensions.pep_processor.html import (
     pep_html_builder,
     pep_html_translator,
@@ -51,6 +49,7 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None:
     if "internal_builder" not in app.tags:
         create_index_file(Path(app.outdir), app.builder.name)
         create_rss_feed(app.doctreedir, app.outdir)
+        create_bibtex_files(app.doctreedir, app.outdir)
 
 
 def set_description(
diff --git a/pep_sphinx_extensions/doctree.py b/pep_sphinx_extensions/doctree.py
new file mode 100644
index 00000000000..b7f646ff3cb
--- /dev/null
+++ b/pep_sphinx_extensions/doctree.py
@@ -0,0 +1,48 @@
+# This file is placed in the public domain or under the
+# CC0-1.0-Universal license, whichever is more permissive.
+
+from __future__ import annotations
+
+import pickle
+from pathlib import Path
+
+from docutils import nodes
+
+document_cache: dict[Path, dict[str, str]] = {}
+
+
+def pep_abstract(document: nodes.document) -> str:
+    """Return the first paragraph of the PEP abstract.
+    If not found, return the first paragraph of the introduction.
+    """
+    introduction = ""
+    for node in document.findall(nodes.section):
+        title_node = node.next_node(nodes.title)
+        if title_node is None:
+            continue
+
+        if title_node.astext() == "Abstract":
+            if (para_node := node.next_node(nodes.paragraph)) is not None:
+                return para_node.astext().strip().replace("\n", " ")
+            return ""
+        if title_node.astext() == "Introduction":
+            if (para_node := node.next_node(nodes.paragraph)) is not None:
+                introduction = para_node.astext().strip().replace("\n", " ")
+
+    return introduction
+
+
+def get_from_doctree(full_path: Path, text: str) -> str:
+    """Retrieve a header value from a pickled doctree, with caching."""
+    # Try and retrieve from cache
+    if full_path in document_cache:
+        return document_cache[full_path].get(text, "")
+
+    # Else load doctree
+    document = pickle.loads(full_path.read_bytes())
+    # Store the headers (populated in the PEPHeaders transform)
+    document_cache[full_path] = path_cache = document.get("headers", {})
+    # Store the Abstract
+    path_cache["Abstract"] = pep_abstract(document)
+    # Return the requested key
+    return path_cache.get(text, "")
diff --git a/pep_sphinx_extensions/generate_bibtex.py b/pep_sphinx_extensions/generate_bibtex.py
new file mode 100644
index 00000000000..04915c25e75
--- /dev/null
+++ b/pep_sphinx_extensions/generate_bibtex.py
@@ -0,0 +1,96 @@
+# This file is placed in the public domain or under the
+# CC0-1.0-Universal license, whichever is more permissive.
+
+from __future__ import annotations
+
+import re
+import textwrap
+from pathlib import Path
+
+from pep_sphinx_extensions.doctree import get_from_doctree
+
+# Characters that are special in LaTeX/BibTeX, mapped to a literal rendering.
+# "\~" and "\^" are accent commands, so "~" and "^" use the \text... macros;
+# a bare '"' would end a double-quoted field, so it is wrapped in braces.
+_BIBTEX_ESCAPES = str.maketrans({
+    "\\": r"\textbackslash{}",
+    "&": r"\&",
+    "%": r"\%",
+    "$": r"\$",
+    "#": r"\#",
+    "_": r"\_",
+    "{": r"\{",
+    "}": r"\}",
+    "~": r"\textasciitilde{}",
+    "^": r"\textasciicircum{}",
+    '"': '{"}',
+})
+_EMAIL_ADDRESS = re.compile(r"\s*<[^>]+>")
+# Month macros predefined by the standard BibTeX styles
+_BIBTEX_MONTHS = frozenset("jan feb mar apr may jun jul aug sep oct nov dec".split())
+
+
+def _escape_bibtex(text: str) -> str:
+    """Escape characters that are special in a BibTeX field value."""
+    return text.translate(_BIBTEX_ESCAPES)
+
+
+def _parse_created(created: str) -> tuple[str, str]:
+    """Parse a PEP 'Created' date string (e.g. '01-Jan-2020') into (year, month).
+
+    Returns the year as a string and the BibTeX month abbreviation.
+    Raises ValueError if the value is not in 'DD-Mon-YYYY' form.
+    """
+    parts = created.strip().split("-")
+    if len(parts) == 3:
+        _, month_abbr, year = parts
+        month = month_abbr.lower()
+        # The month is emitted unquoted, so it must be a macro BibTeX defines
+        if month in _BIBTEX_MONTHS and year.isdigit():
+            return year, month
+    raise ValueError(f"invalid 'Created' date: {created!r}")
+
+
+def _format_authors(author_header: str) -> str:
+    """Format the Author header value for BibTeX.
+
+    Strips email addresses and joins names with " and ".
+    """
+    # Remove email addresses in angle brackets
+    author_header = _EMAIL_ADDRESS.sub("", author_header)
+    # Split on commas and clean up
+    authors = [name.strip() for name in author_header.split(",") if name.strip()]
+    return " and ".join(authors)
+
+
+def _generate_bibtex_entry(full_path: Path) -> str:
+    """Generate a BibTeX entry for a single PEP from its doctree."""
+    number = int(get_from_doctree(full_path, "PEP"))
+    created = get_from_doctree(full_path, "Created")
+    author = get_from_doctree(full_path, "Author")
+    title = get_from_doctree(full_path, "Title")
+
+    year, month = _parse_created(created)
+    authors_bibtex = _escape_bibtex(_format_authors(author))
+    title_escaped = _escape_bibtex(title)
+
+    return textwrap.dedent(f"""\
+        @techreport{{pep{number},
+          author = "{authors_bibtex}",
+          title = "PEP {number} --- {title_escaped}",
+          institution = "Python Software Foundation",
+          year = "{year}",
+          month = {month},
+          type = "PEP",
+          number = "{number}",
+          url = "https://peps.python.org/pep-{number:0>4}/",
+        }}""")
+
+
+def create_bibtex_files(doctree_dir: str, output_dir: str) -> None:
+    """Generate a .bib file for each PEP in the output directory."""
+    out = Path(output_dir)
+    for doctree_file in Path(doctree_dir).glob("pep-????.doctree"):
+        pep_name = doctree_file.stem  # for example "pep-0008"
+        entry = _generate_bibtex_entry(doctree_file)
+        (out / f"{pep_name}.bib").write_text(entry + "\n", encoding="utf-8")
diff --git a/pep_sphinx_extensions/generate_rss.py b/pep_sphinx_extensions/generate_rss.py
index 296bb033e0e..b632fee19a1 100644
--- a/pep_sphinx_extensions/generate_rss.py
+++ b/pep_sphinx_extensions/generate_rss.py
@@ -4,12 +4,11 @@
 from __future__ import annotations
 
 import datetime as dt
-import pickle
 from email.utils import format_datetime, getaddresses
 from html import escape
 from pathlib import Path
 
-from docutils import nodes
+from pep_sphinx_extensions.doctree import get_from_doctree
 
 RSS_DESCRIPTION = (
     "Newest Python Enhancement Proposals (PEPs): "
@@ -23,24 +22,6 @@ def _format_rfc_2822(datetime: dt.datetime) -> str:
     return format_datetime(datetime, usegmt=True)
 
 
-document_cache: dict[Path, dict[str, str]] = {}
-
-
-def get_from_doctree(full_path: Path, text: str) -> str:
-    # Try and retrieve from cache
-    if full_path in document_cache:
-        return document_cache[full_path].get(text, "")
-
-    # Else load doctree
-    document = pickle.loads(full_path.read_bytes())
-    # Store the headers (populated in the PEPHeaders transform)
-    document_cache[full_path] = path_cache = document.get("headers", {})
-    # Store the Abstract
-    path_cache["Abstract"] = pep_abstract(document)
-    # Return the requested key
-    return path_cache.get(text, "")
-
-
 def pep_creation(full_path: Path) -> dt.datetime:
     created_str = get_from_doctree(full_path, "Created")
     try:
@@ -49,26 +30,6 @@ def pep_creation(full_path: Path) -> dt.datetime:
         return dt.datetime.min
 
 
-def pep_abstract(document: nodes.document) -> str:
-    """Return the first paragraph of the PEP abstract.
-    If not found, return the first paragraph of the introduction.
-    """
-    introduction = ""
-    for node in document.findall(nodes.section):
-        title_node = node.next_node(nodes.title)
-        if title_node is None:
-            continue
-
-        if title_node.astext() == "Abstract":
-            if (para_node := node.next_node(nodes.paragraph)) is not None:
-                return para_node.astext().strip().replace("\n", " ")
-            return ""
-        if title_node.astext() == "Introduction":
-            introduction = node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
-
-    return introduction
-
-
 def _generate_items(doctree_dir: Path):
     # get list of peps with creation time (from "Created:" string in pep source)
     peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree"))
diff --git a/pep_sphinx_extensions/pep_processor/transforms/pep_footer.py b/pep_sphinx_extensions/pep_processor/transforms/pep_footer.py
index ec83ca6a737..d423b9f075f 100644
--- a/pep_sphinx_extensions/pep_processor/transforms/pep_footer.py
+++ b/pep_sphinx_extensions/pep_processor/transforms/pep_footer.py
@@ -50,6 +50,7 @@ def apply(self) -> None:
         self.document += nodes.transition()
         self.document += _add_source_link(pep_source_path)
         self.document += _add_commit_history_info(pep_source_path)
+        self.document += _add_bibtex_link(pep_source_path)
 
 
 def _add_source_link(pep_source_path: Path) -> nodes.paragraph:
@@ -71,6 +72,16 @@ def _add_commit_history_info(pep_source_path: Path) -> nodes.paragraph:
     return nodes.paragraph("", "Last modified: ", link_node)
 
 
+def _add_bibtex_link(pep_source_path: Path) -> nodes.paragraph:
+    """Add link to download BibTeX citation."""
+    # NOTE(review): this URI is relative to the rendered page, while the .bib
+    # files are written to the output root -- presumably broken when pages are
+    # served as "pep-NNNN/" directories; confirm against the dirhtml build.
+    bib_url = f"{pep_source_path.stem}.bib"
+    link_node = nodes.reference("", "BibTeX", refuri=bib_url)
+    return nodes.paragraph("", "Cite: ", link_node)
+
+
 def _get_last_modified_timestamps():
     # get timestamps and changed files from all commits (without paging results)
     args = ("git", "--no-pager", "log", "--format=#%at", "--name-only")
diff --git a/pep_sphinx_extensions/tests/test_generate_bibtex.py b/pep_sphinx_extensions/tests/test_generate_bibtex.py
new file mode 100644
index 00000000000..099539ff385
--- /dev/null
+++ b/pep_sphinx_extensions/tests/test_generate_bibtex.py
@@ -0,0 +1,159 @@
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from pep_sphinx_extensions.generate_bibtex import (
+    _escape_bibtex,
+    _format_authors,
+    _generate_bibtex_entry,
+    _parse_created,
+    create_bibtex_files,
+)
+
+MOCK_TARGET = "pep_sphinx_extensions.generate_bibtex.get_from_doctree"
+
+PEP_8_HEADERS = {
+    "PEP": "8",
+    "Title": "Style Guide for Python Code",
+    "Author": "Guido van Rossum, Barry Warsaw, Alyssa Coghlan",
+    "Created": "05-Jul-2001",
+}
+
+
+def _mock_doctree(headers: dict[str, str]):
+    """Return a mock get_from_doctree that returns values from headers dict."""
+    return lambda full_path, text: headers.get(text, "")
+
+
+@pytest.mark.parametrize(
+    ("text", "expected"),
+    [
+        ("Hello World", "Hello World"),
+        ("Tom & Jerry", r"Tom \& Jerry"),
+        ("100%", r"100\%"),
+        ("$x$", r"\$x\$"),
+        ("C#", r"C\#"),
+        ("snake_case", r"snake\_case"),
+        ("{}", r"\{\}"),
+        ("~tilde", r"\textasciitilde{}tilde"),
+        ("x^2", r"x\textasciicircum{}2"),
+        ("a\\b", r"a\textbackslash{}b"),
+        ('say "hi"', 'say {"}hi{"}'),
+        ("no specials", "no specials"),
+    ],
+)
+def test_escape_bibtex(text: str, expected: str) -> None:
+    assert _escape_bibtex(text) == expected
+
+
+@pytest.mark.parametrize(
+    ("created", "expected"),
+    [
+        ("01-Jan-1990", ("1990", "jan")),
+        ("15-Sep-2021", ("2021", "sep")),
+        ("28-Feb-2000", ("2000", "feb")),
+    ],
+)
+def test_parse_created(created: str, expected: tuple[str, str]) -> None:
+    assert _parse_created(created) == expected
+
+
+@pytest.mark.parametrize("created", ["", "2001", "05-Foo-2001", "05-Jul-year"])
+def test_parse_created_invalid(created: str) -> None:
+    with pytest.raises(ValueError):
+        _parse_created(created)
+
+
+@pytest.mark.parametrize(
+    ("author_header", "expected"),
+    [
+        ("Cardinal Ximénez", "Cardinal Ximénez"),
+        (
+            "Cardinal Ximénez <Cardinal.Ximenez@spanish.inquisition>,"
+            " Cardinal Biggles <Cardinal.Biggles@spanish.inquisition>",
+            "Cardinal Ximénez and Cardinal Biggles",
+        ),
+        (
+            "Cardinal Ximénez,\n Cardinal Biggles",
+            "Cardinal Ximénez and Cardinal Biggles",
+        ),
+        (
+            "Cardinal Ximénez, Cardinal Biggles, Cardinal Fang",
+            "Cardinal Ximénez and Cardinal Biggles and Cardinal Fang",
+        ),
+    ],
+)
+def test_format_authors(author_header: str, expected: str) -> None:
+    assert _format_authors(author_header) == expected
+
+
+def test_generate_bibtex_entry() -> None:
+    # Arrange / Act
+    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
+        result = _generate_bibtex_entry(Path("pep-0008.doctree"))
+
+    # Assert
+    assert "@techreport{pep8," in result
+    assert 'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"' in result
+    assert 'title = "PEP 8 --- Style Guide for Python Code"' in result
+    assert 'year = "2001"' in result
+    assert "month = jul," in result
+    assert 'number = "8"' in result
+    assert 'url = "https://peps.python.org/pep-0008/"' in result
+
+
+def test_generate_bibtex_entry_title_escaped() -> None:
+    # Arrange
+    headers = {**PEP_8_HEADERS, "PEP": "999", "Title": "Use of $ & % in PEPs"}
+
+    # Act
+    with patch(MOCK_TARGET, _mock_doctree(headers)):
+        result = _generate_bibtex_entry(Path("pep-0999.doctree"))
+
+    # Assert
+    assert r"Use of \$ \& \% in PEPs" in result
+
+
+def test_generate_bibtex_entry_author_escaped() -> None:
+    # Arrange
+    headers = {**PEP_8_HEADERS, "Author": "Tom & Jerry <tj@example.com>"}
+
+    # Act
+    with patch(MOCK_TARGET, _mock_doctree(headers)):
+        result = _generate_bibtex_entry(Path("pep-0008.doctree"))
+
+    # Assert
+    assert r"Tom \& Jerry" in result
+
+
+def test_create_bibtex_files(tmp_path: Path) -> None:
+    # Arrange
+    doctree_dir = tmp_path / "doctrees"
+    doctree_dir.mkdir()
+    output_dir = tmp_path / "output"
+    output_dir.mkdir()
+    (doctree_dir / "pep-0008.doctree").touch()
+
+    # Act
+    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
+        create_bibtex_files(str(doctree_dir), str(output_dir))
+
+    # Assert
+    bib = (output_dir / "pep-0008.bib").read_text(encoding="utf-8")
+    assert "@techreport{pep8," in bib
+    assert 'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"' in bib
+
+
+def test_create_bibtex_files_no_doctrees(tmp_path: Path) -> None:
+    # Arrange
+    doctree_dir = tmp_path / "doctrees"
+    doctree_dir.mkdir()
+    output_dir = tmp_path / "output"
+    output_dir.mkdir()
+
+    # Act
+    create_bibtex_files(str(doctree_dir), str(output_dir))
+
+    # Assert
+    assert list(output_dir.glob("*.bib")) == []