Skip to content

Commit 4322026

Browse files
vinta and claude committed
refactor: parse thematic groups from README bold markers instead of hardcoding them
The website builder previously relied on a hardcoded SECTION_GROUPS list in build.py to organize categories into thematic groups. This was fragile: any rename or addition to README.md required a matching code change.

Replace this with a parser-driven approach:

- readme_parser.py now detects bold-only paragraphs (**Group Name**) as group boundary markers and groups H2 categories beneath them into ParsedGroup structs.
- build.py drops SECTION_GROUPS entirely; group_categories() now just passes parsed groups through and appends the Resources group.
- sort.py is removed as it relied on the old flat section model.
- Tests updated throughout to reflect the new (groups, resources) return shape and to cover the new grouping logic.

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent fd9b266 commit 4322026

File tree

5 files changed

+346
-324
lines changed

5 files changed

+346
-324
lines changed

sort.py

Lines changed: 0 additions & 83 deletions
This file was deleted.

website/build.py

Lines changed: 11 additions & 170 deletions
Original file line number | Diff line number | Diff line change
@@ -10,179 +10,20 @@
1010
from jinja2 import Environment, FileSystemLoader
1111
from readme_parser import parse_readme, slugify
1212

13-
# Thematic grouping of categories. Each category name must match exactly
14-
# as it appears in README.md (the ## heading text).
15-
SECTION_GROUPS: list[tuple[str, list[str]]] = [
16-
(
17-
"Web & API",
18-
[
19-
"Admin Panels",
20-
"CMS",
21-
"Email",
22-
"Static Site Generator",
23-
"URL Manipulation",
24-
"Web Frameworks",
25-
"RESTful API",
26-
"GraphQL",
27-
"WebSocket",
28-
"ASGI Servers",
29-
"WSGI Servers",
30-
"HTTP Clients",
31-
"Template Engine",
32-
"Web Asset Management",
33-
"Web Content Extracting",
34-
"Web Crawling",
35-
],
36-
),
37-
(
38-
"AI & ML",
39-
[
40-
"AI and Agents",
41-
"Machine Learning",
42-
"Deep Learning",
43-
"Computer Vision",
44-
"Natural Language Processing",
45-
"Recommender Systems",
46-
"Robotics",
47-
],
48-
),
49-
(
50-
"Data & Science",
51-
[
52-
"Data Analysis",
53-
"Data Validation",
54-
"Data Visualization",
55-
"Geolocation",
56-
"Science",
57-
"Quantum Computing",
58-
],
59-
),
60-
(
61-
"DevOps & Infrastructure",
62-
[
63-
"DevOps Tools",
64-
"Distributed Computing",
65-
"Task Queues",
66-
"Job Scheduler",
67-
"Serverless Frameworks",
68-
"Logging",
69-
"Processes",
70-
"Shell",
71-
"Network Virtualization",
72-
"RPC Servers",
73-
],
74-
),
75-
(
76-
"Database & Storage",
77-
[
78-
"Database",
79-
"Database Drivers",
80-
"ORM",
81-
"Caching",
82-
"Search",
83-
"Serialization",
84-
],
85-
),
86-
(
87-
"Development Tools",
88-
[
89-
"Testing",
90-
"Debugging Tools",
91-
"Code Analysis",
92-
"Build Tools",
93-
"Algorithms and Design Patterns",
94-
"Refactoring",
95-
"Documentation",
96-
"Editor Plugins and IDEs",
97-
"Interactive Interpreter",
98-
],
99-
),
100-
(
101-
"CLI & GUI",
102-
[
103-
"Command-line Interface Development",
104-
"Command-line Tools",
105-
"GUI Development",
106-
],
107-
),
108-
(
109-
"Content & Media",
110-
[
111-
"Audio",
112-
"Video",
113-
"Game Development",
114-
"Image Processing",
115-
"Internationalization",
116-
"HTML Manipulation",
117-
"Text Processing",
118-
"Specific Formats Processing",
119-
"File Manipulation",
120-
"Downloader",
121-
],
122-
),
123-
(
124-
"System & Runtime",
125-
[
126-
"Asynchronous Programming",
127-
"Environment Management",
128-
"Package Management",
129-
"Package Repositories",
130-
"Date and Time",
131-
"Distribution",
132-
"Hardware",
133-
"Implementations",
134-
"Microsoft Windows",
135-
"Built-in Classes Enhancement",
136-
"Functional Programming",
137-
"Configuration Files",
138-
],
139-
),
140-
(
141-
"Security & Auth",
142-
[
143-
"Authentication",
144-
"Cryptography",
145-
"Penetration Testing",
146-
"Permissions",
147-
],
148-
),
149-
("Resources", []), # Filled dynamically from parsed resources
150-
]
151-
15213

15314
def group_categories(
154-
categories: list[dict],
15+
parsed_groups: list[dict],
15516
resources: list[dict],
15617
) -> list[dict]:
157-
"""Organize categories and resources into thematic section groups."""
158-
cat_by_name = {c["name"]: c for c in categories}
159-
groups = []
160-
grouped_names: set[str] = set()
161-
162-
for group_name, cat_names in SECTION_GROUPS:
163-
grouped_names.update(cat_names)
164-
if group_name == "Resources":
165-
group_cats = list(resources)
166-
else:
167-
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
168-
169-
if group_cats:
170-
groups.append(
171-
{
172-
"name": group_name,
173-
"slug": slugify(group_name),
174-
"categories": group_cats,
175-
}
176-
)
18+
"""Combine parsed groups with resources for template rendering."""
19+
groups = list(parsed_groups)
17720

178-
# Any categories not in a group go into "Other"
179-
ungrouped = [c for c in categories if c["name"] not in grouped_names]
180-
if ungrouped:
21+
if resources:
18122
groups.append(
18223
{
183-
"name": "Other",
184-
"slug": "other",
185-
"categories": ungrouped,
24+
"name": "Resources",
25+
"slug": slugify("Resources"),
26+
"categories": list(resources),
18627
}
18728
)
18829

@@ -295,11 +136,11 @@ def build(repo_root: str) -> None:
295136
subtitle = stripped
296137
break
297138

298-
categories, resources = parse_readme(readme_text)
299-
# All fields pre-computed: entry_count, content_html, preview, description
139+
parsed_groups, resources = parse_readme(readme_text)
300140

141+
categories = [cat for g in parsed_groups for cat in g["categories"]]
301142
total_entries = sum(c["entry_count"] for c in categories)
302-
groups = group_categories(categories, resources)
143+
groups = group_categories(parsed_groups, resources)
303144
entries = extract_entries(categories, groups)
304145

305146
stars_data = load_stars(website / "data" / "github_stars.json")
@@ -344,7 +185,7 @@ def build(repo_root: str) -> None:
344185

345186
shutil.copy(repo / "README.md", site_dir / "llms.txt")
346187

347-
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
188+
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
348189
print(f"Total entries: {total_entries}")
349190
print(f"Output: {site_dir}")
350191

0 commit comments

Comments
 (0)