First good level of progress

This commit is contained in:
2025-06-29 12:36:25 -04:00
commit 159be1eb82
15 changed files with 10628 additions and 0 deletions

38
tools/book_downloader.py Normal file
View File

@ -0,0 +1,38 @@
# tools/book_downloader.py
import requests
import os
# Directory (relative to the current working directory) where downloaded
# book texts are written.
DATA_DIR = os.path.join("data", "books")
# URL template for a book's plain-text file; `{id}` appears twice and is
# filled with the Gutenberg book ID via str.format(id=...).
# NOTE(review): the "-0.txt" suffix is presumably Gutenberg's UTF-8 text
# variant, and not every book ID necessarily has one — confirm.
GUTENBERG_URL = "https://www.gutenberg.org/files/{id}/{id}-0.txt"
def download_book(gutenberg_id, title_hint="book"):
    """Download one Project Gutenberg plain-text book into ``DATA_DIR``.

    The URL is built from ``GUTENBERG_URL`` and the text is saved as
    ``<title_hint>_<gutenberg_id>.txt`` (UTF-8). Download and write failures
    are reported on stdout instead of being raised, so a batch of downloads
    keeps going after one bad book.

    Args:
        gutenberg_id: Book ID substituted into ``GUTENBERG_URL``.
        title_hint: Prefix used for the saved file name.

    Returns:
        The path of the saved file, or ``None`` if the server did not answer
        with HTTP 200 or a network/file error occurred.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    url = GUTENBERG_URL.format(id=gutenberg_id)
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(f"❌ Failed to download book ID {gutenberg_id}")
            return None

        # When a text/* response carries no charset, requests decodes
        # `.text` as ISO-8859-1, which would write mojibake into the UTF-8
        # output file. Fall back to UTF-8 only in that case; a charset the
        # server explicitly declares is still honoured.
        content_type = response.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            response.encoding = "utf-8"

        filename = os.path.join(DATA_DIR, f"{title_hint}_{gutenberg_id}.txt")
        with open(filename, "w", encoding="utf-8") as f:
            f.write(response.text)
        print(f"✅ Saved: {filename}")
        return filename
    except (requests.RequestException, OSError) as e:
        # Network problems (timeouts, DNS, connection resets) and file-system
        # errors are expected failure modes here; anything else is a bug and
        # is allowed to propagate.
        print(f"❌ Error: {e}")
        return None
if __name__ == "__main__":
    # Starter set of public-domain titles: (Gutenberg ID, file-name prefix).
    catalog = (
        (1342, "PrideAndPrejudice"),  # Jane Austen
        (11, "AliceInWonderland"),    # Lewis Carroll
        (98, "AesopFables"),          # Aesop
        (1661, "SherlockHolmes"),     # Arthur Conan Doyle
        (76, "HuckFinn"),             # Mark Twain
    )
    # Fetch the books one at a time, in the order listed above.
    for entry in catalog:
        download_book(*entry)