# tools/book_downloader.py

import requests
import os
DATA_DIR = os.path.join("data", "books")
|
|
GUTENBERG_URL = "https://www.gutenberg.org/files/{id}/{id}-0.txt"
|
|
|
|
|
|
def download_book(gutenberg_id, title_hint="book"):
|
|
os.makedirs(DATA_DIR, exist_ok=True)
|
|
url = GUTENBERG_URL.format(id=gutenberg_id)
|
|
try:
|
|
response = requests.get(url, timeout=10)
|
|
if response.status_code != 200:
|
|
print(f"❌ Failed to download book ID {gutenberg_id}")
|
|
return
|
|
|
|
filename = os.path.join(DATA_DIR, f"{title_hint}_{gutenberg_id}.txt")
|
|
with open(filename, "w", encoding="utf-8") as f:
|
|
f.write(response.text)
|
|
|
|
print(f"✅ Saved: {filename}")
|
|
except Exception as e:
|
|
print(f"❌ Error: {e}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
books = [
|
|
(1342, "PrideAndPrejudice"), # Jane Austen
|
|
(11, "AliceInWonderland"), # Lewis Carroll
|
|
(98, "AesopFables"), # Aesop
|
|
(1661, "SherlockHolmes"), # Doyle
|
|
(76, "HuckFinn") # Mark Twain
|
|
]
|
|
|
|
for gutenberg_id, name in books:
|
|
download_book(gutenberg_id, name)
|