practice_code/python/spider/book/biqg.cc.py
2024-03-29 00:16:12 +08:00

48 lines
1.6 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import re
import aiohttp
import aiofiles
from bs4 import BeautifulSoup
import os
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor # 多线程/多进程
async def aiodownload(url):
    """Download one chapter page and append its text to a file under ``xs``.

    The page at *url* is fetched with aiohttp and parsed with BeautifulSoup.
    The chapter title is read from ``<h1 class="wap_none">`` and the body from
    ``<div id="chaptercontent">``. The output file is named after the part of
    the title before its first space, with a ``.txt`` suffix.

    Args:
        url: Address of the chapter page to download.

    Raises:
        AttributeError: If the title or content element is missing from the
            page (``find`` returns ``None``).
    """
    # Create the output directory if needed; exist_ok makes repeated calls safe.
    os.makedirs("xs", exist_ok=True)

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            html = BeautifulSoup(await response.read(), "html.parser")

    title = html.find("h1", class_="wap_none").text
    content = html.find("div", id="chaptercontent").text

    # os.path.join picks the right separator for the running platform, so the
    # file always lands inside "xs" and the name is defined on every OS.
    name = os.path.join("xs", title.split(" ")[0] + ".txt")

    # Turn the paragraph indent into line breaks and strip the site's
    # boilerplate lines.
    # NOTE(review): the whitespace inside these literals may be non-ASCII
    # (the hosting page warns about invisible Unicode characters) - confirm
    # against the raw file before editing them.
    content = content.replace("  ", "\n").replace(
        "请收藏本站https://www.biqg.cc。笔趣阁手机版https://m.biqg.cc ", "").replace("『点此报错』『加入书签』",
                                                                      "")

    # Explicit UTF-8 so the Chinese text is written the same way regardless of
    # the platform's locale encoding.
    async with aiofiles.open(name, "a+", encoding="utf-8") as file:
        await file.write(content)
    print(title, "成功下载")
async def main(urls):
    """Download every URL in *urls* concurrently and wait for all to finish.

    Each URL is passed to ``aiodownload``. A failure in one download does not
    stop the others; failed URLs are reported on stdout once all downloads
    have finished.

    Args:
        urls: Iterable of chapter page addresses. May be empty, in which case
            the function returns immediately.
    """
    urls = list(urls)
    if not urls:
        # Nothing to schedule; asyncio.wait() would raise ValueError here.
        return

    # return_exceptions=True keeps the run best-effort while still retrieving
    # every task's exception so failures are not silently lost.
    results = await asyncio.gather(
        *(aiodownload(url) for url in urls), return_exceptions=True
    )
    for url, result in zip(urls, results):
        if isinstance(result, BaseException):
            print(url, "下载失败:", repr(result))
if __name__ == "__main__":
    # Build the list of chapter URLs; range(1, 2) yields only index 1.
    chapter_urls = [
        f"https://www.biqg.cc/book/3670/{index}.html" for index in range(1, 2)
    ]
    # Run all downloads to completion, then report that the batch finished.
    asyncio.run(main(urls=chapter_urls))
    print("全部下载完成")