小说爬虫
This commit is contained in:
parent
8f5539fc74
commit
63859b11a4
47
python/spider/book/biqg.cc.py
Normal file
47
python/spider/book/biqg.cc.py
Normal file
@ -0,0 +1,47 @@
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import aiohttp
|
||||
import aiofiles
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor # 多线程/多进程
|
||||
|
||||
|
||||
async def aiodownload(url):
    """Download one chapter page and append its text to a file under ``xs``.

    The page at *url* is fetched and parsed with BeautifulSoup. The chapter
    title is read from ``<h1 class="wap_none">`` and the chapter body from
    ``<div id="chaptercontent">``. The first space-separated token of the
    title is used as the output file name (``xs/<token>.txt``).

    Args:
        url: Address of the chapter page to download.

    Returns:
        None. When either expected element is missing from the page, a
        message is printed and nothing is written.
    """
    out_dir = "xs"
    # exist_ok makes this safe to call from every download task.
    os.makedirs(out_dir, exist_ok=True)

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            html = BeautifulSoup(await response.read(), "html.parser")

    title_tag = html.find("h1", class_="wap_none")
    content_tag = html.find("div", id="chaptercontent")
    if title_tag is None or content_tag is None:
        # Error pages or layout changes would otherwise surface as an
        # AttributeError on ``None.text``.
        print(url, "解析失败,已跳过")
        return

    title = title_tag.text
    content = content_tag.text

    # The title comes from remote HTML: replace characters that are path
    # separators or otherwise invalid in file names before using it.
    stem = re.sub(r'[\\/:*?"<>|]', "_", title.split(" ")[0])
    # os.path.join picks the right separator on every platform.
    name = os.path.join(out_dir, stem + ".txt")

    # Remove the site's boilerplate BEFORE turning spaces into newlines:
    # the first marker ends with a space, so it can no longer match once
    # that space has been rewritten.
    content = content.replace(
        "请收藏本站:https://www.biqg.cc。笔趣阁手机版:https://m.biqg.cc ", "").replace(
        "『点此报错』『加入书签』", "").replace(" ", "\n")

    # Explicit UTF-8 so the write does not depend on the platform's default
    # encoding.
    async with aiofiles.open(name, "a+", encoding="utf-8") as file:
        await file.write(content)

    print(title, "成功下载")
|
||||
|
||||
|
||||
async def main(urls):
    """Download every chapter in *urls* concurrently.

    Args:
        urls: Iterable of chapter page addresses; may be empty.

    Returns:
        None. A download that raises is reported on stdout and does not
        stop the remaining downloads.
    """
    urls = list(urls)
    if not urls:
        # Nothing to schedule; asyncio.wait() would raise ValueError here.
        return

    # return_exceptions=True keeps one failing chapter from cancelling the
    # rest and hands each exception back so it can be reported.
    results = await asyncio.gather(
        *(aiodownload(url) for url in urls),
        return_exceptions=True,
    )
    for url, result in zip(urls, results):
        if isinstance(result, Exception):
            print(url, "下载失败:", repr(result))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Build the chapter URLs for book 3670; the range currently covers only
    # chapter index 1.
    urls = [
        f"https://www.biqg.cc/book/3670/{index}.html"
        for index in range(1, 2)
    ]
    # Run all downloads to completion, then report.
    asyncio.run(main(urls=urls))
    print("全部下载完成")
|
||||
|
Loading…
Reference in New Issue
Block a user