Spiders for Pear Video, Dianying Tiantang, and the 22 image host
This commit is contained in:
parent 67b20ba291
commit aef2ea236f
48 python/spider/Movie/22tc.py Normal file
@@ -0,0 +1,48 @@
import requests  # fetch web pages
from bs4 import BeautifulSoup

# Fetch the image host's "explore" page, which lists the pictures
domain = "https://tc.lsy22.com/explore"

i = 1

UA = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

# Query parameters for later pages of the listing; page 1 loads without them
param = {"page": i,
         "seek": "2023-10-20+13%3A40%3A23.5pOu"}

resp = requests.get(domain, headers=UA)

page = BeautifulSoup(resp.text, "html.parser")

items = page.find_all("div", class_="list-item fixed-size c8 gutter-margin-right-bottom")

resp.close()

href = []

# Collect each image's detail-page link
for item in items:
    href.append(item.find("div", class_="list-item-image fixed-size").find("a").get("href"))

j = 0

# Download the images
for i in range(len(items)):
    j += 1
    # Fetch the detail page and pull the direct image URL from its header bar
    resp = requests.get(href[i], headers=UA)
    page = BeautifulSoup(resp.text, "html.parser")
    image = page.find("div", class_="content-width margin-top-10").find(
        "div", class_="header header-content margin-bottom-10").find(
        "div", class_="header-content-right").find("a").get("href")
    # Download the image and save it as image\<j>.<extension>
    img_resp = requests.get(image, headers=UA)
    img_name = "image\\" + str(j) + "." + image.split(".")[-1]
    img_file = open(img_name, "wb")
    img_file.write(img_resp.content)
    print("Succeeded", j, "times")
    # Close the file and the connections
    img_file.close()
    img_resp.close()
    resp.close()
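Note: the script above builds a param dict with a page number but never sends it, so only page 1 of the listing is fetched. A minimal paging sketch, assuming /explore accepts the page query parameter that the dict implies (the seek token is copied from the script and may be session-specific; explore_links is a hypothetical helper name):

import requests
from bs4 import BeautifulSoup

UA = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

def explore_links(page_no):
    # Build the URL by hand so the already-encoded seek token is not re-encoded
    listing = (f"https://tc.lsy22.com/explore?page={page_no}"
               "&seek=2023-10-20+13%3A40%3A23.5pOu")
    resp = requests.get(listing, headers=UA)
    page = BeautifulSoup(resp.text, "html.parser")
    items = page.find_all("div", class_="list-item fixed-size c8 gutter-margin-right-bottom")
    resp.close()
    # Return each image's detail-page link, as in the loop above
    return [item.find("div", class_="list-item-image fixed-size").find("a").get("href")
            for item in items]

for page_no in range(1, 4):  # first three listing pages
    print(explore_links(page_no))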
56 python/spider/Movie/dianyingtiantang.py Normal file
@@ -0,0 +1,56 @@
import re  # regular expressions
import requests  # fetch web pages
import warnings

warnings.filterwarnings("ignore")  # silence the HTTPS (port 443) certificate warnings

file = open("电影天堂.txt", "a+", encoding="utf-8")

domain = "https://www.dyttcn.com"

UA = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

# Match each movie's detail-page link in the category listing
rules1 = re.compile(r'<a href="(?P<url>.*?\.html)" class="ulink" title="\d+年.*?片《.*?》.*?">.*?</a>')

data1 = []

# Walk the first 15 pages of the category listing
for index in range(1, 16):
    class_typical = f"/jingdiandapian/list_18_{index}.html"
    resp1 = requests.get(domain + class_typical, headers=UA, verify=False)
    resp1.encoding = "gb2312"
    data1.append(rules1.findall(resp1.text))
    resp1.close()

# Pull the title, year, genre, and magnet link out of each detail page
rules2 = re.compile(r'.*?◎片 名 (?P<name>.*?)</p>'
                    r'.*?◎年 代 (?P<year>\d+)</p>'
                    r'.*?◎类 别 (?P<class>.*?)</p>'
                    r'.*?<a href="(?P<url>magnet:\?xt=urn:btih:.*?)">', re.S)

for i in data1:
    for j in i:
        if isinstance(j, str):
            resp2 = requests.get(domain + j, headers=UA, verify=False)
        elif isinstance(j, list):
            resp2 = requests.get(domain + j[0], headers=UA, verify=False)
        print(j)
        resp2.encoding = "gb2312"
        data2 = rules2.findall(resp2.text)
        resp2.close()
        if len(data2) > 0:
            print(str(data2))
            file.write(str(data2))
            file.write('\n')

file.close()
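Note: rules2 depends on the ◎-prefixed field markers in each detail page. A quick offline check of the regex against a made-up fragment (the HTML below is invented to mirror the markers the pattern expects, not copied from the site):

import re

rules2 = re.compile(r'.*?◎片 名 (?P<name>.*?)</p>'
                    r'.*?◎年 代 (?P<year>\d+)</p>'
                    r'.*?◎类 别 (?P<class>.*?)</p>'
                    r'.*?<a href="(?P<url>magnet:\?xt=urn:btih:.*?)">', re.S)

# Hypothetical detail-page fragment shaped like the markers the regex expects
sample = ('<p>◎片 名 Example Movie</p>'
          '<p>◎年 代 2001</p>'
          '<p>◎类 别 Drama</p>'
          '<a href="magnet:?xt=urn:btih:0123456789abcdef">download</a>')

print(rules2.findall(sample))
# [('Example Movie', '2001', 'Drama', 'magnet:?xt=urn:btih:0123456789abcdef')]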
32 python/spider/Movie/lishiping.py Normal file
@@ -0,0 +1,32 @@
import requests

url = "https://www.pearvideo.com/video_1792985"

# The contId is the number after "video_" in the page URL
conID = url.split("_")[1]
videourl = f"https://www.pearvideo.com/videoStatus.jsp?contId={conID}"

# The Referer must point back at the video page, or the site's anti-hotlinking check rejects the request
header = {"Referer": url,
          "User-Agent":
              "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
          }

resp = requests.get(videourl, headers=header)

dic = resp.json()

srcUrl = dic["videoInfo"]["videos"]["srcUrl"]

systemTime = dic["systemTime"]

# srcUrl carries a timestamp where the real address has "cont-<contId>"; swap them to get a downloadable URL
downloadurl = srcUrl.replace(systemTime, f"cont-{conID}")

video = requests.get(downloadurl, headers=header)

file = open("lsp.mp4", "wb+")

file.write(video.content)

file.close()
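Note: the timestamp-for-contId swap is not specific to this one video, so the downloader generalizes to any Pear Video page URL. A sketch under that assumption (download_pearvideo and its arguments are hypothetical names):

import requests

CHROME_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
             "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")

def download_pearvideo(page_url, out_path):
    # contId is the number after "video_" in the page URL
    cont_id = page_url.split("_")[1]
    status_url = f"https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}"
    header = {"Referer": page_url, "User-Agent": CHROME_UA}
    dic = requests.get(status_url, headers=header).json()
    # The API returns a URL with a timestamp where the real address
    # has "cont-<contId>"; swapping them yields the downloadable file
    src = dic["videoInfo"]["videos"]["srcUrl"]
    real = src.replace(dic["systemTime"], f"cont-{cont_id}")
    with open(out_path, "wb") as f:
        f.write(requests.get(real, headers=header).content)

download_pearvideo("https://www.pearvideo.com/video_1792985", "lsp.mp4")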