From aef2ea236fe8febba67f281b5e86733b5848bd69 Mon Sep 17 00:00:00 2001 From: lsy2246 Date: Sun, 24 Mar 2024 23:20:16 +0800 Subject: [PATCH] =?UTF-8?q?=E6=A2=A8=E8=A7=86=E9=A2=91,=E7=94=B5=E5=BD=B1?= =?UTF-8?q?=E5=A4=A9=E5=A0=82,22=E5=9B=BE=E5=BA=8A=E7=9A=84=E7=88=AC?= =?UTF-8?q?=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/spider/Movie/22tc.py | 48 +++++++++++++++++++++ python/spider/Movie/dianyingtiantang.py | 56 +++++++++++++++++++++++++ python/spider/{ => Movie}/douban.py | 0 python/spider/Movie/lishiping.py | 32 ++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 python/spider/Movie/22tc.py create mode 100644 python/spider/Movie/dianyingtiantang.py rename python/spider/{ => Movie}/douban.py (100%) create mode 100644 python/spider/Movie/lishiping.py diff --git a/python/spider/Movie/22tc.py b/python/spider/Movie/22tc.py new file mode 100644 index 0000000..022011a --- /dev/null +++ b/python/spider/Movie/22tc.py @@ -0,0 +1,48 @@ +import re # 正则表达式 +import requests # 获取网页 +from bs4 import BeautifulSoup + +##拿到发现图片页面信息 + +domian = "https://tc.lsy22.com/explore" + +i = 1 + +UA = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"} + +param = {"page": i, + "seek": "2023-10-20+13%3A40%3A23.5pOu"} + +resp = requests.get(domian, headers=UA) + +page = BeautifulSoup(resp.text, "html.parser") + +list = page.find_all("div", class_="list-item fixed-size c8 gutter-margin-right-bottom") + +resp.close() + +href = [] +##获取子页面 +for item in list: + href.append(item.find("div", class_="list-item-image fixed-size").find("a").get("href")) +j = 0 +## 下载图片 +for i in range(len(list)): + j += 1 + ##获取链接 + resp = requests.get(href[i], headers=UA) + page = BeautifulSoup(resp.text, "html.parser") + image = page.find("div", class_="content-width margin-top-10").find("div", + class_="header header-content margin-bottom-10").find( + "div", class_="header-content-right").find("a").get("href") + ##下载图片 + img_resp = requests.get(image, headers=UA) + img_name = r"image\\"+str(j)+"."+image.split(".")[-1] + img_file =open(img_name, "wb") + img_file.write(img_resp.content) + ##关闭下载 + print("成功",j,"次") + img_file.close() + img_file.close() + resp.close() diff --git a/python/spider/Movie/dianyingtiantang.py b/python/spider/Movie/dianyingtiantang.py new file mode 100644 index 0000000..89bebbd --- /dev/null +++ b/python/spider/Movie/dianyingtiantang.py @@ -0,0 +1,56 @@ +import re # 正则表达式 +import requests # 获取网页 +import warnings +warnings.filterwarnings("ignore")#去除443警告 + +file = open("电影天堂.txt","a+",encoding="utf-8") + + +index = 1 + +domain = "https://www.dyttcn.com" + + +UA = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"} + +rules1 = re.compile(r'.*?') + + +data1 = [] + +for index in range(1, 16): + class_typical = f"/jingdiandapian/list_18_{index}.html" + resp1 = requests.get(domain + class_typical, headers=UA, verify=False) + resp1.close() + resp1.encoding = "gb2312" + data1.append(rules1.findall(resp1.text)) + + + + +rules2 = re.compile(r'.*?◎片  名 (?P.*?)

' + r'.*?◎年  代 (?P\d+)

' + r'.*?◎类  别 (?P.*?)

' + r'.*?',re.S) + + + +for i in data1: + for j in i: + if type(j) == str: + resp2 = requests.get(domain + j, headers=UA, verify=False) + elif type(j) == list: + resp2 = requests.get(domain + j[0], headers=UA, verify=False) + print(j) + resp2.close() + resp2.encoding = "gb2312" + data2 = rules2.findall(resp2.text) + if len(data2) > 0: + print(str(data2)) + file.write(str(data2)) + file.write('\n') + + + +file.close() diff --git a/python/spider/douban.py b/python/spider/Movie/douban.py similarity index 100% rename from python/spider/douban.py rename to python/spider/Movie/douban.py diff --git a/python/spider/Movie/lishiping.py b/python/spider/Movie/lishiping.py new file mode 100644 index 0000000..2986bbe --- /dev/null +++ b/python/spider/Movie/lishiping.py @@ -0,0 +1,32 @@ +import re +import requests + +url = "https://www.pearvideo.com/video_1792985" + +conID = url.split("_")[1] +videourl = f"https://www.pearvideo.com/videoStatus.jsp?contId={conID}" + +header = {"Referer": url, + "User-Agent": + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" + } + +resp = requests.get(videourl, headers=header) + +dic = resp.json() + +srcUrl = dic["videoInfo"]["videos"]["srcUrl"] + +systemTime = dic["systemTime"] + +downloadurl = srcUrl.replace(systemTime,f"cont-{conID}") + + +url =requests.get(downloadurl, headers=header) + +file = open("lsp.mp4","wb+") + +file.write(url.content) + +file.close() +