From aef2ea236fe8febba67f281b5e86733b5848bd69 Mon Sep 17 00:00:00 2001
From: lsy2246
Date: Sun, 24 Mar 2024 23:20:16 +0800
Subject: [PATCH] =?UTF-8?q?=E6=A2=A8=E8=A7=86=E9=A2=91,=E7=94=B5=E5=BD=B1?=
=?UTF-8?q?=E5=A4=A9=E5=A0=82,22=E5=9B=BE=E5=BA=8A=E7=9A=84=E7=88=AC?=
=?UTF-8?q?=E8=99=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
python/spider/Movie/22tc.py | 48 +++++++++++++++++++++
python/spider/Movie/dianyingtiantang.py | 56 +++++++++++++++++++++++++
python/spider/{ => Movie}/douban.py | 0
python/spider/Movie/lishiping.py | 32 ++++++++++++++
4 files changed, 136 insertions(+)
create mode 100644 python/spider/Movie/22tc.py
create mode 100644 python/spider/Movie/dianyingtiantang.py
rename python/spider/{ => Movie}/douban.py (100%)
create mode 100644 python/spider/Movie/lishiping.py
diff --git a/python/spider/Movie/22tc.py b/python/spider/Movie/22tc.py
new file mode 100644
index 0000000..022011a
--- /dev/null
+++ b/python/spider/Movie/22tc.py
@@ -0,0 +1,48 @@
import os
import re  # regular expressions (kept from the original; unused below)
import requests  # HTTP client
from bs4 import BeautifulSoup

# Scrape the "explore" page of the image host, follow every thumbnail to
# its detail page, and save each full-size image as image/<n>.<ext>.

domain = "https://tc.lsy22.com/explore"  # fixed typo: was "domian"

UA = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

# NOTE(review): the original built a {"page": ..., "seek": ...} dict but
# never passed it to requests.get(), so only the first page is fetched.
# That behavior is kept; add params=... to enable pagination.

with requests.get(domain, headers=UA) as resp:
    page = BeautifulSoup(resp.text, "html.parser")

# Renamed from "list" -- never shadow the builtin.
items = page.find_all("div", class_="list-item fixed-size c8 gutter-margin-right-bottom")

# Detail-page link of every thumbnail.
hrefs = [item.find("div", class_="list-item-image fixed-size").find("a").get("href")
         for item in items]

# Make sure the target directory exists before writing into it.
os.makedirs("image", exist_ok=True)

# Download each image.  Context managers replace the original's manual
# close() calls (the file was closed twice, the image response never).
for count, href in enumerate(hrefs, start=1):
    with requests.get(href, headers=UA) as detail_resp:
        detail = BeautifulSoup(detail_resp.text, "html.parser")
    image = (detail.find("div", class_="content-width margin-top-10")
             .find("div", class_="header header-content margin-bottom-10")
             .find("div", class_="header-content-right")
             .find("a").get("href"))
    with requests.get(image, headers=UA) as img_resp:
        # The original r"image\\" prefix produced a doubled backslash in
        # the path; build it portably instead.
        img_name = os.path.join("image", f"{count}.{image.split('.')[-1]}")
        with open(img_name, "wb") as img_file:
            img_file.write(img_resp.content)
    print("成功", count, "次")
diff --git a/python/spider/Movie/dianyingtiantang.py b/python/spider/Movie/dianyingtiantang.py
new file mode 100644
index 0000000..89bebbd
--- /dev/null
+++ b/python/spider/Movie/dianyingtiantang.py
@@ -0,0 +1,56 @@
import re  # pages are scraped with regular expressions
import requests  # HTTP client
import warnings

warnings.filterwarnings("ignore")  # silence InsecureRequestWarning from verify=False

# Crawl the "classic movies" category of www.dyttcn.com, extract the
# title / year / genre block from each movie page and append the matches
# to 电影天堂.txt.

domain = "https://www.dyttcn.com"

UA = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"}

# NOTE(review): both regexes arrived corrupted in the patch -- the HTML
# tags and the (?P<...>) group names were stripped, leaving patterns such
# as "(?P.*?)" that raise re.error at compile time.  They are
# reconstructed below from the surviving literals; confirm the exact tag
# boundaries against the live page markup before relying on them.
rules1 = re.compile(r'<a href="(/jingdiandapian/\d+?\.html)"')

# One list of detail-page paths per index page.
data1 = []

for index in range(1, 16):
    class_typical = f"/jingdiandapian/list_18_{index}.html"
    resp1 = requests.get(domain + class_typical, headers=UA, verify=False)
    # close() is safe before .text: requests has already buffered the body.
    resp1.close()
    resp1.encoding = "gb2312"  # the site serves GB2312, not what requests guesses
    data1.append(rules1.findall(resp1.text))

rules2 = re.compile(
    r'.*?◎片 名 (?P<name>.*?)<br />'
    r'.*?◎年 代 (?P<year>\d+)<br />'
    r'.*?◎类 别 (?P<category>.*?)<br />'
    r'.*?', re.S)

with open("电影天堂.txt", "a+", encoding="utf-8") as file:
    for page_links in data1:
        for link in page_links:
            # findall() yields plain strings for a single capture group and
            # tuples for several; support both and skip anything else (the
            # original hit a NameError on resp2 for unexpected types).
            if isinstance(link, str):
                path = link
            elif isinstance(link, (list, tuple)):
                path = link[0]
                print(link)
            else:
                continue
            resp2 = requests.get(domain + path, headers=UA, verify=False)
            resp2.close()
            resp2.encoding = "gb2312"
            data2 = rules2.findall(resp2.text)
            if data2:
                print(str(data2))
                file.write(str(data2))
                file.write('\n')
diff --git a/python/spider/douban.py b/python/spider/Movie/douban.py
similarity index 100%
rename from python/spider/douban.py
rename to python/spider/Movie/douban.py
diff --git a/python/spider/Movie/lishiping.py b/python/spider/Movie/lishiping.py
new file mode 100644
index 0000000..2986bbe
--- /dev/null
+++ b/python/spider/Movie/lishiping.py
@@ -0,0 +1,32 @@
import re  # kept from the original; unused below
import requests

# Download one pearvideo.com clip.  The site's JSON status endpoint
# returns an anti-hotlinking srcUrl whose timestamp segment must be
# replaced with "cont-<contId>" to obtain the real video URL.

url = "https://www.pearvideo.com/video_1792985"

cont_id = url.split("_")[1]
video_status = f"https://www.pearvideo.com/videoStatus.jsp?contId={cont_id}"

# Referer is required -- the endpoint refuses requests without it.
header = {
    "Referer": url,
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
}

with requests.get(video_status, headers=header) as resp:
    dic = resp.json()

srcUrl = dic["videoInfo"]["videos"]["srcUrl"]
systemTime = dic["systemTime"]

# Swap the fake timestamp for the real content id.
downloadurl = srcUrl.replace(systemTime, f"cont-{cont_id}")

# The original rebound "url" to the Response object and left the file
# handle open ("wb+" for no reason); use context managers instead.
with requests.get(downloadurl, headers=header) as video_resp:
    with open("lsp.mp4", "wb") as file:
        file.write(video_resp.content)