get_jobs/boss.py

268 lines
9.8 KiB
Python
Raw Normal View History

2025-04-16 18:00:14 +08:00
import time
import base
from base import Logger
from selenium.webdriver.common.by import By
import json
import os
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import consts
class BossCore(base.Core):
    """Automation core for the BOSS Zhipin job site (https://www.zhipin.com/).

    Builds on ``base.Core`` and adds the site-specific pieces visible here:
    login / verification detection, city lookup from ``site.json`` and the
    ``send`` flow that opens the job search page and applies the
    company-industry filter.

    NOTE(review): ``self.esumes`` is presumably provided by ``base.Core``
    (the name looks like a typo of ``resumes``, but it is used consistently)
    -- confirm against ``base``.
    """

    # Cache of resolved city records: province -> city -> {"url", "code"}.
    # Declared on the class, so it is shared by every instance.
    citys = {}

    def __init__(self):
        super().__init__(
            name="boss",
            url_base="https://www.zhipin.com/",
            url_login="/web/user/?ka=header-login",
            send_amount=300,
        )

    def detect_login(self):
        """Refresh ``login_status`` and save cookies right after a login.

        The page counts as logged in when an element with class
        ``link-logout`` is present.
        """
        was_logged_in = self.login_status
        self.login_status = (
            len(self.driver.find_elements(By.CLASS_NAME, "link-logout")) > 0
        )
        # Only persist cookies on the logged-out -> logged-in transition.
        if not was_logged_in and self.login_status:
            self.save_cookies()

    def detect_verify(self):
        """Refresh ``verify_status`` from the presence of the verify button."""
        self.verify_status = (
            len(self.driver.find_elements(By.CLASS_NAME, "validate_button_click")) > 0
        )

    def get_city_info(self, value):
        """Return the ``{"url", "code"}`` record for a city, or ``None``.

        Args:
            value: Mapping with one entry ``{province: [city, ...]}``. Only
                the first province and its first city are used. A bare city
                string instead of a list is accepted as well.

        Returns:
            A dict with the keys ``"url"`` and ``"code"`` read from
            ``site.json`` in ``self.assets_path``, or ``None`` when the
            province/city is unknown or the lookup fails (a warning is
            logged in both cases).
        """
        province = None
        city = None
        try:
            province, cities = next(iter(value.items()))
            # The city lives in the mapping's value, not in its key.
            city = cities if isinstance(cities, str) else cities[0]

            cached = self.citys.get(province, {}).get(city)
            if cached is not None:
                return cached

            # Let os.path.join pick the separator instead of hard-coding "\".
            site_file = os.path.join(self.assets_path, "site.json")
            with open(site_file, "r", encoding="utf-8") as f:
                all_data = json.load(f)

            # Look up the province entry.
            province_data = next(
                (item for item in all_data["siteList"] if item["name"] == province),
                None,
            )
            if province_data is None:
                Logger.warn(f"没有找到省份 {province} 的信息")
                return None

            # Look up the city entry inside the province.
            city_data = next(
                (
                    item
                    for item in (province_data.get("subLevelModelList") or ())
                    if item["name"] == city
                ),
                None,
            )
            if city_data is None:
                Logger.warn(f"没有找到城市 {city} 的信息")
                return None

            # Build the record completely before caching it, and keep the
            # cities already cached for this province.
            record = {"url": city_data["url"], "code": city_data["code"]}
            self.citys.setdefault(province, {})[city] = record
            return record
        except Exception as e:
            Logger.warn("获取城市信息失败", e, {"province": province, "city": city})
            return None

    def _build_send_plan(self):
        """Create the per-slot bookkeeping in ``self.info["resumes"]`` if empty.

        A slot is one (resume, province, city, keyword) combination. Each
        slot gets an equal share of ``self.send_amount`` as its ``expected``
        quota. The plan is saved through ``self.save_info()``.
        """
        plan = self.info["resumes"]
        if plan:
            return

        # Count real slots: cities (not provinces) times keywords.
        slot_count = sum(
            len(cities) * len(resume["keywords"])
            for resume in self.esumes.values()
            for cities in resume["citys"].values()
        )
        quota = self.send_amount // slot_count if slot_count else 0

        for name, resume in self.esumes.items():
            plan[name] = {}
            for province, cities in resume["citys"].items():
                plan[name][province] = {}
                for city in cities:
                    plan[name][province][city] = {}
                    for keyword in resume["keywords"]:
                        plan[name][province][city][keyword] = {
                            "expected": quota,
                            "actual": 0,
                            "page": 1,
                            "surplus": False,
                        }
        self.save_info()

    def _iter_slots(self):
        """Yield ``(name, province, city, keyword, stats)`` for every slot."""
        for name, provinces in self.info["resumes"].items():
            for province, cities in provinces.items():
                for city, keywords in cities.items():
                    for keyword, stats in keywords.items():
                        yield name, province, city, keyword, stats

    def _pick_slot(self, require_quota):
        """Return the first usable slot as ``(name, province, city, keyword)``.

        Slots flagged ``surplus`` are always skipped. With ``require_quota``
        set, slots whose ``actual`` count already reached ``expected`` are
        skipped too. Returns ``None`` when nothing qualifies.
        """
        for name, province, city, keyword, stats in self._iter_slots():
            if stats["surplus"]:
                continue
            if require_quota and stats["actual"] >= stats["expected"]:
                continue
            return name, province, city, keyword
        return None

    def _collect_filter_menus(self):
        """Collect the first-level filter menus of the search page.

        Returns:
            ``{label_text: {"father": box_element, "self": label_element}}``
            where ``box_element`` is the grandparent of the label element,
            or ``None`` when ``self.detect()`` reports truthy, nothing was
            found, or the lookup raised (an error is logged for the latter
            two).
        """
        if self.detect():
            return None
        try:
            menus = {}
            for label in self.driver.find_elements(By.CLASS_NAME, "placeholder-text"):
                wrapper = label.find_element(By.XPATH, "./..")
                box = wrapper.find_element(By.XPATH, "./..")
                menus[label.text] = {"father": box, "self": label}
        except Exception as e:
            # Keep the cause in the log instead of dropping it.
            Logger.error(
                "筛选的一级菜单盒子查找失败", e, {"class_name": "placeholder-text"}
            )
            return None
        if not menus:
            Logger.error("筛选的一级菜单盒子查找失败")
            return None
        return menus

    def _apply_industry_filter(self, resume_name):
        """Select every configured company industry in the page filter.

        Each industry is tried exactly once; one that is missing from the
        dropdown is logged and skipped, so the loop always terminates.

        Returns:
            ``False`` when the filter menus could not be collected (the
            caller should stop), ``True`` otherwise.
        """
        for industry in self.esumes[resume_name]["industry"]:
            try:
                menus = self._collect_filter_menus()
                if menus is None:
                    return False
                menu = menus["公司行业"]

                # find_elements returns an empty list rather than raising,
                # which makes the "not found" checks below meaningful.
                dropdowns = menu["father"].find_elements(
                    By.CLASS_NAME, "filter-select-dropdown"
                )
                if not dropdowns:
                    Logger.error("公司行业的二级菜单盒子查找失败")
                    continue
                options = dropdowns[0].find_elements(By.TAG_NAME, "a")
                if not options:
                    Logger.error("公司行业的二级菜单选项查找失败")
                    continue

                target = next(
                    (
                        option
                        for option in options
                        if option.get_attribute("innerText") == industry
                    ),
                    None,
                )
                if target is None:
                    Logger.warn(f"没有找到公司行业 {industry} 的选项")
                    continue

                self.human_move(menu["self"])
                self.human_click(target)
                # Wait until the clicked option goes stale before continuing.
                WebDriverWait(self.driver, 10).until(EC.staleness_of(target))
            except Exception as e:
                Logger.error("设置公司行业失败", e, {"industry": industry})
        return True

    def send(self):
        """Run one round of the sending flow.

        Picks the next (resume, province, city, keyword) slot, opens its
        search page and applies the company-industry filter. When no slot is
        left, the browser window is closed and the process exits.

        Raises:
            SystemExit: When every slot is finished.
        """
        # Run all checks; start over if one of them reported something.
        if self.detect():
            self.send()
            return

        # Make sure the per-slot bookkeeping exists.
        self._build_send_plan()

        # NOTE(review): actual_amount is never updated in this method, so the
        # fallback below is always allowed -- confirm the intended accounting.
        actual_amount = 0

        # Prefer slots that are still below their quota, then any open slot.
        slot = self._pick_slot(require_quota=True)
        if slot is None and actual_amount < self.send_amount:
            slot = self._pick_slot(require_quota=False)
        if slot is None:
            Logger.info("投递完毕")
            self.driver.close()
            # Same effect as exit(), without depending on the site helper.
            raise SystemExit(0)

        current_name, current_province, current_city, current_keyword = slot
        info_path = [
            "resumes",
            current_name,
            current_province,
            current_city,
            current_keyword,
        ]
        Logger.info(
            f"当前投递 {current_name} - {current_province} - {current_city} - {current_keyword}"
        )

        # Resolve the city record needed for the search URL.
        city_info = self.get_city_info({current_province: [current_city]})
        if city_info is None:
            # Unknown city: flag the slot so it is not picked again.
            self.info["resumes"][current_name][current_province][current_city][
                current_keyword
            ]["surplus"] = True
            self.save_info()
            return

        # Open the search page for this slot.
        page = base.deep_get(self.info, info_path)["page"]
        self.driver.get(
            self.get_url(
                f"/web/geek/job?query={current_keyword}&city={city_info['code']}&page={page}"
            )
        )

        # Run all checks again after navigation.
        if self.detect():
            self.send()
            return

        # Apply the company-industry filter.
        if not self._apply_industry_filter(current_name):
            return
        Logger.info("设置公司行业完成")
        # NOTE(review): 1000-second pause; looks like a debugging placeholder
        # for the unfinished part of the flow -- confirm before relying on it.
        time.sleep(1000)
# Module-level BossCore instance driven by the script below.
core = BossCore()
# Configure where the log output is saved.
Logger.enable_log_save(core.output_path)
# Open the site for the first time.
core.request_await()
core.driver.get(core.get_url())
core.page_load_await()
# Add cookies (currently disabled).
# core.request_await()
# core.add_cookies()
# core.page_load_await()
# Run the sending flow, then close the browser window.
core.send()
core.driver.close()