完成筛选功能

This commit is contained in:
lsy 2025-04-17 00:36:31 +08:00
parent bf11186b8d
commit 3bfff88784
3 changed files with 88 additions and 92 deletions

35
base.py
View File

@ -11,6 +11,7 @@ import time
import random import random
import json import json
# 定义常量 # 定义常量
DEBUGGER = True # 调试模式 DEBUGGER = True # 调试模式
SEND_TIME = 10 # 每投递一个岗位 SEND_TIME = 10 # 每投递一个岗位
@ -190,8 +191,9 @@ class Core:
last_send_time = time.time() # 上次发送时间 last_send_time = time.time() # 上次发送时间
last_request_time = time.time() # 上次请求时间 last_request_time = time.time() # 上次请求时间
info = {} # 操作信息 info = {} # 操作信息
filter_dict = {} # 筛选条件映射表
def __init__(self, name, url_base, url_login, send_amount): def __init__(self, name, url_base, url_login, send_amount, filter_dict):
# 创建名字 # 创建名字
self.name = name self.name = name
# 配置url # 配置url
@ -199,6 +201,8 @@ class Core:
self.url.login = url_login self.url.login = url_login
# 请求总数 # 请求总数
self.send_amount = send_amount self.send_amount = send_amount
# 筛选条件映射表
self.filter_dict = filter_dict
# 获取简历 # 获取简历
for key, value in Info.resume_profiles.items(): for key, value in Info.resume_profiles.items():
if self.name in value["platforms"]: if self.name in value["platforms"]:
@ -240,7 +244,9 @@ class Core:
chrome_options.add_argument("--disable-web-security") chrome_options.add_argument("--disable-web-security")
chrome_options.add_argument("--allow-running-insecure-content") chrome_options.add_argument("--allow-running-insecure-content")
chrome_options.add_argument("--reduce-security-for-testing") chrome_options.add_argument("--reduce-security-for-testing")
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"]) chrome_options.add_experimental_option(
"excludeSwitches", ["enable-logging", "enable-automation"]
)
chrome_options.add_experimental_option("useAutomationExtension", False) chrome_options.add_experimental_option("useAutomationExtension", False)
# 设置偏好,禁用自动化提示 # 设置偏好,禁用自动化提示
prefs = { prefs = {
@ -249,7 +255,7 @@ class Core:
"profile.default_content_setting_values.notifications": 2, "profile.default_content_setting_values.notifications": 2,
# 下面的设置对绕过检测很重要 # 下面的设置对绕过检测很重要
"excludeSwitches": ["enable-automation"], "excludeSwitches": ["enable-automation"],
"useAutomationExtension": False "useAutomationExtension": False,
} }
chrome_options.add_experimental_option("prefs", prefs) chrome_options.add_experimental_option("prefs", prefs)
@ -257,7 +263,7 @@ class Core:
user_agents = [ user_agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
] ]
chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}") chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}")
@ -282,10 +288,14 @@ class Core:
self.driver.get("about:blank") self.driver.get("about:blank")
# 立即执行脚本移除webdriver标志 # 立即执行脚本移除webdriver标志
self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})") self.driver.execute_script(
"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
)
# 执行CDP命令修改navigator.webdriver标志位 # 执行CDP命令修改navigator.webdriver标志位
self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { self.driver.execute_cdp_cmd(
"Page.addScriptToEvaluateOnNewDocument",
{
"source": """ "source": """
// 覆盖 webdriver 属性 // 覆盖 webdriver 属性
Object.defineProperty(navigator, 'webdriver', { Object.defineProperty(navigator, 'webdriver', {
@ -327,7 +337,8 @@ class Core:
window.Notification.requestPermission = originalNotification.requestPermission; window.Notification.requestPermission = originalNotification.requestPermission;
} }
""" """
}) },
)
# 获取实际url # 获取实际url
def get_url(self, url_path=""): def get_url(self, url_path=""):
@ -626,13 +637,9 @@ class Info:
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"citys": {"全国": ["全国"]}, "citys": {"全国": ["全国"]},
"keywords": ["Python"], # 需要搜索的职位,会依次投递 "keywords": ["Python"], # 需要搜索的职位,会依次投递
"industry": [ "industry": ["不限"], # 公司行业
"不限" "experience": ["不限"], # 工作经验
], # 公司行业,只能选三个,相关代码枚举的部分,如果需要其他的需要自己找 "jobType": "不限", # 求职类型
"experience": [
"不限"
], # 工作经验:"应届毕业生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"
"jobType": "不限", # 求职类型:"全职", "兼职"
"salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上" "salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上"
"degree": [ "degree": [
"不限" "不限"

102
boss.py
View File

@ -18,6 +18,15 @@ class BossCore(base.Core):
url_base="https://www.zhipin.com/", url_base="https://www.zhipin.com/",
url_login="/web/user/?ka=header-login", url_login="/web/user/?ka=header-login",
send_amount=300, send_amount=300,
filter_dict={
"公司行业": "industry",
"工作经验": "experience",
"求职类型": "jobType",
"薪资待遇": "salary",
"学历要求": "degree",
"公司规模": "scale",
"融资阶段": "stage",
},
) )
def detect_login(self): def detect_login(self):
@ -172,79 +181,72 @@ class BossCore(base.Core):
) )
) )
# 所有检查 # 筛选函数
def set_filter(tag_name, filter_name):
    """Pick one option inside one of the search page's filter drop-downs.

    Closure: relies on ``self``, ``core``, ``Logger``, ``By``,
    ``WebDriverWait`` and ``EC`` from the enclosing scope.

    Args:
        tag_name: Caption of the first-level filter menu as shown on the
            page (a key of ``filter_dict``, e.g. "公司行业").
        filter_name: Visible text of the option to select. "不限" means
            "no restriction" and is treated as a no-op.

    Returns:
        True  -- nothing to do ("不限") or the option was clicked.
        False -- ``core.detect()`` was truthy, or no first-level menu could
                 be located; the caller aborts filtering on ``is False``.
        None  -- the option was not found or the interaction failed; the
                 caller only checks ``is False`` and therefore moves on to
                 the next filter.
    """
    if filter_name == "不限":
        return True
    if core.detect():
        return False

    # Map each first-level caption to its label element ("self") and to the
    # container two levels up ("father") that also holds the drop-down.
    level_ones_dict = {}
    try:
        for label in self.driver.find_elements(By.CLASS_NAME, "placeholder-text"):
            wrapper = label.find_element(By.XPATH, "./..")
            box = wrapper.find_element(By.XPATH, "./..")
            level_ones_dict[label.text] = {"father": box, "self": label}
    except Exception as e:
        Logger.error("筛选的一级菜单盒子查找失败", e)
    # The original compared the dict with 0, which is never true, so this
    # guard was dead code; an empty dict is what actually signals failure.
    if not level_ones_dict:
        Logger.error("筛选的一级菜单盒子查找失败")
        return False

    try:
        menu = level_ones_dict[tag_name]
        dropdown = menu["father"].find_element(
            By.CLASS_NAME, "filter-select-dropdown"
        )
        # Options are rendered as <a> in some menus and as <li> in others.
        options = dropdown.find_elements(By.TAG_NAME, "a")
        if not options:
            options = dropdown.find_elements(By.TAG_NAME, "li")
        if not options:
            Logger.warn(f"筛选的二级菜单列表内容查找失败 {tag_name}")

        for option in options:
            # get_attribute may return None; treat that as empty text
            # instead of aborting the whole scan with AttributeError.
            option_text = (option.get_attribute("innerText") or "").strip()
            if option_text != filter_name:
                continue
            # Hover the menu caption first so the drop-down is open.
            core.human_move(menu["self"])
            core.human_click(option)
            Logger.info(f"设置筛选条件成功 {tag_name}-{filter_name}")
            self.request_await()
            try:
                # The result list reloads after a click; wait until the
                # clicked element has been detached from the DOM.
                WebDriverWait(self.driver, 10).until(EC.staleness_of(option))
            except Exception as e:
                # Best effort: the click already happened, so a failed wait
                # is only reported. Unlike ``finally: return True`` this
                # does not swallow KeyboardInterrupt / SystemExit.
                Logger.warn(f"等待筛选结果刷新失败 {tag_name}-{filter_name}: {e}")
            return True

        Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
    except Exception:
        Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
    return None
# 筛选
for key,value in self.filter_dict.items():
data = self.esumes[current_name][value]
if isinstance(data, str):
if set_filter(key,data) is False:
return
elif isinstance(data, list):
for name in data:
if set_filter(key,name) is False:
return
else:
Logger.info("格式错误 {key}")
Logger.info("应用筛选标签完成")
Logger.info("设置公司行业完成")
time.sleep(1000) time.sleep(1000)

13
test.py
View File

@ -1,13 +0,0 @@
import base
import time
from selenium.webdriver.common.by import By
# Manual smoke script: build a Core against Baidu's home page, locate the
# element with id "su", print its text and hand it to human_move.
HOLD_OPEN_SECONDS = 1000  # keeps the process alive after the last step

test = base.Core(
    name="test",
    url_base="https://www.baidu.com",
    url_login="/login",
    send_amount=100,
)
test.driver.get(test.get_url())
test.page_load_await()

submit = test.driver.find_element(By.ID, "su")
print(submit.text)
test.human_move(submit)

time.sleep(HOLD_OPEN_SECONDS)