完成筛选功能

This commit is contained in:
lsy 2025-04-17 00:36:31 +08:00
parent bf11186b8d
commit 3bfff88784
3 changed files with 88 additions and 92 deletions

55
base.py
View File

@ -11,6 +11,7 @@ import time
import random
import json
# 定义常量
DEBUGGER = True # 调试模式
SEND_TIME = 10 # 每投递一个岗位
@ -190,8 +191,9 @@ class Core:
last_send_time = time.time() # 上次发送时间
last_request_time = time.time() # 上次请求时间
info = {} # 操作信息
filter_dict = {} # 筛选条件映射表
def __init__(self, name, url_base, url_login, send_amount):
def __init__(self, name, url_base, url_login, send_amount, filter_dict):
# 创建名字
self.name = name
# 配置url
@ -199,6 +201,8 @@ class Core:
self.url.login = url_login
# 请求总数
self.send_amount = send_amount
# 筛选条件映射表
self.filter_dict = filter_dict
# 获取简历
for key, value in Info.resume_profiles.items():
if self.name in value["platforms"]:
@ -240,7 +244,9 @@ class Core:
chrome_options.add_argument("--disable-web-security")
chrome_options.add_argument("--allow-running-insecure-content")
chrome_options.add_argument("--reduce-security-for-testing")
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"])
chrome_options.add_experimental_option(
"excludeSwitches", ["enable-logging", "enable-automation"]
)
chrome_options.add_experimental_option("useAutomationExtension", False)
# 设置偏好,禁用自动化提示
prefs = {
@ -249,24 +255,24 @@ class Core:
"profile.default_content_setting_values.notifications": 2,
# 下面的设置对绕过检测很重要
"excludeSwitches": ["enable-automation"],
"useAutomationExtension": False
"useAutomationExtension": False,
}
chrome_options.add_experimental_option("prefs", prefs)
# 使用随机用户代理
user_agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
]
chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}")
# 使用隐身模式
chrome_options.add_argument("--incognito")
# 添加新的自动化相关选项
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# 添加CDP命令彻底禁用"Chrome正在被自动化软件控制"的提示
chrome_options.add_argument("--remote-debugging-port=9222")
# 添加新的自动化相关选项
@ -277,16 +283,20 @@ class Core:
# 创建Chrome浏览器实例
self.driver = webdriver.Chrome(options=chrome_options)
# 核心先访问空白页面然后执行脚本移除webdriver属性
self.driver.get("about:blank")
# 立即执行脚本移除webdriver标志
self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
self.driver.execute_script(
"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
)
# 执行CDP命令修改navigator.webdriver标志位
self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
self.driver.execute_cdp_cmd(
"Page.addScriptToEvaluateOnNewDocument",
{
"source": """
// 覆盖 webdriver 属性
Object.defineProperty(navigator, 'webdriver', {
get: () => false
@ -327,7 +337,8 @@ class Core:
window.Notification.requestPermission = originalNotification.requestPermission;
}
"""
})
},
)
# 获取实际url
def get_url(self, url_path=""):
@ -457,12 +468,12 @@ class Core:
# 所有检测+请求限制
def detect(self):
self.page_load_await()
# self.detect_verify()
# if self.verify_status:
# Logger.warn("当前处于人机验证")
# return True
# self.detect_login()
# if not self.login_status:
# Logger.warn("当前处于未登录状态")
@ -626,13 +637,9 @@ class Info:
DEFAULT_CONFIG = {
"citys": {"全国": ["全国"]},
"keywords": ["Python"], # 需要搜索的职位,会依次投递
"industry": [
"不限"
], # 公司行业,只能选三个,相关代码枚举的部分,如果需要其他的需要自己找
"experience": [
"不限"
], # 工作经验:"应届毕业生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"
"jobType": "不限", # 求职类型:"全职", "兼职"
"industry": ["不限"], # 公司行业
"experience": ["不限"], # 工作经验
"jobType": "不限", # 求职类型
"salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上"
"degree": [
"不限"

112
boss.py
View File

@ -18,6 +18,15 @@ class BossCore(base.Core):
url_base="https://www.zhipin.com/",
url_login="/web/user/?ka=header-login",
send_amount=300,
filter_dict={
"公司行业": "industry",
"工作经验": "experience",
"求职类型": "jobType",
"薪资待遇": "salary",
"学历要求": "degree",
"公司规模": "scale",
"融资阶段": "stage",
},
)
def detect_login(self):
@ -172,79 +181,72 @@ class BossCore(base.Core):
)
)
# 所有检查
if core.detect():
self.send()
return
## 设置筛选标签
def get_level_ones():
# 筛选函数
def set_filter(tag_name, filter_name):
if filter_name == "不限":
return True
if core.detect():
return
return False
level_ones_dict = {}
try:
level_ones = self.driver.find_elements(By.CLASS_NAME, "placeholder-text")
level_ones_dict = {}
level_ones = self.driver.find_elements(
By.CLASS_NAME, "placeholder-text"
)
for item in level_ones:
s = item.find_element(By.XPATH, "./..")
box = s.find_element(By.XPATH, "./..")
level_ones_dict[item.text] = {}
level_ones_dict[item.text]["father"] = box
level_ones_dict[item.text]["self"] = item
if len(level_ones_dict)!=0:
return level_ones_dict
else:
Logger.error("筛选的一级菜单盒子查找失败")
except Exception as e:
Logger.error("筛选的一级菜单盒子查找失败", e)
if level_ones_dict == 0:
Logger.error("筛选的一级菜单盒子查找失败")
return False
# 设置公司行业
current_industry_site = 0
while current_industry_site < len(self.esumes[current_name]["industry"]):
try:
level_ones_dict = get_level_ones()
if level_ones_dict is None: return
is_select = False
level_one_box = level_ones_dict["公司行业"]["father"]
level_one_box = level_ones_dict[tag_name]["father"]
level_two_box = level_one_box.find_element(
By.CLASS_NAME, "filter-select-dropdown"
)
if level_two_box is None:
Logger.error("公司行业的二级菜单盒子查找失败")
level_twos = level_two_box.find_elements(By.TAG_NAME, "a")
if level_twos is None:
Logger.error("公司行业的二级菜单选项查找失败")
while not is_select and current_industry_site < len(
self.esumes[current_name]["industry"]
):
for item in level_twos:
tag_text = item.get_attribute("innerText")
if (
tag_text
== self.esumes[current_name]["industry"][
current_industry_site
]
):
core.human_move(level_ones_dict["公司行业"]["self"])
core.human_click(item)
is_select = True
if len(level_twos) == 0:
level_twos = level_two_box.find_elements(By.TAG_NAME, "li")
if len(level_twos) == 0:
Logger.warn(f"筛选的二级菜单列表内容查找失败 {tag_name}")
for item in level_twos:
tag_text = item.get_attribute("innerText").strip()
if tag_text == filter_name:
core.human_move(level_ones_dict[tag_name]["self"])
core.human_click(item)
Logger.info(f"设置筛选条件成功 {tag_name}-{filter_name}")
self.request_await()
try:
WebDriverWait(self.driver, 10).until(EC.staleness_of(item))
break
except Exception as e:
Logger.error(
"设置公司行业失败",
e,
{
"industry": self.esumes[current_name]["industry"][
current_industry_site
]
},
)
finally:
current_industry_site += 1
finally:
return True
Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
except:
Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
# 筛选
for key,value in self.filter_dict.items():
data = self.esumes[current_name][value]
if isinstance(data, str):
if set_filter(key,data) is False:
return
elif isinstance(data, list):
for name in data:
if set_filter(key,name) is False:
return
else:
Logger.info("格式错误 {key}")
Logger.info("应用筛选标签完成")
Logger.info("设置公司行业完成")
time.sleep(1000)

13
test.py
View File

@ -1,13 +0,0 @@
import base
import time
from selenium.webdriver.common.by import By
test = base.Core("test", "https://www.baidu.com", "/login", 100)
test.driver.get(test.get_url())
test.page_load_await()
submit = test.driver.find_element(By.ID, "su")
print(submit.text)
test.human_move(submit)
time.sleep(1000)