完成筛选功能
This commit is contained in:
parent
bf11186b8d
commit
3bfff88784
55
base.py
55
base.py
@ -11,6 +11,7 @@ import time
|
||||
import random
|
||||
import json
|
||||
|
||||
|
||||
# 定义常量
|
||||
DEBUGGER = True # 调试模式
|
||||
SEND_TIME = 10 # 每投递一个岗位
|
||||
@ -190,8 +191,9 @@ class Core:
|
||||
last_send_time = time.time() # 上次发送时间
|
||||
last_request_time = time.time() # 上次请求时间
|
||||
info = {} # 操作信息
|
||||
filter_dict = {} # 筛选条件映射表
|
||||
|
||||
def __init__(self, name, url_base, url_login, send_amount):
|
||||
def __init__(self, name, url_base, url_login, send_amount, filter_dict):
|
||||
# 创建名字
|
||||
self.name = name
|
||||
# 配置url
|
||||
@ -199,6 +201,8 @@ class Core:
|
||||
self.url.login = url_login
|
||||
# 请求总数
|
||||
self.send_amount = send_amount
|
||||
# 筛选条件映射表
|
||||
self.filter_dict = filter_dict
|
||||
# 获取简历
|
||||
for key, value in Info.resume_profiles.items():
|
||||
if self.name in value["platforms"]:
|
||||
@ -240,7 +244,9 @@ class Core:
|
||||
chrome_options.add_argument("--disable-web-security")
|
||||
chrome_options.add_argument("--allow-running-insecure-content")
|
||||
chrome_options.add_argument("--reduce-security-for-testing")
|
||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"])
|
||||
chrome_options.add_experimental_option(
|
||||
"excludeSwitches", ["enable-logging", "enable-automation"]
|
||||
)
|
||||
chrome_options.add_experimental_option("useAutomationExtension", False)
|
||||
# 设置偏好,禁用自动化提示
|
||||
prefs = {
|
||||
@ -249,24 +255,24 @@ class Core:
|
||||
"profile.default_content_setting_values.notifications": 2,
|
||||
# 下面的设置对绕过检测很重要
|
||||
"excludeSwitches": ["enable-automation"],
|
||||
"useAutomationExtension": False
|
||||
"useAutomationExtension": False,
|
||||
}
|
||||
chrome_options.add_experimental_option("prefs", prefs)
|
||||
|
||||
|
||||
# 使用随机用户代理
|
||||
user_agents = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
||||
]
|
||||
chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}")
|
||||
|
||||
|
||||
# 使用隐身模式
|
||||
chrome_options.add_argument("--incognito")
|
||||
|
||||
|
||||
# 添加新的自动化相关选项
|
||||
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
|
||||
|
||||
|
||||
# 添加CDP命令,彻底禁用"Chrome正在被自动化软件控制"的提示
|
||||
chrome_options.add_argument("--remote-debugging-port=9222")
|
||||
# 添加新的自动化相关选项
|
||||
@ -277,16 +283,20 @@ class Core:
|
||||
|
||||
# 创建Chrome浏览器实例
|
||||
self.driver = webdriver.Chrome(options=chrome_options)
|
||||
|
||||
|
||||
# 核心:先访问空白页面然后执行脚本移除webdriver属性
|
||||
self.driver.get("about:blank")
|
||||
|
||||
|
||||
# 立即执行脚本,移除webdriver标志
|
||||
self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
||||
|
||||
self.driver.execute_script(
|
||||
"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
|
||||
)
|
||||
|
||||
# 执行CDP命令,修改navigator.webdriver标志位
|
||||
self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
||||
"source": """
|
||||
self.driver.execute_cdp_cmd(
|
||||
"Page.addScriptToEvaluateOnNewDocument",
|
||||
{
|
||||
"source": """
|
||||
// 覆盖 webdriver 属性
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => false
|
||||
@ -327,7 +337,8 @@ class Core:
|
||||
window.Notification.requestPermission = originalNotification.requestPermission;
|
||||
}
|
||||
"""
|
||||
})
|
||||
},
|
||||
)
|
||||
|
||||
# 获取实际url
|
||||
def get_url(self, url_path=""):
|
||||
@ -457,12 +468,12 @@ class Core:
|
||||
# 所有检测+请求限制
|
||||
def detect(self):
|
||||
self.page_load_await()
|
||||
|
||||
|
||||
# self.detect_verify()
|
||||
# if self.verify_status:
|
||||
# Logger.warn("当前处于人机验证")
|
||||
# return True
|
||||
|
||||
|
||||
# self.detect_login()
|
||||
# if not self.login_status:
|
||||
# Logger.warn("当前处于未登录状态")
|
||||
@ -626,13 +637,9 @@ class Info:
|
||||
DEFAULT_CONFIG = {
|
||||
"citys": {"全国": ["全国"]},
|
||||
"keywords": ["Python"], # 需要搜索的职位,会依次投递
|
||||
"industry": [
|
||||
"不限"
|
||||
], # 公司行业,只能选三个,相关代码枚举的部分,如果需要其他的需要自己找
|
||||
"experience": [
|
||||
"不限"
|
||||
], # 工作经验:"应届毕业生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"
|
||||
"jobType": "不限", # 求职类型:"全职", "兼职"
|
||||
"industry": ["不限"], # 公司行业
|
||||
"experience": ["不限"], # 工作经验
|
||||
"jobType": "不限", # 求职类型
|
||||
"salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上"
|
||||
"degree": [
|
||||
"不限"
|
||||
|
112
boss.py
112
boss.py
@ -18,6 +18,15 @@ class BossCore(base.Core):
|
||||
url_base="https://www.zhipin.com/",
|
||||
url_login="/web/user/?ka=header-login",
|
||||
send_amount=300,
|
||||
filter_dict={
|
||||
"公司行业": "industry",
|
||||
"工作经验": "experience",
|
||||
"求职类型": "jobType",
|
||||
"薪资待遇": "salary",
|
||||
"学历要求": "degree",
|
||||
"公司规模": "scale",
|
||||
"融资阶段": "stage",
|
||||
},
|
||||
)
|
||||
|
||||
def detect_login(self):
|
||||
@ -172,79 +181,72 @@ class BossCore(base.Core):
|
||||
)
|
||||
)
|
||||
|
||||
# 所有检查
|
||||
if core.detect():
|
||||
self.send()
|
||||
return
|
||||
|
||||
## 设置筛选标签
|
||||
def get_level_ones():
|
||||
# 筛选函数
|
||||
def set_filter(tag_name, filter_name):
|
||||
if filter_name == "不限":
|
||||
return True
|
||||
if core.detect():
|
||||
return
|
||||
return False
|
||||
level_ones_dict = {}
|
||||
try:
|
||||
level_ones = self.driver.find_elements(By.CLASS_NAME, "placeholder-text")
|
||||
level_ones_dict = {}
|
||||
level_ones = self.driver.find_elements(
|
||||
By.CLASS_NAME, "placeholder-text"
|
||||
)
|
||||
for item in level_ones:
|
||||
s = item.find_element(By.XPATH, "./..")
|
||||
box = s.find_element(By.XPATH, "./..")
|
||||
level_ones_dict[item.text] = {}
|
||||
level_ones_dict[item.text]["father"] = box
|
||||
level_ones_dict[item.text]["self"] = item
|
||||
if len(level_ones_dict)!=0:
|
||||
return level_ones_dict
|
||||
else:
|
||||
Logger.error("筛选的一级菜单盒子查找失败")
|
||||
except Exception as e:
|
||||
Logger.error("筛选的一级菜单盒子查找失败", e)
|
||||
if level_ones_dict == 0:
|
||||
Logger.error("筛选的一级菜单盒子查找失败")
|
||||
return False
|
||||
|
||||
|
||||
|
||||
# 设置公司行业
|
||||
current_industry_site = 0
|
||||
while current_industry_site < len(self.esumes[current_name]["industry"]):
|
||||
try:
|
||||
level_ones_dict = get_level_ones()
|
||||
if level_ones_dict is None: return
|
||||
is_select = False
|
||||
level_one_box = level_ones_dict["公司行业"]["father"]
|
||||
level_one_box = level_ones_dict[tag_name]["father"]
|
||||
level_two_box = level_one_box.find_element(
|
||||
By.CLASS_NAME, "filter-select-dropdown"
|
||||
)
|
||||
if level_two_box is None:
|
||||
Logger.error("公司行业的二级菜单盒子查找失败")
|
||||
level_twos = level_two_box.find_elements(By.TAG_NAME, "a")
|
||||
if level_twos is None:
|
||||
Logger.error("公司行业的二级菜单选项查找失败")
|
||||
while not is_select and current_industry_site < len(
|
||||
self.esumes[current_name]["industry"]
|
||||
):
|
||||
for item in level_twos:
|
||||
tag_text = item.get_attribute("innerText")
|
||||
if (
|
||||
tag_text
|
||||
== self.esumes[current_name]["industry"][
|
||||
current_industry_site
|
||||
]
|
||||
):
|
||||
core.human_move(level_ones_dict["公司行业"]["self"])
|
||||
core.human_click(item)
|
||||
is_select = True
|
||||
if len(level_twos) == 0:
|
||||
level_twos = level_two_box.find_elements(By.TAG_NAME, "li")
|
||||
if len(level_twos) == 0:
|
||||
Logger.warn(f"筛选的二级菜单列表内容查找失败 {tag_name}")
|
||||
for item in level_twos:
|
||||
tag_text = item.get_attribute("innerText").strip()
|
||||
if tag_text == filter_name:
|
||||
core.human_move(level_ones_dict[tag_name]["self"])
|
||||
core.human_click(item)
|
||||
Logger.info(f"设置筛选条件成功 {tag_name}-{filter_name}")
|
||||
self.request_await()
|
||||
try:
|
||||
WebDriverWait(self.driver, 10).until(EC.staleness_of(item))
|
||||
break
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
"设置公司行业失败",
|
||||
e,
|
||||
{
|
||||
"industry": self.esumes[current_name]["industry"][
|
||||
current_industry_site
|
||||
]
|
||||
},
|
||||
)
|
||||
finally:
|
||||
current_industry_site += 1
|
||||
|
||||
finally:
|
||||
return True
|
||||
Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
|
||||
except:
|
||||
Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}")
|
||||
|
||||
# 筛选
|
||||
for key,value in self.filter_dict.items():
|
||||
data = self.esumes[current_name][value]
|
||||
if isinstance(data, str):
|
||||
if set_filter(key,data) is False:
|
||||
return
|
||||
elif isinstance(data, list):
|
||||
for name in data:
|
||||
if set_filter(key,name) is False:
|
||||
return
|
||||
else:
|
||||
Logger.info("格式错误 {key}")
|
||||
Logger.info("应用筛选标签完成")
|
||||
|
||||
|
||||
|
||||
|
||||
Logger.info("设置公司行业完成")
|
||||
time.sleep(1000)
|
||||
|
||||
|
||||
|
13
test.py
13
test.py
@ -1,13 +0,0 @@
|
||||
import base
|
||||
import time
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
test = base.Core("test", "https://www.baidu.com", "/login", 100)
|
||||
|
||||
test.driver.get(test.get_url())
|
||||
test.page_load_await()
|
||||
submit = test.driver.find_element(By.ID, "su")
|
||||
print(submit.text)
|
||||
test.human_move(submit)
|
||||
|
||||
time.sleep(1000)
|
Loading…
Reference in New Issue
Block a user