From 3bfff887847ab0b30d690a102552a80acd2a1eca Mon Sep 17 00:00:00 2001 From: lsy Date: Thu, 17 Apr 2025 00:36:31 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90=E7=AD=9B=E9=80=89=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base.py | 55 ++++++++++++++++------------ boss.py | 112 ++++++++++++++++++++++++++++---------------------------- test.py | 13 ------- 3 files changed, 88 insertions(+), 92 deletions(-) delete mode 100644 test.py diff --git a/base.py b/base.py index 5357fd6..a13f164 100644 --- a/base.py +++ b/base.py @@ -11,6 +11,7 @@ import time import random import json + # 定义常量 DEBUGGER = True # 调试模式 SEND_TIME = 10 # 每投递一个岗位 @@ -190,8 +191,9 @@ class Core: last_send_time = time.time() # 上次发送时间 last_request_time = time.time() # 上次请求时间 info = {} # 操作信息 + filter_dict = {} # 筛选条件映射表 - def __init__(self, name, url_base, url_login, send_amount): + def __init__(self, name, url_base, url_login, send_amount, filter_dict): # 创建名字 self.name = name # 配置url @@ -199,6 +201,8 @@ class Core: self.url.login = url_login # 请求总数 self.send_amount = send_amount + # 筛选条件映射表 + self.filter_dict = filter_dict # 获取简历 for key, value in Info.resume_profiles.items(): if self.name in value["platforms"]: @@ -240,7 +244,9 @@ class Core: chrome_options.add_argument("--disable-web-security") chrome_options.add_argument("--allow-running-insecure-content") chrome_options.add_argument("--reduce-security-for-testing") - chrome_options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"]) + chrome_options.add_experimental_option( + "excludeSwitches", ["enable-logging", "enable-automation"] + ) chrome_options.add_experimental_option("useAutomationExtension", False) # 设置偏好,禁用自动化提示 prefs = { @@ -249,24 +255,24 @@ class Core: "profile.default_content_setting_values.notifications": 2, # 下面的设置对绕过检测很重要 "excludeSwitches": ["enable-automation"], - "useAutomationExtension": False + "useAutomationExtension": False, } chrome_options.add_experimental_option("prefs", prefs) - + # 使用随机用户代理 user_agents = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36", ] chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}") - + # 使用隐身模式 chrome_options.add_argument("--incognito") - + # 添加新的自动化相关选项 chrome_options.add_argument("--disable-blink-features=AutomationControlled") - + # 添加CDP命令,彻底禁用"Chrome正在被自动化软件控制"的提示 chrome_options.add_argument("--remote-debugging-port=9222") # 添加新的自动化相关选项 @@ -277,16 +283,20 @@ class Core: # 创建Chrome浏览器实例 self.driver = webdriver.Chrome(options=chrome_options) - + # 核心:先访问空白页面然后执行脚本移除webdriver属性 self.driver.get("about:blank") - + # 立即执行脚本,移除webdriver标志 - self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})") - + self.driver.execute_script( + "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})" + ) + # 执行CDP命令,修改navigator.webdriver标志位 - self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { - "source": """ + self.driver.execute_cdp_cmd( + "Page.addScriptToEvaluateOnNewDocument", + { + "source": """ // 覆盖 webdriver 属性 Object.defineProperty(navigator, 'webdriver', { get: () => false @@ -327,7 +337,8 @@ class Core: window.Notification.requestPermission = originalNotification.requestPermission; } """ - }) + }, + ) # 获取实际url def get_url(self, url_path=""): @@ -457,12 +468,12 @@ class Core: # 所有检测+请求限制 def detect(self): self.page_load_await() - + # self.detect_verify() # if self.verify_status: # Logger.warn("当前处于人机验证") # return True - + # self.detect_login() # if not self.login_status: # Logger.warn("当前处于未登录状态") @@ -626,13 +637,9 @@ class Info: DEFAULT_CONFIG = { "citys": {"全国": ["全国"]}, "keywords": ["Python"], # 需要搜索的职位,会依次投递 - "industry": [ - "不限" - ], # 公司行业,只能选三个,相关代码枚举的部分,如果需要其他的需要自己找 - "experience": [ - "不限" - ], # 工作经验:"应届毕业生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上" - "jobType": "不限", # 求职类型:"全职", "兼职" + "industry": ["不限"], # 公司行业 + "experience": ["不限"], # 工作经验 + "jobType": "不限", # 求职类型 "salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上" "degree": [ "不限" diff --git a/boss.py b/boss.py index aa7a204..2a3124c 100644 --- a/boss.py +++ b/boss.py @@ -18,6 +18,15 @@ class BossCore(base.Core): url_base="https://www.zhipin.com/", url_login="/web/user/?ka=header-login", send_amount=300, + filter_dict={ + "公司行业": "industry", + "工作经验": "experience", + "求职类型": "jobType", + "薪资待遇": "salary", + "学历要求": "degree", + "公司规模": "scale", + "融资阶段": "stage", + }, ) def detect_login(self): @@ -172,79 +181,72 @@ class BossCore(base.Core): ) ) - # 所有检查 - if core.detect(): - self.send() - return - - ## 设置筛选标签 - def get_level_ones(): + # 筛选函数 + def set_filter(tag_name, filter_name): + if filter_name == "不限": + return True if core.detect(): - return + return False + level_ones_dict = {} try: - level_ones = self.driver.find_elements(By.CLASS_NAME, "placeholder-text") - level_ones_dict = {} + level_ones = self.driver.find_elements( + By.CLASS_NAME, "placeholder-text" + ) for item in level_ones: s = item.find_element(By.XPATH, "./..") box = s.find_element(By.XPATH, "./..") level_ones_dict[item.text] = {} level_ones_dict[item.text]["father"] = box level_ones_dict[item.text]["self"] = item - if len(level_ones_dict)!=0: - return level_ones_dict - else: - Logger.error("筛选的一级菜单盒子查找失败") except Exception as e: + Logger.error("筛选的一级菜单盒子查找失败", e) + if level_ones_dict == 0: Logger.error("筛选的一级菜单盒子查找失败") + return False + - - # 设置公司行业 - current_industry_site = 0 - while current_industry_site < len(self.esumes[current_name]["industry"]): try: - level_ones_dict = get_level_ones() - if level_ones_dict is None: return - is_select = False - level_one_box = level_ones_dict["公司行业"]["father"] + level_one_box = level_ones_dict[tag_name]["father"] level_two_box = level_one_box.find_element( By.CLASS_NAME, "filter-select-dropdown" ) - if level_two_box is None: - Logger.error("公司行业的二级菜单盒子查找失败") level_twos = level_two_box.find_elements(By.TAG_NAME, "a") - if level_twos is None: - Logger.error("公司行业的二级菜单选项查找失败") - while not is_select and current_industry_site < len( - self.esumes[current_name]["industry"] - ): - for item in level_twos: - tag_text = item.get_attribute("innerText") - if ( - tag_text - == self.esumes[current_name]["industry"][ - current_industry_site - ] - ): - core.human_move(level_ones_dict["公司行业"]["self"]) - core.human_click(item) - is_select = True + if len(level_twos) == 0: + level_twos = level_two_box.find_elements(By.TAG_NAME, "li") + if len(level_twos) == 0: + Logger.warn(f"筛选的二级菜单列表内容查找失败 {tag_name}") + for item in level_twos: + tag_text = item.get_attribute("innerText").strip() + if tag_text == filter_name: + core.human_move(level_ones_dict[tag_name]["self"]) + core.human_click(item) + Logger.info(f"设置筛选条件成功 {tag_name}-{filter_name}") + self.request_await() + try: WebDriverWait(self.driver, 10).until(EC.staleness_of(item)) - break - except Exception as e: - Logger.error( - "设置公司行业失败", - e, - { - "industry": self.esumes[current_name]["industry"][ - current_industry_site - ] - }, - ) - finally: - current_industry_site += 1 - + finally: + return True + Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}") + except: + Logger.info(f"设置筛选条件失败 {tag_name}-{filter_name}") + + # 筛选 + for key,value in self.filter_dict.items(): + data = self.esumes[current_name][value] + if isinstance(data, str): + if set_filter(key,data) is False: + return + elif isinstance(data, list): + for name in data: + if set_filter(key,name) is False: + return + else: + Logger.info("格式错误 {key}") + Logger.info("应用筛选标签完成") + + + - Logger.info("设置公司行业完成") time.sleep(1000) diff --git a/test.py b/test.py deleted file mode 100644 index b0c3d1a..0000000 --- a/test.py +++ /dev/null @@ -1,13 +0,0 @@ -import base -import time -from selenium.webdriver.common.by import By - -test = base.Core("test", "https://www.baidu.com", "/login", 100) - -test.driver.get(test.get_url()) -test.page_load_await() -submit = test.driver.find_element(By.ID, "su") -print(submit.text) -test.human_move(submit) - -time.sleep(1000)