import json
import os
import time
from urllib.parse import quote

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import base
import consts
from base import Logger


class BossCore(base.Core):
    """Browser-automation core for the zhipin.com ("boss") job site.

    Builds on ``base.Core``, which is defined outside this file and supplies
    the WebDriver (``self.driver``), persisted state (``self.info``),
    ``detect``, ``human_move``, ``human_click``, ``get_url``, ``save_info``
    and ``save_cookies``.
    """

    # Cache of resolved city records: province -> city -> {"url", "code"}.
    # Declared on the class, so it is shared by every instance.
    citys = {}

    def __init__(self):
        """Configure the base core with the zhipin.com endpoints."""
        super().__init__(
            name="boss",
            url_base="https://www.zhipin.com/",
            url_login="/web/user/?ka=header-login",
            send_amount=300,
        )

    def detect_login(self):
        """Refresh ``login_status``; save cookies on a logged-out -> logged-in edge."""
        last_status = self.login_status
        # The logout link is only rendered for an authenticated session.
        self.login_status = (
            len(self.driver.find_elements(By.CLASS_NAME, "link-logout")) > 0
        )
        if not last_status and self.login_status:
            self.save_cookies()

    def detect_verify(self):
        """Set ``verify_status`` to whether a verification button is on the page."""
        self.verify_status = (
            len(self.driver.find_elements(By.CLASS_NAME, "validate_button_click"))
            > 0
        )

    def get_city_info(self, value):
        """Resolve the site record for a city from ``site.json``.

        Args:
            value: Single-entry mapping ``{province: [city]}`` (a bare city
                string is accepted as well). Only the first province and its
                first city are used.

        Returns:
            A dict with the keys ``"url"`` and ``"code"``, or ``None`` when
            the province or city is unknown or the data file cannot be read.
        """
        province, cities = next(iter(value.items()))
        # The city is the first entry of the province's list, not the
        # province key itself.
        city = cities if isinstance(cities, str) else next(iter(cities), None)

        cached = self.citys.get(province, {}).get(city)
        if cached is not None:
            return cached

        try:
            with open(
                os.path.join(self.assets_path, "site.json"), "r", encoding="utf-8"
            ) as f:
                all_data = json.load(f)

            # Find the province record.
            province_data = next(
                (item for item in all_data["siteList"] if item["name"] == province),
                None,
            )
            if province_data is None:
                Logger.warn(f"没有找到省份 {province} 的信息")
                return None

            # Find the city record inside the province.
            city_data = next(
                (
                    item
                    for item in province_data["subLevelModelList"]
                    if item["name"] == city
                ),
                None,
            )
            if city_data is None:
                Logger.warn(f"没有找到城市 {city} 的信息")
                return None

            # Build the record completely before caching it, and keep the
            # entries already cached for other cities of the same province.
            record = {"url": city_data["url"], "code": city_data["code"]}
            self.citys.setdefault(province, {})[city] = record
            return record
        except Exception as e:
            # Best effort: any read/parse failure is logged and reported as
            # "no city info" to the caller.
            Logger.warn("获取城市信息失败", e, {"province": province, "city": city})
            return None

    def _init_resume_info(self):
        """Create one delivery-quota slot per (resume, province, city, keyword).

        NOTE(review): ``self.esumes`` is defined outside this file; the name
        looks like a typo of ``resumes`` - confirm against ``base.Core``.
        """
        # Slots are created per (city, keyword), so count cities rather than
        # provinces when splitting the quota.
        total_slots = sum(
            len(resume["keywords"])
            * sum(len(cities) for cities in resume["citys"].values())
            for resume in self.esumes.values()
        )
        plan = self.info["resumes"]
        for name, resume in self.esumes.items():
            plan[name] = {}
            for province, cities in resume["citys"].items():
                plan[name][province] = {}
                for city in cities:
                    plan[name][province][city] = {}
                    for keyword in resume["keywords"]:
                        # This line is only reached when at least one slot
                        # exists, so total_slots is never zero here.
                        plan[name][province][city][keyword] = {
                            "expected": self.send_amount // total_slots,
                            "actual": 0,
                            "page": 1,
                            "surplus": False,
                        }
        self.save_info()

    def _iter_slots(self):
        """Yield ``(name, province, city, keyword, slot)`` in stored order."""
        for name, provinces in self.info["resumes"].items():
            for province, cities in provinces.items():
                for city, keywords in cities.items():
                    for keyword, slot in keywords.items():
                        yield name, province, city, keyword, slot

    def _pick_next_slot(self, actual_amount):
        """Return the next ``(name, province, city, keyword)`` to work on.

        Slots still below their expected quota come first. If none is left
        and the overall quota is not used up, any slot not marked
        ``surplus`` is taken. Returns ``None`` when nothing remains.
        """
        for name, province, city, keyword, slot in self._iter_slots():
            if not slot["surplus"] and slot["actual"] < slot["expected"]:
                return name, province, city, keyword
        if actual_amount < self.send_amount:
            for name, province, city, keyword, slot in self._iter_slots():
                if not slot["surplus"]:
                    return name, province, city, keyword
        return None

    def _get_level_ones(self):
        """Collect the first-level filter menus of the search page.

        Returns:
            ``{menu text: {"father": container element, "self": label
            element}}``, or ``None`` when ``detect()`` reports an
            interruption or no menu could be found.
        """
        if self.detect():
            return None
        try:
            menus = {}
            for label in self.driver.find_elements(
                By.CLASS_NAME, "placeholder-text"
            ):
                parent = label.find_element(By.XPATH, "./..")
                # The container two levels up is the one later searched for
                # the dropdown.
                box = parent.find_element(By.XPATH, "./..")
                menus[label.text] = {"father": box, "self": label}
            if menus:
                return menus
            Logger.error("筛选的一级菜单盒子查找失败")
        except Exception:
            Logger.error("筛选的一级菜单盒子查找失败")
        return None

    def _select_industries(self, resume_name):
        """Tick every configured company-industry filter for a resume.

        Returns:
            ``False`` when the first-level menus could not be read (the
            caller should stop), ``True`` otherwise.
        """
        for industry in self.esumes[resume_name]["industry"]:
            try:
                level_ones = self._get_level_ones()
                if level_ones is None:
                    return False
                menu = level_ones["公司行业"]
                dropdown = menu["father"].find_element(
                    By.CLASS_NAME, "filter-select-dropdown"
                )
                # Each industry is tried exactly once. Retrying an unmatched
                # industry without advancing would never terminate.
                for option in dropdown.find_elements(By.TAG_NAME, "a"):
                    if option.get_attribute("innerText") == industry:
                        self.human_move(menu["self"])
                        self.human_click(option)
                        # Wait until the clicked option goes stale, i.e. the
                        # page has replaced it after applying the filter.
                        WebDriverWait(self.driver, 10).until(
                            EC.staleness_of(option)
                        )
                        break
                else:
                    Logger.warn(f"没有找到公司行业选项 {industry}")
            except Exception as e:
                Logger.error("设置公司行业失败", e, {"industry": industry})
        return True

    def send(self):
        """Run one delivery pass: pick a slot, open its search page, set filters.

        Exits the process (``SystemExit``) after closing the browser window
        once no slot is left to process.
        """
        # Run all checks; if one fired, start the pass over.
        if self.detect():
            self.send()
            return

        # Build the quota table on first use.
        if len(self.info["resumes"]) == 0:
            self._init_resume_info()

        # Pick the slot for this pass.
        # NOTE(review): nothing in this file ever increases actual_amount, so
        # the overall cap below is always considered unused.
        actual_amount = 0
        target = self._pick_next_slot(actual_amount)
        if target is None:
            Logger.info("投递完毕")
            self.driver.close()
            raise SystemExit
        current_name, current_province, current_city, current_keyword = target

        info_path = [
            "resumes",
            current_name,
            current_province,
            current_city,
            current_keyword,
        ]
        Logger.info(
            f"当前投递 {current_name} - {current_province} - {current_city} - {current_keyword}"
        )

        # Resolve the city's site code; without it, retire the slot.
        city_info = self.get_city_info({current_province: [current_city]})
        if city_info is None:
            self.info["resumes"][current_name][current_province][current_city][
                current_keyword
            ]["surplus"] = True
            self.save_info()
            return

        # Open the search page. The keyword is percent-encoded so characters
        # such as '#', '&' or '+' cannot corrupt the query string.
        page = base.deep_get(self.info, info_path)["page"]
        self.driver.get(
            self.get_url(
                f"/web/geek/job?query={quote(str(current_keyword), safe='')}"
                f"&city={city_info['code']}&page={page}"
            )
        )

        # Run all checks again after navigating.
        if self.detect():
            self.send()
            return

        # Apply the company-industry filters.
        if not self._select_industries(current_name):
            return
        Logger.info("设置公司行业完成")

        # NOTE(review): long pause kept from the original; presumably a
        # placeholder until the delivery step is implemented - confirm.
        time.sleep(1000)


core = BossCore()

# Configure log output.
Logger.enable_log_save(core.output_path)

# Open the site for the first time.
core.request_await()
core.driver.get(core.get_url())
core.page_load_await()

# Add cookies (disabled).
# core.request_await()
# core.add_cookies()
# core.page_load_await()

# Deliver.
core.send()

core.driver.close()