完成boss直聘公司行业的筛选
This commit is contained in:
commit
bf11186b8d
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
**/test
|
||||||
|
**/output
|
||||||
|
**/__pycache__
|
2493
assets/boss/site.json
Normal file
2493
assets/boss/site.json
Normal file
File diff suppressed because it is too large
Load Diff
670
base.py
Normal file
670
base.py
Normal file
@ -0,0 +1,670 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
import os
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
from datetime import datetime
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import json
|
||||||
|
|
||||||
|
# 定义常量
|
||||||
|
DEBUGGER = True # 调试模式
|
||||||
|
SEND_TIME = 10 # 每投递一个岗位
|
||||||
|
REQUEST_TIME = 2 # 请求间隔时间
|
||||||
|
|
||||||
|
|
||||||
|
# 调试信息输出
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class Logger:
|
||||||
|
LEVELS = {"debug": 10, "info": 20, "warn": 30, "error": 40}
|
||||||
|
|
||||||
|
COLORS = {
|
||||||
|
"debug": "\033[90m", # 灰色
|
||||||
|
"info": "\033[92m", # 绿色
|
||||||
|
"warn": "\033[93m", # 黄色
|
||||||
|
"error": "\033[91m", # 红色
|
||||||
|
}
|
||||||
|
|
||||||
|
STYLES = {
|
||||||
|
"debug": "\033[0m", # 默认样式(无加粗等)
|
||||||
|
"info": "\033[1m", # 加粗
|
||||||
|
"warn": "\033[4m", # 下划线
|
||||||
|
"error": "\033[1;31m", # 红色加粗
|
||||||
|
}
|
||||||
|
|
||||||
|
RESET = "\033[0m" # 重置样式
|
||||||
|
current_level = "debug"
|
||||||
|
save_logs = False
|
||||||
|
log_dir = "logs" # 默认日志目录
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def set_level(cls, level):
|
||||||
|
"""设置日志级别"""
|
||||||
|
if level.lower() not in cls.LEVELS:
|
||||||
|
raise ValueError(f"未知日志等级: {level}")
|
||||||
|
cls.current_level = level.lower()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def enable_log_save(cls, base_path):
|
||||||
|
"""启用日志保存功能,并指定基础路径"""
|
||||||
|
cls.save_logs = True
|
||||||
|
cls.log_dir = os.path.join(base_path, "logs") # 生成路径: path/logs
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def disable_log_save(cls):
|
||||||
|
"""禁用日志保存功能"""
|
||||||
|
cls.save_logs = False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _should_log(cls, level):
|
||||||
|
"""判断是否满足日志等级输出条件"""
|
||||||
|
return cls.LEVELS[level] >= cls.LEVELS[cls.current_level]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _write_to_file(cls, log_message):
|
||||||
|
"""写入文件,路径动态生成:logs/yyyy-mm-dd.txt"""
|
||||||
|
if cls.save_logs:
|
||||||
|
date_str = datetime.now().strftime("%Y-%m-%d")
|
||||||
|
log_file_path = os.path.join(cls.log_dir, f"{date_str}.txt")
|
||||||
|
os.makedirs(cls.log_dir, exist_ok=True) # 确保目录存在
|
||||||
|
with open(log_file_path, "a", encoding="utf-8") as file:
|
||||||
|
file.write(log_message + "\n")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _log(cls, level, title, msg=None, data=None):
|
||||||
|
"""打印日志并保存到文件"""
|
||||||
|
if not cls._should_log(level):
|
||||||
|
return
|
||||||
|
|
||||||
|
color = cls.COLORS.get(level, "")
|
||||||
|
style = cls.STYLES.get(level, "")
|
||||||
|
now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
|
||||||
|
|
||||||
|
log_message = f"{now}"
|
||||||
|
|
||||||
|
# 只有标题
|
||||||
|
if msg is None and data is None:
|
||||||
|
log_message += f" {title}"
|
||||||
|
else:
|
||||||
|
log_message += f"\n{title}"
|
||||||
|
if msg:
|
||||||
|
log_message += f"\n{msg}"
|
||||||
|
if data and isinstance(data, dict):
|
||||||
|
for k, v in data.items():
|
||||||
|
log_message += f"\n{k}: {v}"
|
||||||
|
|
||||||
|
# 控制台输出
|
||||||
|
print(f"{style}{color}{log_message}{cls.RESET}")
|
||||||
|
|
||||||
|
# 如果启用了保存日志,则写入文件
|
||||||
|
cls._write_to_file(log_message)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def debug(cls, title, msg=None, data=None):
|
||||||
|
cls._log("debug", title, msg, data)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def info(cls, title, msg=None, data=None):
|
||||||
|
cls._log("info", title, msg, data)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def warn(cls, title, msg=None, data=None):
|
||||||
|
cls._log("warn", title, msg, data)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def error(cls, title, msg=None, data=None):
|
||||||
|
cls._log("error", title, msg, data)
|
||||||
|
|
||||||
|
|
||||||
|
# 深度查询
|
||||||
|
def deep_get(d, keys, default=None):
|
||||||
|
for key in keys:
|
||||||
|
if isinstance(d, dict):
|
||||||
|
d = d.get(key, default)
|
||||||
|
else:
|
||||||
|
return default
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
# 深度设置
|
||||||
|
def deep_set(data, path, value):
|
||||||
|
for i, key in enumerate(path):
|
||||||
|
is_last = i == len(path) - 1
|
||||||
|
|
||||||
|
if isinstance(key, int):
|
||||||
|
if not isinstance(data, list):
|
||||||
|
raise TypeError(
|
||||||
|
f"预期 list,但找到 {type(data).__name__},在路径 {path[:i]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
while len(data) <= key:
|
||||||
|
data.append({})
|
||||||
|
|
||||||
|
if is_last:
|
||||||
|
data[key] = value
|
||||||
|
else:
|
||||||
|
if not isinstance(data[key], (dict, list)):
|
||||||
|
data[key] = {}
|
||||||
|
data = data[key]
|
||||||
|
|
||||||
|
else: # str 类型,处理 dict
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
raise TypeError(
|
||||||
|
f"预期 dict,但找到 {type(data).__name__},在路径 {path[:i]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if is_last:
|
||||||
|
data[key] = value
|
||||||
|
else:
|
||||||
|
if key not in data or not isinstance(data[key], (dict, list)):
|
||||||
|
# 如果下一个是 int,自动生成 list,否则 dict
|
||||||
|
next_key = path[i + 1]
|
||||||
|
data[key] = [] if isinstance(next_key, int) else {}
|
||||||
|
data = data[key]
|
||||||
|
|
||||||
|
|
||||||
|
class Url:
|
||||||
|
|
||||||
|
base = None # 基础url
|
||||||
|
login = None # 登录路径
|
||||||
|
|
||||||
|
|
||||||
|
class Core:
|
||||||
|
name = None # 平台名称
|
||||||
|
driver = None # 浏览器驱动
|
||||||
|
esumes = {} # 属于该平台的简历
|
||||||
|
cookies = [] # cookies
|
||||||
|
url = Url() # url
|
||||||
|
login_status = False # 登录状态
|
||||||
|
verify_status = False # 验证状态
|
||||||
|
assets_path = None # 资源目录
|
||||||
|
output_path = None # 输出目录
|
||||||
|
send_amount = None # 投递总数
|
||||||
|
last_send_time = time.time() # 上次发送时间
|
||||||
|
last_request_time = time.time() # 上次请求时间
|
||||||
|
info = {} # 操作信息
|
||||||
|
|
||||||
|
def __init__(self, name, url_base, url_login, send_amount):
|
||||||
|
# 创建名字
|
||||||
|
self.name = name
|
||||||
|
# 配置url
|
||||||
|
self.url.base = url_base
|
||||||
|
self.url.login = url_login
|
||||||
|
# 请求总数
|
||||||
|
self.send_amount = send_amount
|
||||||
|
# 获取简历
|
||||||
|
for key, value in Info.resume_profiles.items():
|
||||||
|
if self.name in value["platforms"]:
|
||||||
|
self.esumes[key] = value
|
||||||
|
if len(self.esumes) == 0:
|
||||||
|
exit()
|
||||||
|
# 配置资源目录
|
||||||
|
self.assets_path = os.path.join(os.getcwd(), "assets", self.name)
|
||||||
|
# 配置输出目录
|
||||||
|
self.output_path = os.path.join(os.getcwd(), "output", self.name)
|
||||||
|
try:
|
||||||
|
if not os.path.exists(self.output_path):
|
||||||
|
os.makedirs(self.output_path)
|
||||||
|
except Exception as e:
|
||||||
|
Logger.error("创建输出目录失败", e)
|
||||||
|
# 配置信息
|
||||||
|
info = {}
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
os.path.join(self.output_path, "info.json"),
|
||||||
|
"r",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as f:
|
||||||
|
info = json.load(f)
|
||||||
|
except Exception as e:
|
||||||
|
Logger.warn("没有读取到配置信息")
|
||||||
|
if "date" not in info or info["date"] != datetime.now().strftime("%Y-%m-%d"):
|
||||||
|
info = {"date": datetime.now().strftime("%Y-%m-%d"), "resumes": {}}
|
||||||
|
self.info = info
|
||||||
|
# 配置Chrome选项
|
||||||
|
chrome_options = Options()
|
||||||
|
chrome_options.add_argument("--ignore-certificate-errors")
|
||||||
|
chrome_options.add_argument("--ignore-ssl-errors")
|
||||||
|
chrome_options.add_argument("--no-sandbox")
|
||||||
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||||
|
chrome_options.add_argument("--ignore-certificate-errors-spki-list")
|
||||||
|
chrome_options.add_argument("--allow-insecure-localhost")
|
||||||
|
# 新增以下选项
|
||||||
|
chrome_options.add_argument("--disable-web-security")
|
||||||
|
chrome_options.add_argument("--allow-running-insecure-content")
|
||||||
|
chrome_options.add_argument("--reduce-security-for-testing")
|
||||||
|
chrome_options.add_experimental_option("excludeSwitches", ["enable-logging", "enable-automation"])
|
||||||
|
chrome_options.add_experimental_option("useAutomationExtension", False)
|
||||||
|
# 设置偏好,禁用自动化提示
|
||||||
|
prefs = {
|
||||||
|
"credentials_enable_service": False,
|
||||||
|
"profile.password_manager_enabled": False,
|
||||||
|
"profile.default_content_setting_values.notifications": 2,
|
||||||
|
# 下面的设置对绕过检测很重要
|
||||||
|
"excludeSwitches": ["enable-automation"],
|
||||||
|
"useAutomationExtension": False
|
||||||
|
}
|
||||||
|
chrome_options.add_experimental_option("prefs", prefs)
|
||||||
|
|
||||||
|
# 使用随机用户代理
|
||||||
|
user_agents = [
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36",
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
|
||||||
|
]
|
||||||
|
chrome_options.add_argument(f"--user-agent={random.choice(user_agents)}")
|
||||||
|
|
||||||
|
# 使用隐身模式
|
||||||
|
chrome_options.add_argument("--incognito")
|
||||||
|
|
||||||
|
# 添加新的自动化相关选项
|
||||||
|
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
|
||||||
|
|
||||||
|
# 添加CDP命令,彻底禁用"Chrome正在被自动化软件控制"的提示
|
||||||
|
chrome_options.add_argument("--remote-debugging-port=9222")
|
||||||
|
# 添加新的自动化相关选项
|
||||||
|
chrome_options.add_argument("--ignore-gpu-blocklist")
|
||||||
|
chrome_options.add_argument("--use-gl=desktop")
|
||||||
|
# 禁用"浏览器正在被自动化软件控制"的信息栏
|
||||||
|
chrome_options.add_argument("--disable-infobars")
|
||||||
|
|
||||||
|
# 创建Chrome浏览器实例
|
||||||
|
self.driver = webdriver.Chrome(options=chrome_options)
|
||||||
|
|
||||||
|
# 核心:先访问空白页面然后执行脚本移除webdriver属性
|
||||||
|
self.driver.get("about:blank")
|
||||||
|
|
||||||
|
# 立即执行脚本,移除webdriver标志
|
||||||
|
self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
||||||
|
|
||||||
|
# 执行CDP命令,修改navigator.webdriver标志位
|
||||||
|
self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
||||||
|
"source": """
|
||||||
|
// 覆盖 webdriver 属性
|
||||||
|
Object.defineProperty(navigator, 'webdriver', {
|
||||||
|
get: () => false
|
||||||
|
});
|
||||||
|
|
||||||
|
// 修改window.navigator.chrome.runtime
|
||||||
|
if (window.navigator.chrome) {
|
||||||
|
window.navigator.chrome = {
|
||||||
|
runtime: {}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// 修改automation标识
|
||||||
|
const originalQuery = window.navigator.permissions.query;
|
||||||
|
window.navigator.permissions.query = (parameters) => (
|
||||||
|
parameters.name === 'notifications' ?
|
||||||
|
Promise.resolve({ state: Notification.permission }) :
|
||||||
|
originalQuery(parameters)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 修改语言和插件
|
||||||
|
Object.defineProperty(navigator, 'plugins', {
|
||||||
|
get: () => [1, 2, 3, 4, 5]
|
||||||
|
});
|
||||||
|
|
||||||
|
// 添加指纹模拟
|
||||||
|
Object.defineProperty(navigator, 'languages', {
|
||||||
|
get: () => ['zh-CN', 'zh', 'en-US', 'en']
|
||||||
|
});
|
||||||
|
|
||||||
|
// 覆盖 Notification API
|
||||||
|
const originalNotification = window.Notification;
|
||||||
|
if (originalNotification) {
|
||||||
|
window.Notification = function(title, options) {
|
||||||
|
return new originalNotification(title, options);
|
||||||
|
};
|
||||||
|
window.Notification.permission = originalNotification.permission;
|
||||||
|
window.Notification.requestPermission = originalNotification.requestPermission;
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
})
|
||||||
|
|
||||||
|
# 获取实际url
|
||||||
|
def get_url(self, url_path=""):
|
||||||
|
return urljoin(self.url.base, url_path)
|
||||||
|
|
||||||
|
# 防止请求频繁
|
||||||
|
def request_await(self):
|
||||||
|
interval = time.time() - self.last_request_time
|
||||||
|
if interval < REQUEST_TIME:
|
||||||
|
rest = REQUEST_TIME - interval
|
||||||
|
Logger.info(f"请求间隔过短,等待{rest}秒")
|
||||||
|
time.sleep(rest)
|
||||||
|
self.last_request_time = time.time()
|
||||||
|
|
||||||
|
# 等待页面加载完成
|
||||||
|
def page_load_await(self, timeout=10, wait_for_element=None):
|
||||||
|
last_url = self.driver.current_url # 记录初始 URL
|
||||||
|
last_change_time = time.time() # 记录 URL 变化的时间
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
# 检查文档加载状态
|
||||||
|
ready_state = self.driver.execute_script("return document.readyState")
|
||||||
|
|
||||||
|
# 检查 URL 是否变化并更新时间
|
||||||
|
current_url = self.driver.current_url
|
||||||
|
if current_url != last_url:
|
||||||
|
last_url = current_url
|
||||||
|
last_change_time = time.time() # 更新时间
|
||||||
|
Logger.info("URL 发生变化,重新检测页面加载状态")
|
||||||
|
|
||||||
|
# 如果 URL 没有变化超过两秒并且文档状态是 complete
|
||||||
|
if time.time() - last_change_time > 2 and ready_state == "complete":
|
||||||
|
# 检查是否可以获取元素(确保 DOM 元素没有失效)
|
||||||
|
if wait_for_element:
|
||||||
|
try:
|
||||||
|
self.driver.find_element(*wait_for_element)
|
||||||
|
except Exception as e:
|
||||||
|
Logger.info(f"获取 DOM 元素时出错: {e}")
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
Logger.warn(f"检查页面加载状态时出错: {e}")
|
||||||
|
|
||||||
|
# 如果在 timeout 时间内没有加载完成,退出等待
|
||||||
|
if time.time() - last_change_time > timeout:
|
||||||
|
Logger.warn("等待超时,退出")
|
||||||
|
return False
|
||||||
|
|
||||||
|
time.sleep(1) # 每秒检测一次
|
||||||
|
|
||||||
|
# 添加cookies
|
||||||
|
def add_cookies(self, cookies_str=None):
|
||||||
|
self.page_load_await()
|
||||||
|
# 读取cookies
|
||||||
|
if not cookies_str:
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
os.path.join(self.output_path, "cookies.txt"),
|
||||||
|
"r",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as f:
|
||||||
|
cookies_str = f.read()
|
||||||
|
except Exception as e:
|
||||||
|
Logger.info("读取cookies失败", e)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
self.save_cookies(cookies_str)
|
||||||
|
# 解析cookies
|
||||||
|
try:
|
||||||
|
cookies_list = []
|
||||||
|
|
||||||
|
for item in cookies_str.split("; "):
|
||||||
|
name, value = item.split("=", 1)
|
||||||
|
cookie = {
|
||||||
|
"name": name,
|
||||||
|
"value": value,
|
||||||
|
"domain": urlparse(self.url.base).netloc,
|
||||||
|
}
|
||||||
|
cookies_list.append(cookie)
|
||||||
|
|
||||||
|
self.cookies = cookies_list
|
||||||
|
except Exception as e:
|
||||||
|
Logger.warn("解析cookies失败", e, {"cookies_str": cookies_str})
|
||||||
|
return
|
||||||
|
|
||||||
|
# 添加cookies
|
||||||
|
try:
|
||||||
|
if self.cookies:
|
||||||
|
for cookie in self.cookies:
|
||||||
|
self.driver.add_cookie(cookie)
|
||||||
|
self.driver.refresh()
|
||||||
|
Logger.info("添加cookie完成")
|
||||||
|
else:
|
||||||
|
Logger.info("没有cookie,添加失败")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
Logger.info("添加cookie失败", e)
|
||||||
|
|
||||||
|
# 保存cookies
|
||||||
|
def save_cookies(self, cookies_str=None):
|
||||||
|
try:
|
||||||
|
if cookies_str is None:
|
||||||
|
cookies = self.driver.get_cookies()
|
||||||
|
cookies_str = "; ".join(
|
||||||
|
[f"{cookie['name']}={cookie['value']}" for cookie in cookies]
|
||||||
|
)
|
||||||
|
with open(
|
||||||
|
os.path.join(self.output_path, "cookies.txt"),
|
||||||
|
"w",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as f:
|
||||||
|
f.write(cookies_str)
|
||||||
|
Logger.info("保存cookie成功")
|
||||||
|
except Exception as e:
|
||||||
|
Logger.info("保存cookie失败", e)
|
||||||
|
|
||||||
|
# 检测登录状态
|
||||||
|
def detect_login(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 检测是否触发验证
|
||||||
|
def detect_verify(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 所有检测+请求限制
|
||||||
|
def detect(self):
|
||||||
|
self.page_load_await()
|
||||||
|
|
||||||
|
# self.detect_verify()
|
||||||
|
# if self.verify_status:
|
||||||
|
# Logger.warn("当前处于人机验证")
|
||||||
|
# return True
|
||||||
|
|
||||||
|
# self.detect_login()
|
||||||
|
# if not self.login_status:
|
||||||
|
# Logger.warn("当前处于未登录状态")
|
||||||
|
# login_url = self.get_url(self.url.login)
|
||||||
|
# if self.driver.current_url != login_url:
|
||||||
|
# self.driver.get(login_url)
|
||||||
|
# else:
|
||||||
|
# time.sleep(5)
|
||||||
|
# return True
|
||||||
|
self.request_await()
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 获取城市链接
|
||||||
|
def get_city_info(self, value):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 模拟真实移动
|
||||||
|
def human_move(self, element, timeout=10):
|
||||||
|
try:
|
||||||
|
# 等待元素可见
|
||||||
|
WebDriverWait(self.driver, timeout).until(EC.visibility_of(element))
|
||||||
|
|
||||||
|
# 滚动到元素可见位置
|
||||||
|
self.driver.execute_script(
|
||||||
|
"arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
|
||||||
|
element,
|
||||||
|
)
|
||||||
|
time.sleep(random.uniform(0.3, 0.8)) # 等待滚动完成
|
||||||
|
|
||||||
|
# 使用JavaScript直接高亮元素,模拟鼠标悬停效果
|
||||||
|
self.driver.execute_script(
|
||||||
|
"""
|
||||||
|
var element = arguments[0];
|
||||||
|
var originalBackground = element.style.backgroundColor;
|
||||||
|
var originalTransition = element.style.transition;
|
||||||
|
|
||||||
|
element.style.transition = 'background-color 0.3s';
|
||||||
|
element.style.backgroundColor = 'rgba(255, 255, 0, 0.3)';
|
||||||
|
|
||||||
|
setTimeout(function() {
|
||||||
|
element.style.backgroundColor = originalBackground;
|
||||||
|
element.style.transition = originalTransition;
|
||||||
|
}, 300);
|
||||||
|
""",
|
||||||
|
element,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 直接移动到元素
|
||||||
|
actions = ActionChains(self.driver)
|
||||||
|
actions.move_to_element(element).perform()
|
||||||
|
|
||||||
|
time.sleep(random.uniform(0.3, 0.8))
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"移动到元素失败,错误信息:{e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 模拟真实点击
|
||||||
|
def human_click(self, element, timeout=10):
|
||||||
|
try:
|
||||||
|
# 滚动到元素中间
|
||||||
|
self.driver.execute_script(
|
||||||
|
"arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
|
||||||
|
element,
|
||||||
|
)
|
||||||
|
time.sleep(random.uniform(0.3, 0.8))
|
||||||
|
|
||||||
|
# 确保滚动完成
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# 简化鼠标操作,避免复杂的动作链
|
||||||
|
actions = ActionChains(self.driver)
|
||||||
|
# 直接移动到元素
|
||||||
|
actions.move_to_element(element)
|
||||||
|
# 短暂停顿
|
||||||
|
actions.pause(random.uniform(0.2, 0.5))
|
||||||
|
# 点击
|
||||||
|
actions.click()
|
||||||
|
# 执行动作链
|
||||||
|
actions.perform()
|
||||||
|
|
||||||
|
time.sleep(random.uniform(0.2, 0.6))
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
Logger.error("点击失败", e)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 模拟真实输入
|
||||||
|
def human_type(self, by, value, text, timeout=10):
|
||||||
|
# 显式等待元素可输入
|
||||||
|
wait = WebDriverWait(self.driver, timeout)
|
||||||
|
input_element = wait.until(EC.element_to_be_clickable((by, value)))
|
||||||
|
|
||||||
|
# 清空现有输入(如果有的话)
|
||||||
|
input_element.clear()
|
||||||
|
|
||||||
|
# 模拟键入的过程(逐个字符)
|
||||||
|
actions = ActionChains(self.driver)
|
||||||
|
for char in text:
|
||||||
|
actions.send_keys(char)
|
||||||
|
actions.pause(random.uniform(0.1, 0.4)) # 随机停顿模拟人类输入的速度
|
||||||
|
|
||||||
|
actions.perform()
|
||||||
|
|
||||||
|
# 投递
|
||||||
|
def send(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 保存信息
|
||||||
|
def save_info(self):
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
os.path.join(self.output_path, "info.json"),
|
||||||
|
"w",
|
||||||
|
encoding="utf-8",
|
||||||
|
) as f:
|
||||||
|
json.dump(self.info, f, indent=4)
|
||||||
|
Logger.info("保存信息成功")
|
||||||
|
except Exception as e:
|
||||||
|
Logger.info("保存信息失败", e)
|
||||||
|
|
||||||
|
# 修改信息
|
||||||
|
def set_info(self, paths, value):
|
||||||
|
data = self.info
|
||||||
|
for i, key in enumerate(paths):
|
||||||
|
is_last = i == len(paths) - 1
|
||||||
|
|
||||||
|
if isinstance(key, int):
|
||||||
|
if not isinstance(data, list):
|
||||||
|
raise TypeError(
|
||||||
|
f"路径 {paths[:i]} 处应为 list,但实际为 {type(data).__name__}"
|
||||||
|
)
|
||||||
|
while len(data) <= key:
|
||||||
|
data.append({})
|
||||||
|
if is_last:
|
||||||
|
data[key] = value
|
||||||
|
else:
|
||||||
|
if not isinstance(data[key], (dict, list)):
|
||||||
|
data[key] = {}
|
||||||
|
data = data[key]
|
||||||
|
|
||||||
|
else: # key 是 str,处理 dict
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
raise TypeError(
|
||||||
|
f"路径 {paths[:i]} 处应为 dict,但实际为 {type(data).__name__}"
|
||||||
|
)
|
||||||
|
if is_last:
|
||||||
|
data[key] = value
|
||||||
|
else:
|
||||||
|
if key not in data or not isinstance(data[key], (dict, list)):
|
||||||
|
next_key = paths[i + 1]
|
||||||
|
data[key] = [] if isinstance(next_key, int) else {}
|
||||||
|
data = data[key]
|
||||||
|
|
||||||
|
|
||||||
|
class Info:
|
||||||
|
# 存储所有预定义的简历配置
|
||||||
|
resume_profiles = {}
|
||||||
|
|
||||||
|
# 默认配置
|
||||||
|
DEFAULT_CONFIG = {
|
||||||
|
"citys": {"全国": ["全国"]},
|
||||||
|
"keywords": ["Python"], # 需要搜索的职位,会依次投递
|
||||||
|
"industry": [
|
||||||
|
"不限"
|
||||||
|
], # 公司行业,只能选三个,相关代码枚举的部分,如果需要其他的需要自己找
|
||||||
|
"experience": [
|
||||||
|
"不限"
|
||||||
|
], # 工作经验:"应届毕业生", "1年以下", "1-3年", "3-5年", "5-10年", "10年以上"
|
||||||
|
"jobType": "不限", # 求职类型:"全职", "兼职"
|
||||||
|
"salary": "50K以上", # 薪资(单选):"3K以下", "3-5K", "5-10K", "10-20K", "20-50K", "50K以上"
|
||||||
|
"degree": [
|
||||||
|
"不限"
|
||||||
|
], # 学历: "初中及以下", "中专/中技", "高中", "大专", "本科", "硕士", "博士"
|
||||||
|
"scale": [
|
||||||
|
"不限"
|
||||||
|
], # 公司规模:"0-20人", "20-99人", "100-499人", "500-999人", "1000-9999人", "10000人以上"
|
||||||
|
"stage": [
|
||||||
|
"不限"
|
||||||
|
], # "未融资", "天使轮", "A轮", "B轮", "C轮", "D轮及以上", "已上市", "不需要融资"
|
||||||
|
"expectedSalary": [
|
||||||
|
25
|
||||||
|
], # 期望薪资,单位为K,第一个数字为最低薪资,第二个数字为最高薪资,只填一个数字默认为最低薪水
|
||||||
|
"filterDeadHR": True, # 是否过滤不活跃HR,该选项会过滤半年前活跃的HR
|
||||||
|
"sayHi": "您好,我有8年工作经验,还有AIGC大模型、Java,Python,Golang和运维的相关经验,希望应聘这个岗位,期待可以与您进一步沟通,谢谢!", # 必须要关闭boss的自动打招呼
|
||||||
|
"platforms": ["boss"],
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# 初始化时加载默认配置
|
||||||
|
if "default" not in Info.resume_profiles:
|
||||||
|
Info.resume_profiles["default"] = Info.DEFAULT_CONFIG.copy()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def add_profile(cls, name, config_dict):
|
||||||
|
"""添加一个新的简历配置"""
|
||||||
|
# 从默认配置创建一个新的配置副本
|
||||||
|
new_config = cls.DEFAULT_CONFIG.copy()
|
||||||
|
|
||||||
|
# 更新新的配置副本,只有在config_dict中有值的字段会被更新
|
||||||
|
new_config.update(config_dict)
|
||||||
|
|
||||||
|
# 将新的配置添加到resume_profiles
|
||||||
|
cls.resume_profiles[name] = new_config
|
||||||
|
return cls
|
267
boss.py
Normal file
267
boss.py
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
import time
|
||||||
|
import base
|
||||||
|
from base import Logger
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
import consts
|
||||||
|
|
||||||
|
|
||||||
|
class BossCore(base.Core):
|
||||||
|
citys = {}
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__(
|
||||||
|
name="boss",
|
||||||
|
url_base="https://www.zhipin.com/",
|
||||||
|
url_login="/web/user/?ka=header-login",
|
||||||
|
send_amount=300,
|
||||||
|
)
|
||||||
|
|
||||||
|
def detect_login(self):
|
||||||
|
last_status = self.login_status
|
||||||
|
self.login_status = (
|
||||||
|
len(self.driver.find_elements(By.CLASS_NAME, "link-logout")) > 0
|
||||||
|
)
|
||||||
|
if not last_status and self.login_status:
|
||||||
|
self.save_cookies()
|
||||||
|
|
||||||
|
def detect_verify(self):
|
||||||
|
self.verify_status = (
|
||||||
|
len(self.driver.find_elements(By.CLASS_NAME, "validate_button_click")) > 0
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_city_info(self, value):
|
||||||
|
province = next(iter(value.keys()))
|
||||||
|
city = next(iter(value.keys()))
|
||||||
|
if province in self.citys.keys():
|
||||||
|
if city in self.citys[province].keys():
|
||||||
|
return self.citys[province][city]
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
os.path.join(self.assets_path + "\site.json"), "r", encoding="utf-8"
|
||||||
|
) as f:
|
||||||
|
allData = json.load(f)
|
||||||
|
ProvinceData = None
|
||||||
|
Data = None
|
||||||
|
# 获取省份数据
|
||||||
|
for item in allData["siteList"]:
|
||||||
|
if item["name"] == province:
|
||||||
|
ProvinceData = item
|
||||||
|
self.citys[province] = {}
|
||||||
|
break
|
||||||
|
if ProvinceData is None:
|
||||||
|
Logger.warn(f"没有找到省份 {province} 的信息")
|
||||||
|
return
|
||||||
|
# 获取城市数据
|
||||||
|
for item in ProvinceData["subLevelModelList"]:
|
||||||
|
if item["name"] == city:
|
||||||
|
Data = item
|
||||||
|
self.citys[province][city] = {}
|
||||||
|
break
|
||||||
|
if Data is None:
|
||||||
|
Logger.warn(f"没有找到城市 {city} 的信息")
|
||||||
|
return
|
||||||
|
self.citys[province][city]["url"] = Data["url"]
|
||||||
|
self.citys[province][city]["code"] = Data["code"]
|
||||||
|
return self.citys[province][city]
|
||||||
|
except Exception as e:
|
||||||
|
Logger.warn(f"获取城市信息失败", e, {"province": province, "city": city})
|
||||||
|
return
|
||||||
|
|
||||||
|
def send(self):
|
||||||
|
# 所有检查
|
||||||
|
if core.detect():
|
||||||
|
self.send()
|
||||||
|
return
|
||||||
|
# 检查简历信息
|
||||||
|
if len(self.info["resumes"]) == 0:
|
||||||
|
send_amount = 0
|
||||||
|
|
||||||
|
for name, resume in self.esumes.items():
|
||||||
|
send_amount += len(resume["keywords"]) * len(resume["citys"])
|
||||||
|
self.info["resumes"][name] = {}
|
||||||
|
|
||||||
|
for name, resume in self.esumes.items():
|
||||||
|
for province, citys in resume["citys"].items():
|
||||||
|
self.info["resumes"][name][province] = {}
|
||||||
|
for city in citys:
|
||||||
|
self.info["resumes"][name][province][city] = {}
|
||||||
|
for keyword in resume["keywords"]:
|
||||||
|
self.info["resumes"][name][province][city][keyword] = {
|
||||||
|
"expected": self.send_amount // send_amount,
|
||||||
|
"actual": 0,
|
||||||
|
"page": 1,
|
||||||
|
"surplus": False,
|
||||||
|
}
|
||||||
|
self.save_info()
|
||||||
|
# 取出本次需要投递的
|
||||||
|
current_name = None
|
||||||
|
current_keyword = None
|
||||||
|
current_province = None
|
||||||
|
current_city = None
|
||||||
|
actual_amount = 0
|
||||||
|
|
||||||
|
for name, provinces in self.info["resumes"].items():
|
||||||
|
for province, citys in provinces.items():
|
||||||
|
for city, kywords in citys.items():
|
||||||
|
for keyword, info in kywords.items():
|
||||||
|
if (
|
||||||
|
info["surplus"] == False
|
||||||
|
and info["actual"] < info["expected"]
|
||||||
|
):
|
||||||
|
current_province = province
|
||||||
|
current_city = city
|
||||||
|
current_keyword = keyword
|
||||||
|
current_name = name
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
|
||||||
|
if actual_amount < self.send_amount and current_keyword is None:
|
||||||
|
for name, provinces in self.info["resumes"].items():
|
||||||
|
for province, citys in provinces.items():
|
||||||
|
for city, kywords in citys.items():
|
||||||
|
for keyword, info in kywords.items():
|
||||||
|
if info["surplus"] == False:
|
||||||
|
current_province = province
|
||||||
|
current_city = city
|
||||||
|
current_keyword = keyword
|
||||||
|
current_name = name
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
if current_name is not None:
|
||||||
|
break
|
||||||
|
if current_keyword is None:
|
||||||
|
Logger.info("投递完毕")
|
||||||
|
self.driver.close()
|
||||||
|
exit()
|
||||||
|
|
||||||
|
info_path = [
|
||||||
|
"resumes",
|
||||||
|
current_name,
|
||||||
|
current_province,
|
||||||
|
current_city,
|
||||||
|
current_keyword,
|
||||||
|
]
|
||||||
|
Logger.info(
|
||||||
|
f"当前投递 {current_name} - {current_province} - {current_city} - {current_keyword}"
|
||||||
|
)
|
||||||
|
# 获取要投递的城市链接
|
||||||
|
city_info = self.get_city_info({current_province: [current_city]})
|
||||||
|
if city_info is None:
|
||||||
|
self.info["resumes"][current_name][current_province][current_city][
|
||||||
|
current_keyword
|
||||||
|
]["surplus"] = True
|
||||||
|
self.save_info()
|
||||||
|
return
|
||||||
|
# 跳转到搜索页面
|
||||||
|
self.driver.get(
|
||||||
|
self.get_url(
|
||||||
|
f"/web/geek/job?query={current_keyword}&city={city_info['code']}&page={base.deep_get(self.info, info_path)['page']}"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 所有检查
|
||||||
|
if core.detect():
|
||||||
|
self.send()
|
||||||
|
return
|
||||||
|
|
||||||
|
## 设置筛选标签
|
||||||
|
def get_level_ones():
|
||||||
|
if core.detect():
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
level_ones = self.driver.find_elements(By.CLASS_NAME, "placeholder-text")
|
||||||
|
level_ones_dict = {}
|
||||||
|
for item in level_ones:
|
||||||
|
s = item.find_element(By.XPATH, "./..")
|
||||||
|
box = s.find_element(By.XPATH, "./..")
|
||||||
|
level_ones_dict[item.text] = {}
|
||||||
|
level_ones_dict[item.text]["father"] = box
|
||||||
|
level_ones_dict[item.text]["self"] = item
|
||||||
|
if len(level_ones_dict)!=0:
|
||||||
|
return level_ones_dict
|
||||||
|
else:
|
||||||
|
Logger.error("筛选的一级菜单盒子查找失败")
|
||||||
|
except Exception as e:
|
||||||
|
Logger.error("筛选的一级菜单盒子查找失败")
|
||||||
|
|
||||||
|
|
||||||
|
# 设置公司行业
|
||||||
|
current_industry_site = 0
|
||||||
|
while current_industry_site < len(self.esumes[current_name]["industry"]):
|
||||||
|
try:
|
||||||
|
level_ones_dict = get_level_ones()
|
||||||
|
if level_ones_dict is None: return
|
||||||
|
is_select = False
|
||||||
|
level_one_box = level_ones_dict["公司行业"]["father"]
|
||||||
|
level_two_box = level_one_box.find_element(
|
||||||
|
By.CLASS_NAME, "filter-select-dropdown"
|
||||||
|
)
|
||||||
|
if level_two_box is None:
|
||||||
|
Logger.error("公司行业的二级菜单盒子查找失败")
|
||||||
|
level_twos = level_two_box.find_elements(By.TAG_NAME, "a")
|
||||||
|
if level_twos is None:
|
||||||
|
Logger.error("公司行业的二级菜单选项查找失败")
|
||||||
|
while not is_select and current_industry_site < len(
|
||||||
|
self.esumes[current_name]["industry"]
|
||||||
|
):
|
||||||
|
for item in level_twos:
|
||||||
|
tag_text = item.get_attribute("innerText")
|
||||||
|
if (
|
||||||
|
tag_text
|
||||||
|
== self.esumes[current_name]["industry"][
|
||||||
|
current_industry_site
|
||||||
|
]
|
||||||
|
):
|
||||||
|
core.human_move(level_ones_dict["公司行业"]["self"])
|
||||||
|
core.human_click(item)
|
||||||
|
is_select = True
|
||||||
|
WebDriverWait(self.driver, 10).until(EC.staleness_of(item))
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
Logger.error(
|
||||||
|
"设置公司行业失败",
|
||||||
|
e,
|
||||||
|
{
|
||||||
|
"industry": self.esumes[current_name]["industry"][
|
||||||
|
current_industry_site
|
||||||
|
]
|
||||||
|
},
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
current_industry_site += 1
|
||||||
|
|
||||||
|
|
||||||
|
Logger.info("设置公司行业完成")
|
||||||
|
time.sleep(1000)
|
||||||
|
|
||||||
|
|
||||||
|
core = BossCore()
|
||||||
|
|
||||||
|
# 配置日志输出
|
||||||
|
Logger.enable_log_save(core.output_path)
|
||||||
|
|
||||||
|
# 首次打开页面
|
||||||
|
core.request_await()
|
||||||
|
core.driver.get(core.get_url())
|
||||||
|
core.page_load_await()
|
||||||
|
# 添加cookies
|
||||||
|
# core.request_await()
|
||||||
|
# core.add_cookies()
|
||||||
|
# core.page_load_await()
|
||||||
|
# 投递
|
||||||
|
core.send()
|
||||||
|
|
||||||
|
core.driver.close()
|
18
consts.py
Normal file
18
consts.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
from base import Info
|
||||||
|
|
||||||
|
Info.add_profile(
|
||||||
|
"custom_profile",
|
||||||
|
{
|
||||||
|
"keywords": ["全栈开发", "Rust", "React", "爬虫", "Linux"],
|
||||||
|
"experience": ["3-5年", "5-10年"], # 根据您的经验调整
|
||||||
|
"salary": "20-50K", # 根据您的期望调整
|
||||||
|
"sayHi": (
|
||||||
|
"您好,我是一名全栈开发工程师,精通React和Rust技术栈。在前端方面熟悉React、Dioxus并结合"
|
||||||
|
"Astro,Vite.js和Tailwind CSS进行现代响应式前端开发;后端方面熟悉Rust、Node.js,擅长使用Rocket和"
|
||||||
|
"Express框架。此外,我具备数据库管理、容器化部署、网络通信等多方面技能,有echoes和echoer等"
|
||||||
|
"项目经验。希望能有机会与您进一步交流,谢谢!"
|
||||||
|
),
|
||||||
|
"degree": ["本科", "硕士"], # 根据您的学历调整
|
||||||
|
"industry": ["互联网", "计算机软件", "计算机服务"], # 根据您的目标行业调整
|
||||||
|
},
|
||||||
|
)
|
13
test.py
Normal file
13
test.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import base
|
||||||
|
import time
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
test = base.Core("test", "https://www.baidu.com", "/login", 100)
|
||||||
|
|
||||||
|
test.driver.get(test.get_url())
|
||||||
|
test.page_load_await()
|
||||||
|
submit = test.driver.find_element(By.ID, "su")
|
||||||
|
print(submit.text)
|
||||||
|
test.human_move(submit)
|
||||||
|
|
||||||
|
time.sleep(1000)
|
Loading…
Reference in New Issue
Block a user