From acd4b7ef36e37fd7f9878182aeba826f332ba235 Mon Sep 17 00:00:00 2001
From: lsy2246
Date: Mon, 27 May 2024 15:17:26 +0800
Subject: [PATCH] =?UTF-8?q?=E8=B0=B7=E6=AD=8C=E5=9C=B0=E5=9B=BE=E4=BF=A1?=
 =?UTF-8?q?=E6=81=AF=E6=8F=90=E5=8F=96=E5=99=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 python/spider/info/google_map/all_start.py |  30 ++++++
 python/spider/info/google_map/panel.py     |  71 +++++++++++++
 python/spider/info/google_map/script.py    | 120 ++++++++++++++++++++++
 3 files changed, 221 insertions(+)
 create mode 100644 python/spider/info/google_map/all_start.py
 create mode 100644 python/spider/info/google_map/panel.py
 create mode 100644 python/spider/info/google_map/script.py

diff --git a/python/spider/info/google_map/all_start.py b/python/spider/info/google_map/all_start.py
new file mode 100644
index 0000000..c556d5f
--- /dev/null
+++ b/python/spider/info/google_map/all_start.py
@@ -0,0 +1,30 @@
+"""Entry point: wires the input window to the Google Maps scraper."""
+
+import wx
+
+from panel import reptile_Panel
+from script import google_map_script
+
+
+class full_panel(reptile_Panel):
+    """Input window whose submit action runs the scraper."""
+
+    def submit_info(self, url, head, frequency):
+        """Run one extraction, announcing its start and end in dialogs.
+
+        Args:
+            url: Page URL, already validated by the panel.
+            head: True to show the browser window, False to run headless.
+            frequency: Number of PAGE_DOWN presses sent to the result list.
+        """
+        google_map = google_map_script(url, head, frequency)
+        wx.MessageBox('开始提取', '提示', wx.OK | wx.ICON_INFORMATION)
+        # start() runs the whole scrape synchronously, so this handler only
+        # returns (and the second dialog only appears) once it has finished.
+        google_map.start()
+        wx.MessageBox('提取完成', '提示', wx.OK | wx.ICON_INFORMATION)
+
+
+app = wx.App()
+full_panel().Show()
+app.MainLoop()
diff --git a/python/spider/info/google_map/panel.py b/python/spider/info/google_map/panel.py
new file mode 100644
index 0000000..31737d6
--- /dev/null
+++ b/python/spider/info/google_map/panel.py
@@ -0,0 +1,71 @@
+"""wxPython input window for the Google Maps information extractor."""
+
+import re
+
+import wx
+
+# Accepts only https://www.google.com URLs that end with "&entry=ttu".
+_URL_PATTERN = re.compile(r'^https://www\.google\.com.*?&entry=ttu$')
+
+
+class reptile_Panel(wx.Frame):
+    """Window collecting the URL, browser visibility and scroll count.
+
+    Subclasses override submit_info() to act on the validated input.
+    """
+
+    def __init__(self):
+        super().__init__(None, title="谷歌地图信息提取器", size=(300, 200))
+
+        main_panel = wx.Panel(self)
+        main_box = wx.BoxSizer(wx.VERTICAL)
+
+        # URL row
+        url_box = wx.BoxSizer(wx.HORIZONTAL)
+        url_text = wx.StaticText(main_panel, label="url:")
+        url_box.Add(url_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
+        self.url_input = wx.TextCtrl(main_panel)
+        url_box.Add(self.url_input, 1, wx.EXPAND)
+        main_box.Add(url_box, 0, wx.EXPAND | wx.ALL, 5)
+
+        # Toggle: show the browser while scraping
+        view_box = wx.BoxSizer(wx.HORIZONTAL)
+        view_text = wx.StaticText(main_panel, label="是否开启过程可视化:")
+        view_box.Add(view_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
+        self.view_button_yes = wx.RadioButton(main_panel, label="是", style=wx.RB_GROUP)
+        view_box.Add(self.view_button_yes, 0, wx.ALL, 5)
+        self.view_button_no = wx.RadioButton(main_panel, label="否")
+        view_box.Add(self.view_button_no, 0, wx.ALL, 5)
+        main_box.Add(view_box, 0, wx.EXPAND | wx.ALL, 5)
+
+        # Range selector (passed on as the scroll count)
+        range_box = wx.BoxSizer(wx.HORIZONTAL)
+        range_text = wx.StaticText(main_panel, label="商家范围(数字越大,范围越大):")
+        range_box.Add(range_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
+        self.range_input = wx.SpinCtrl(main_panel, value='0', min=0, max=100)
+        range_box.Add(self.range_input, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
+        main_box.Add(range_box, 0, wx.EXPAND | wx.ALL, 5)
+
+        # Submit button
+        submit_button = wx.Button(main_panel, size=(10, 40), label='提取')
+        main_box.Add(submit_button, 1, wx.ALL | wx.EXPAND, 5)
+        submit_button.Bind(wx.EVT_BUTTON, self.submit_function)
+
+        # Attach the layout to the main panel and show the window
+        main_panel.SetSizer(main_box)
+        self.Show()
+
+    def submit_function(self, event):
+        """Validate the form and forward its values to submit_info()."""
+        url = self.url_input.GetValue()
+        if not _URL_PATTERN.fullmatch(url):
+            wx.MessageBox('url填写错误', '提示', wx.OK | wx.ICON_INFORMATION)
+            return
+        # Reading the "yes" button directly always yields a bool, so `head`
+        # can never be left unassigned.
+        head = self.view_button_yes.GetValue()
+        frequency = int(self.range_input.GetValue())
+        self.submit_info(url, head, frequency)
+
+    def submit_info(self, url, head, frequency):
+        """Hook called with validated input; the base class does nothing."""
diff --git a/python/spider/info/google_map/script.py b/python/spider/info/google_map/script.py
new file mode 100644
index 0000000..deb4f39
--- /dev/null
+++ b/python/spider/info/google_map/script.py
@@ -0,0 +1,120 @@
+"""Selenium scraper that exports Google Maps business details to CSV."""
+
+import csv
+import datetime
+import time
+
+from selenium import webdriver  # browser driver
+from selenium.webdriver import Keys  # simulated key presses
+from selenium.webdriver.chrome.options import Options  # headless mode
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.common.by import By  # locator strategies
+
+_CONTACT_FIELDS = ("地址", "电话", "网站", "Plus Code")
+_WEEKDAYS = ("星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日")
+_CSV_COLUMNS = ("店名",) + _CONTACT_FIELDS + _WEEKDAYS
+
+# Positional XPaths into the Google Maps page; they depend on its DOM layout.
+_RESULT_LIST_XPATH = '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]'
+_RESULT_ITEMS_XPATH = '//div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]/div[not(@class)]'
+_DETAIL_XPATH = '//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]'
+_TITLE_XPATH = _DETAIL_XPATH + '/div[2]/div/div[1]/div[1]/h1'
+_INFO_XPATH_1 = _DETAIL_XPATH + '/div[7]'
+_INFO_XPATH_2 = _DETAIL_XPATH + '/div[8]'
+
+
+def _parse_labels(labels):
+    """Build one CSV row from the aria-label strings of a detail pane.
+
+    Labels shaped "field:value" fill the contact columns and labels shaped
+    "weekday,hours" fill the weekday columns (only the part before the
+    first "、" is kept). Labels matching neither shape are ignored.
+    """
+    row = dict.fromkeys(_CSV_COLUMNS, "")
+    for label in labels:
+        key, sep, value = label.partition(":")
+        if sep and key in _CONTACT_FIELDS:
+            row[key] = value.strip()
+        key, sep, value = label.partition(",")
+        if sep and key in _WEEKDAYS:
+            row[key] = value.split("、")[0].strip()
+    return row
+
+
+class google_map_script:
+    """Scrape the businesses listed on a Google Maps page into a CSV file."""
+
+    def __init__(self, url, head, frequency):
+        """Store the job parameters.
+
+        Args:
+            url: Page to open.
+            head: True to show the browser window, False to run headless.
+            frequency: Number of PAGE_DOWN presses sent to the result list.
+        """
+        self.url = url
+        self.head = head
+        self.frequency = frequency
+
+    def start(self):
+        """Create the Chrome driver (headless unless `head`) and run work()."""
+        if self.head:
+            driver = webdriver.Chrome()
+        else:
+            options = Options()
+            options.add_argument("--headless")
+            options.add_argument("--disable-gpu")
+            driver = webdriver.Chrome(options=options)
+        self.work(driver)
+
+    def work(self, driver):
+        """Scrape with `driver`, then quit it even if scraping raised."""
+        try:
+            self._scrape(driver)
+        finally:
+            driver.quit()
+
+    def _scrape(self, driver):
+        """Visit every listed business and append its details to the CSV."""
+        # The output file is named after the moment the run started.
+        formatted_date = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
+        file_path = f'google_map_{formatted_date}.csv'
+
+        driver.get(self.url)
+        time.sleep(3)  # fixed wait for the page to render
+        scrollable_element = driver.find_element(By.XPATH, _RESULT_LIST_XPATH)
+
+        # Scroll the result list; presumably this loads further entries.
+        for _ in range(int(self.frequency)):
+            scrollable_element.send_keys(Keys.PAGE_DOWN)
+            time.sleep(1)
+
+        for div in driver.find_elements(By.XPATH, _RESULT_ITEMS_XPATH):
+            div.click()
+            time.sleep(1)  # fixed wait for the detail pane
+            title = driver.find_element(By.XPATH, _TITLE_XPATH).text
+            info_dict = _parse_labels(self._detail_labels(driver))
+            info_dict["店名"] = title
+            self._append_row(file_path, info_dict)
+
+    @staticmethod
+    def _detail_labels(driver):
+        """Return the aria-labels of the links and buttons in the info block."""
+        info_divs = driver.find_element(By.XPATH, _INFO_XPATH_1)
+        # Fall back to div[8] when div[7] has role="presentation".
+        if info_divs.get_attribute("role") == "presentation":
+            info_divs = driver.find_element(By.XPATH, _INFO_XPATH_2)
+        elements = (info_divs.find_elements(By.XPATH, './/a')
+                    + info_divs.find_elements(By.XPATH, './/button'))
+        labels = (element.get_attribute("aria-label") for element in elements)
+        # get_attribute() returns None for elements without an aria-label.
+        return [label for label in labels if label is not None]
+
+    @staticmethod
+    def _append_row(file_path, row):
+        """Append `row` to the CSV, writing the header if the file is empty."""
+        with open(file_path, mode='a', newline='', encoding='utf-8-sig') as file:
+            writer = csv.DictWriter(file, fieldnames=row.keys())
+            if file.tell() == 0:
+                writer.writeheader()
+            writer.writerow(row)