谷歌地图信息提取器
This commit is contained in:
parent
2d49b66911
commit
acd4b7ef36
18
python/spider/info/google_map/all_start.py
Normal file
18
python/spider/info/google_map/all_start.py
Normal file
@ -0,0 +1,18 @@
|
||||
from script import *
|
||||
from panel import *
|
||||
|
||||
class full_panel(reptile_Panel):
    """Concrete extractor window: wires the input form to the scraping script.

    Construction is inherited unchanged from ``reptile_Panel``; only the
    ``submit_info`` hook is overridden.
    """

    def submit_info(self, url, head, frequency):
        """Run one extraction with the validated form values.

        Args:
            url: Map URL taken from the form.
            head: ``True`` to show the browser window, ``False`` for headless.
            frequency: Number of scroll steps forwarded to the script.
        """
        extractor = google_map_script(url, head, frequency)
        message_style = wx.OK | wx.ICON_INFORMATION
        wx.MessageBox('开始提取', '提示', message_style)
        # start() is called synchronously, so the second dialog only appears
        # after it returns.
        extractor.start()
        wx.MessageBox('提取完成', '提示', message_style)
|
||||
|
||||
|
||||
# Application entry: create the wx application object, open the extractor
# window, then hand control to the GUI event loop.
app = wx.App()
main_window = full_panel()
main_window.Show()
app.MainLoop()
|
||||
|
61
python/spider/info/google_map/panel.py
Normal file
61
python/spider/info/google_map/panel.py
Normal file
@ -0,0 +1,61 @@
|
||||
import wx
|
||||
import re
|
||||
|
||||
|
||||
class reptile_Panel(wx.Frame):
    """Input form for the Google Maps information extractor.

    The window collects a URL, a "show browser" yes/no choice and a numeric
    range, validates the URL, and passes the three values to
    ``submit_info``. ``submit_info`` does nothing here; subclasses override
    it to perform the actual work.
    """

    # Accepted URL shape: starts with https://www.google.com and ends with
    # "&entry=ttu". Compiled once instead of on every button click.
    _URL_PATTERN = re.compile(r'^https://www\.google\.com.*?&entry=ttu$')

    def __init__(self):
        """Build the widgets, lay them out vertically and show the window."""
        super().__init__(None, title="谷歌地图信息提取器", size=(300, 200))

        main_panel = wx.Panel(self)
        main_box = wx.BoxSizer(wx.VERTICAL)

        # URL row
        url_box = wx.BoxSizer(wx.HORIZONTAL)
        url_text = wx.StaticText(main_panel, label="url:")
        url_box.Add(url_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        self.url_input = wx.TextCtrl(main_panel)
        url_box.Add(self.url_input, 1, wx.EXPAND)
        main_box.Add(url_box, 0, wx.EXPAND | wx.ALL, 5)

        # Visible-browser toggle (yes/no radio pair; RB_GROUP starts the group)
        view_box = wx.BoxSizer(wx.HORIZONTAL)
        view_text = wx.StaticText(main_panel, label="是否开启过程可视化:")
        view_box.Add(view_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        self.view_button_yes = wx.RadioButton(main_panel, label="是", style=wx.RB_GROUP)
        view_box.Add(self.view_button_yes, 0, wx.ALL, 5)
        self.view_button_no = wx.RadioButton(main_panel, label="否")
        view_box.Add(self.view_button_no, 0, wx.ALL, 5)
        main_box.Add(view_box, 0, wx.EXPAND | wx.ALL, 5)

        # Range selector (0-100)
        range_box = wx.BoxSizer(wx.HORIZONTAL)
        range_text = wx.StaticText(main_panel, label="商家范围(数字越大,范围越大):")
        range_box.Add(range_text, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        self.range_input = wx.SpinCtrl(main_panel, value='0', min=0, max=100)
        range_box.Add(self.range_input, 0, wx.ALIGN_CENTER_VERTICAL | wx.RIGHT, 5)
        main_box.Add(range_box, 0, wx.EXPAND | wx.ALL, 5)

        # Submit button
        submit_button = wx.Button(main_panel, size=(10, 40), label='提取')
        main_box.Add(submit_button, 1, wx.ALL | wx.EXPAND, 5)
        submit_button.Bind(wx.EVT_BUTTON, self.submit_function)

        # Attach the sizer to the panel and show the window
        main_panel.SetSizer(main_box)
        self.Show()

    def submit_function(self, event):
        """Validate the form and forward its values to ``submit_info``.

        Shows an error dialog and returns without calling ``submit_info``
        when the URL does not match the accepted pattern.

        Args:
            event: The button event that triggered the handler (unused).
        """
        # Strip surrounding whitespace so a pasted URL with a stray space or
        # trailing blank is not rejected.
        url = self.url_input.GetValue().strip()
        if not self._URL_PATTERN.fullmatch(url):
            wx.MessageBox('url填写错误', '提示', wx.OK | wx.ICON_INFORMATION)
            return

        # Derive the flag from the "yes" button alone so it is always bound,
        # even if neither radio button reports as selected.
        head = bool(self.view_button_yes.GetValue())
        frequency = int(self.range_input.GetValue())
        self.submit_info(url, head, frequency)

    def submit_info(self, url, head, frequency):
        """Hook called with the validated form values; no-op in this class.

        Args:
            url: The validated URL string.
            head: ``True`` when the "yes" radio button is selected.
            frequency: Integer value of the range spin control.
        """
        pass
|
99
python/spider/info/google_map/script.py
Normal file
99
python/spider/info/google_map/script.py
Normal file
@ -0,0 +1,99 @@
|
||||
from selenium import webdriver # 驱动
|
||||
from selenium.webdriver.common.by import By # 解析方式
|
||||
from selenium.webdriver import Keys # 模拟按键
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
import time
|
||||
import csv
|
||||
import datetime
|
||||
# 开启无头浏览器
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
|
||||
|
||||
class google_map_script:
|
||||
def __init__(self, url, head, frequency):
|
||||
self.url = url
|
||||
self.head = head
|
||||
self.frequency = frequency
|
||||
|
||||
def start(self):
|
||||
if self.head:
|
||||
driver = webdriver.Chrome()
|
||||
else:
|
||||
options = Options()
|
||||
options.add_argument("--headless")
|
||||
options.add_argument("--disable-gpu")
|
||||
driver = webdriver.Chrome(options=options) # 创建浏览器对象
|
||||
self.work(driver)
|
||||
|
||||
def work(self, driver):
|
||||
driver.get(self.url)
|
||||
# 获取当前日期和时间
|
||||
now = datetime.datetime.now()
|
||||
formatted_date = now.strftime('%Y-%m-%d_%H-%M-%S') # 格式化日期和时间
|
||||
|
||||
# 指定CSV文件路径,文件名为当前时间
|
||||
file_path = f'google_map_{formatted_date}.csv'
|
||||
|
||||
driver.get(self.url)
|
||||
time.sleep(3)
|
||||
scrollable_element = driver.find_element(By.XPATH,
|
||||
'//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]')
|
||||
|
||||
for i in range(int(self.frequency)):
|
||||
scrollable_element.send_keys(Keys.PAGE_DOWN)
|
||||
time.sleep(1)
|
||||
|
||||
divs = driver.find_elements(By.XPATH,
|
||||
'//div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]/div[not(@class)]')
|
||||
for div in divs:
|
||||
info_dict = {"店名": "", "地址": "", "电话": "", "网站": "", "Plus Code": "", "星期一": "", "星期二": "",
|
||||
"星期三": "",
|
||||
"星期四": "", "星期五": "", "星期六": "", "星期日": ""}
|
||||
info_content = []
|
||||
div.click()
|
||||
time.sleep(1)
|
||||
info_path_1 = "//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[7]"
|
||||
info_path_2 = "//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[8]"
|
||||
title = driver.find_element(By.XPATH,
|
||||
'//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[2]/div/div[1]/div[1]/h1').text
|
||||
info_dict["店名"] = title
|
||||
info_divs = driver.find_element(By.XPATH, info_path_1)
|
||||
|
||||
role_test = info_divs.get_attribute("role")
|
||||
if role_test == "presentation":
|
||||
info_divs = driver.find_element(By.XPATH, info_path_2)
|
||||
|
||||
a_s = info_divs.find_elements(By.XPATH, './/a')
|
||||
button_s = info_divs.find_elements(By.XPATH, './/button')
|
||||
|
||||
for a in a_s:
|
||||
a_info = a.get_attribute("aria-label")
|
||||
if a is not None:
|
||||
info_content.append(a_info)
|
||||
|
||||
for button in button_s:
|
||||
button_info = button.get_attribute("aria-label")
|
||||
if button_info is not None:
|
||||
info_content.append(button_info)
|
||||
|
||||
for info in info_content:
|
||||
tmp_content = info.split(":", 1)
|
||||
if tmp_content[0] in ["地址", "电话", "Plus Code", "网站"]:
|
||||
info_dict[tmp_content[0]] = tmp_content[1].strip()
|
||||
tmp_content = info.split(",", 1)
|
||||
if tmp_content[0] in ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]:
|
||||
tmp_time = tmp_content[1].split("、", -1)
|
||||
info_dict[tmp_content[0]] = tmp_time[0].strip()
|
||||
|
||||
with open(file_path, mode='a', newline='', encoding='utf-8-sig') as file:
|
||||
# CSV写入器
|
||||
writer = csv.DictWriter(file, fieldnames=info_dict.keys())
|
||||
|
||||
# 写入CSV标题,仅在文件为空时写入
|
||||
if file.tell() == 0:
|
||||
writer.writeheader()
|
||||
|
||||
# 写入数据
|
||||
writer.writerow(info_dict)
|
||||
driver.quit()
|
||||
|
Loading…
Reference in New Issue
Block a user