谷歌地图爬虫雏形
This commit is contained in:
parent
ef1ef4f46a
commit
2e1a69fa98
70
python/test/main.py
Normal file
70
python/test/main.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from selenium import webdriver # 驱动
|
||||||
|
from selenium.webdriver.common.by import By # 解析方式
|
||||||
|
from selenium.webdriver import Keys # 模拟按键
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
# 开启无头浏览器
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
|
||||||
|
options = Options()
|
||||||
|
options.add_argument("--headless")
|
||||||
|
options.add_argument("--disable-gpu")
|
||||||
|
|
||||||
|
driver = webdriver.Chrome() # 创建浏览器对象
|
||||||
|
|
||||||
|
driver.get(
|
||||||
|
"https://www.google.com/maps/search/%E6%B1%BD%E4%BF%AE/@22.2354762,113.7531542,11z/data=!4m6!2m5!3m4!2s22.2492,+113.8819!4m2!1d113.8819137!2d22.2492291?entry=ttu")
|
||||||
|
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
scrollable_element = driver.find_element(By.XPATH,
|
||||||
|
'//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]')
|
||||||
|
|
||||||
|
for i in range(10):
|
||||||
|
scrollable_element.send_keys(Keys.PAGE_DOWN)
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
divs = driver.find_elements(By.XPATH,
|
||||||
|
'//div/div/div[1]/div[2]/div/div[1]/div/div/div[1]/div[1]/div[not(@class)]')
|
||||||
|
for div in divs:
|
||||||
|
info_dict = {"店名": "", "地址": "", "电话": "", "网站": "", "Plus Code": "", "星期一": "", "星期二": "", "星期三": "",
|
||||||
|
"星期四": "", "星期五": "", "星期六": "", "星期日": ""}
|
||||||
|
info_content = []
|
||||||
|
div.click()
|
||||||
|
time.sleep(1)
|
||||||
|
info_path_1 = "//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[7]"
|
||||||
|
info_path_2 = "//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[8]"
|
||||||
|
title = driver.find_element(By.XPATH,
|
||||||
|
'//div/div/div[1]/div[3]/div/div[1]/div/div/div[2]/div[2]/div/div[1]/div[1]/h1').text
|
||||||
|
info_dict["店名"] = title
|
||||||
|
info_divs = driver.find_element(By.XPATH, info_path_1)
|
||||||
|
|
||||||
|
role_test = info_divs.get_attribute("role")
|
||||||
|
if role_test == "presentation":
|
||||||
|
info_divs = driver.find_element(By.XPATH, info_path_2)
|
||||||
|
|
||||||
|
a_s = info_divs.find_elements(By.XPATH, './/a')
|
||||||
|
button_s = info_divs.find_elements(By.XPATH, './/button')
|
||||||
|
|
||||||
|
for a in a_s:
|
||||||
|
a_info = a.get_attribute("aria-label")
|
||||||
|
if a is not None:
|
||||||
|
info_content.append(a_info)
|
||||||
|
|
||||||
|
for button in button_s:
|
||||||
|
button_info = button.get_attribute("aria-label")
|
||||||
|
if button_info is not None:
|
||||||
|
info_content.append(button_info)
|
||||||
|
|
||||||
|
for info in info_content:
|
||||||
|
tmp_content = info.split(":", 1)
|
||||||
|
if tmp_content[0] in ["地址", "电话", "Plus Code", "网站"]:
|
||||||
|
info_dict[tmp_content[0]] = tmp_content[1].strip()
|
||||||
|
tmp_content = info.split(",", 1)
|
||||||
|
if tmp_content[0] in ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]:
|
||||||
|
tmp_time = tmp_content[1].split("、", -1)
|
||||||
|
info_dict[tmp_content[0]] = tmp_time[0].strip()
|
||||||
|
|
||||||
|
print(info_dict)
|
Loading…
Reference in New Issue
Block a user