from urllib.parse import urljoin

import scrapy

from ..items import WeatherItem


class WeatherSpider(scrapy.Spider):
    """Crawl the text-forecast pages of www.weather.com.cn.

    ``parse`` follows the area links found on the start page and
    ``each_page`` turns every forecast table on an area page into one
    ``WeatherItem`` per data row.
    """

    name = "weather"
    allowed_domains = ["www.weather.com.cn"]
    start_urls = ["http://www.weather.com.cn/textFC/hb.shtml"]

    # The first two rows of every forecast table are treated as headers.
    HEADER_ROWS = 2

    def parse(self, response):
        """Yield one request per area link listed on the start page."""
        areas = response.xpath('/html/body/div[4]/div[2]/div/div/ul[1]/li')
        for area in areas:
            area_suffix = area.xpath('.//a/@href').extract_first()
            if not area_suffix:
                # urljoin() would return the current URL for an empty href,
                # which only produces a duplicate request.
                continue
            area_url = urljoin(response.url, area_suffix)
            yield scrapy.Request(area_url, callback=self.each_page)

    def each_page(self, response):
        """Yield a ``WeatherItem`` for every data row of every visible table."""
        title = response.xpath('//title/text()').get(default='')
        region = title.replace('天气预报', '')
        self.logger.info("正在爬取%s", region)

        all_days_info = response.xpath(
            "//div[@class='hanml']/div[not(@style='display: none;')]//table"
        )
        for days_info in all_days_info:
            info_trs = days_info.css('tr')
            if len(info_trs) <= self.HEADER_ROWS:
                # Header-only (or empty) table: nothing to extract.
                continue

            raw_date = info_trs[0].xpath(".//td[3]/text()").get(default='')
            data_time = raw_date.replace('白天', '').replace('夜间', '')
            provincial = (
                info_trs[self.HEADER_ROWS].xpath(".//td[1]/a/text()").extract_first()
            )

            for index, info_tr in enumerate(info_trs[self.HEADER_ROWS:]):
                # The first data row starts with an extra cell (the one the
                # province name is read from), so its columns are shifted
                # right by one compared with the remaining rows.
                offset = 1 if index == 0 else 0
                yield self._build_item(
                    info_tr, offset, region, provincial, data_time
                )

    @staticmethod
    def _cell_text(row, column, inner=''):
        """Return the first text node of ``td[column]`` (optionally of a child)."""
        return row.xpath(f".//td[{column}]{inner}/text()").extract_first()

    def _build_item(self, row, offset, region, provincial, data_time):
        """Build a ``WeatherItem`` from one table row.

        ``offset`` is added to every column index; suffix ``_1`` fields come
        from the first weather/wind/temperature column group and ``_2``
        fields from the second.
        """
        weather_item = WeatherItem()
        weather_item['region'] = region
        weather_item['provincial'] = provincial
        weather_item['data_time'] = data_time
        weather_item['city'] = self._cell_text(row, 1 + offset, '/a')

        weather_item['weather_1'] = self._cell_text(row, 2 + offset)
        weather_item['wind_direction_1'] = self._cell_text(row, 3 + offset, '/span[1]')
        weather_item['wind_power_1'] = self._cell_text(row, 3 + offset, '/span[2]')
        weather_item['temperature_1'] = self._cell_text(row, 4 + offset)

        weather_item['weather_2'] = self._cell_text(row, 5 + offset)
        weather_item['wind_direction_2'] = self._cell_text(row, 6 + offset, '/span[1]')
        weather_item['wind_power_2'] = self._cell_text(row, 6 + offset, '/span[2]')
        weather_item['temperature_2'] = self._cell_text(row, 7 + offset)
        return weather_item
"""Plot the mean day/night temperature per date for every sheet of weather.xlsx.

Each sheet of the workbook is treated as one area. For every area the
temperatures are averaged per date and drawn as two lines (day and night)
on its own subplot; the figure is saved as an SVG file.
"""
import numpy as np
import pandas as pd

FILE_PATH = 'weather.xlsx'
OUTPUT_PATH = 'weather_matplotlib.svg'

DATE_COLUMN = '日期时间'
DAY_COLUMN = '白天温度'
NIGHT_COLUMN = '夜间温度'

Y_TICKS = range(0, 45, 5)


def summarize_sheet(frame):
    """Average the day and night temperatures of *frame* per date.

    Dates keep the order in which they first appear. Missing temperature
    values are ignored when averaging, and rows without a date are dropped.

    Returns a dict with the keys ``"时间"`` (dates), ``"白天"`` (mean day
    temperatures) and ``"夜间"`` (mean night temperatures); the three lists
    have the same length.
    """
    means = (
        frame.groupby(DATE_COLUMN, sort=False)[[DAY_COLUMN, NIGHT_COLUMN]].mean()
    )
    return {
        "时间": list(means.index),
        "白天": means[DAY_COLUMN].astype(float).tolist(),
        "夜间": means[NIGHT_COLUMN].astype(float).tolist(),
    }


def load_region_summaries(file_path=FILE_PATH):
    """Return ``{sheet name: summarize_sheet(sheet)}`` for the workbook."""
    # Open the workbook once and close it deterministically instead of
    # re-reading the file for every sheet.
    with pd.ExcelFile(file_path, engine='openpyxl') as workbook:
        return {
            sheet_name: summarize_sheet(workbook.parse(sheet_name))
            for sheet_name in workbook.sheet_names
        }


def _draw_region(ax, area, info):
    """Draw the day/night lines, value labels and summary texts of one area."""
    dates = info['时间']
    day_means = info['白天']
    night_means = info['夜间']

    if not dates:
        # Empty sheet: there is nothing to plot or to derive limits from.
        ax.set_title(area)
        return

    ax.plot(dates, day_means, label='白天', marker='o')
    for x, y in zip(dates, day_means):
        ax.annotate(f'{y:.1f}°', (x, y), xytext=(5, 10),
                    textcoords='offset points', ha='center')
    for x, y in zip(dates, night_means):
        ax.annotate(f'{y:.1f}°', (x, y), xytext=(5, -15),
                    textcoords='offset points', ha='center')
    ax.plot(dates, night_means, label='夜间', marker='o')

    ax.set_title(area)
    ax.set_yticks(Y_TICKS)
    ax.set_yticklabels([f"{tick}°" for tick in Y_TICKS])
    ax.grid(alpha=0.5, linestyle='--')
    ax.legend()
    ax.set_xlim([dates[0], dates[-1]])

    # Summary texts sit just above the top edge of the axes (data coordinates).
    left = ax.get_xlim()[0]
    top = ax.get_ylim()[1] + 0.2
    ax.text(left, top, f"最高温度{np.max(day_means):.1f}°")
    ax.text(left + 0.8, top, f"最低温度{np.min(night_means):.1f}°")
    # The average covers both day and night values, not the night values only.
    ax.text(left + 1.6, top,
            f"平均温度{np.mean(day_means + night_means):.1f}°")


def plot_region_summaries(summaries, output_path=OUTPUT_PATH, show=True):
    """Draw one subplot per area, save the figure and optionally show it.

    Raises ``ValueError`` when *summaries* is empty. Returns the figure.
    """
    # Imported here so the aggregation helpers above stay importable on
    # machines without a plotting backend.
    import matplotlib.pyplot as plt

    if not summaries:
        raise ValueError("no areas to plot")

    # SimHei is selected so the CJK titles and labels can be rendered
    # (the font must be installed on the machine).
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # squeeze=False always returns a 2-D array, even for a single subplot.
    fig, axes = plt.subplots(nrows=len(summaries), ncols=1,
                             figsize=(10, 30), squeeze=False)
    for ax, (area, info) in zip(axes[:, 0], summaries.items()):
        _draw_region(ax, area, info)

    fig.savefig(output_path, bbox_inches='tight', transparent=True)
    if show:
        plt.show()
    return fig


def main():
    """Read ``weather.xlsx`` and render ``weather_matplotlib.svg``."""
    plot_region_summaries(load_region_summaries(FILE_PATH))


if __name__ == "__main__":
    main()