From 04d9de88e83d38c804b6940917e2ee3c7caaac86 Mon Sep 17 00:00:00 2001
From: lsy2246 <lsy200546@hotmail.com>
Date: Wed, 27 Mar 2024 18:37:21 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0wz?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 python/spider/blog/wz.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/python/spider/blog/wz.py b/python/spider/blog/wz.py
index e17cb9b..3603117 100644
--- a/python/spider/blog/wz.py
+++ b/python/spider/blog/wz.py
@@ -2,24 +2,29 @@ import requests
 from threading import Thread
 from lxml import etree
 import re
+import os
 
 
 def get_html(url):
+    """Fetch the page at `url` and append its URL and article text to a title-named .txt file under wz."""
+    os.makedirs("wz", exist_ok=True)  # no error if the directory already exists (e.g. made by another thread)
     resp = requests.get(url)
     resp.close()
     html = etree.HTML(resp.text)
     title = html.xpath('/html/body/div[2]/div/main/div[1]/div/h1/text()')
     content = html.xpath('/html/body/div[2]/div/main/article/div')
     title = ''.join(list(title[0]))
-    file = open(f"wz\\{title}.txt", "a+")
-    file.write(f"URL : {url}\n")
+    # Explicit UTF-8 so text is not dropped by the platform default codec; os.path.join keeps the path portable.
+    file = open(os.path.join("wz", f"{title}.txt"), "a+", encoding="utf-8", errors='ignore')
+    file.write(f"URL : {url}\n")
+    print(f"正在抓取{title}")
     ## 获取内容
     for i in content:
         text = i.xpath('.//text()')
         for word in text:
-            word_encode = word.encode('utf-8')
             file.write(word)
     file.close()
+    print(f"{title}抓取成功")
 
 
 for index in range(1, 4):