import requests
from threading import Thread
from lxml import etree
import re


def get_html(url):
    resp = requests.get(url)
    resp.close()
    html = etree.HTML(resp.text)
    title = html.xpath('/html/body/div[2]/div/main/div[1]/div/h1/text()')
    content = html.xpath('/html/body/div[2]/div/main/article/div')
    title = ''.join(list(title[0]))
    file = open(f"wz\\{title}.txt", "a+")
    file.write(f"URL : {url}\n")
    ## 获取内容
    for i in content:
        text = i.xpath('.//text()')
        for word in text:
            word_encode = word.encode('utf-8')
            file.write(word)
    file.close()


for index in range(1, 4):
    domain = f"https://wyc21.com/index.php/page/{index}"
    resp = requests.get(domain)
    resp.close()
    html = etree.HTML(resp.text)
    dev = html.xpath("/html/body/div[2]/div/main/div[1]/div")
    urls = []
    for item in dev:
        url = item.xpath("./div[2]/div[1]/a/@href")
        urls.append(url[0])

    for url in urls:
        get_html(url)