python教程

小姐姐图片网站Python爬虫脚本分享

python教程 51源码 2023-05-08 人阅读

发现一个质量非常不错的高清小姐姐图片网站,当然要收藏起来,用了刚学的python爬虫非常简单,只用了多线程,没有作查重处理。图片保存在J:\xiezhen\文件夹下,可自行修改。

import time
import requests
from lxml import etree
import os
import concurrent.futures
 
def download_image(url, img_path):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    img_name = url.split('/')[-1]
    with open(os.path.join(img_path, img_name), 'wb') as f:
        f.write(response.content)
        print(f'图片:{img_path}' + '/' + f'{img_name}下载完成!')
 
def process_page(page):
    url = f'https://www.xiezhen.xyz/page/{page}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers)
    html = etree.HTML(response.content)
    mail_url = html.xpath('//div[@class="excerpts"]/article/a/@href')
    for url in mail_url:
        response = requests.get(url, headers=headers)
        html = etree.HTML(response.content)
        sub_url = html.xpath('//article/p/img')
        img_title = html.xpath('//title/text()')[0].split('-')[0]
        img_path = f'J:/xiezhen/{img_title}'
        if not os.path.exists(img_path):
            os.makedirs(img_path)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = []
            for s_url in sub_url:
                img_url = s_url.attrib['src']
                futures.append(executor.submit(download_image, img_url, img_path))
            for future in concurrent.futures.as_completed(futures):
                pass
        time.sleep(0.5)
 
if __name__ == '__main__':
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for page in range(1, 573):
            futures.append(executor.submit(process_page, page))
        for future in concurrent.futures.as_completed(futures):
            pass


版权声明:文章搜集于网络,如有侵权请联系本站,转载请说明出处:https://www.51yma.cn/jiaocheng/python/1242.html
文章来源:
标签 python爬虫 
下一篇: 返回列表