# -*- coding: utf-8 -*-
# @Time: 2022/4/8 0:02
# @Software: PyCharm
# @File: wallhavenBiZhi.py
import os
import random
import time

import requests
from lxml import etree
class BZ:
    """Download full-size wallpapers linked from wallhaven.cc listing pages.

    The listing page only exposes thumbnail URLs; the full-size URL is
    derived from the thumbnail's file name (see ``_full_image_url``).
    """

    # Directory the original script wrote to; kept as the default so that
    # ``BZ()`` behaves as before.
    DEFAULT_SAVE_DIR = 'C:/Users/19873/Pictures/bizhi/'
    # Host/path prefix under which the full-size images are requested.
    FULL_IMAGE_BASE = 'https://w.wallhaven.cc/full/'

    def __init__(self, save_dir=DEFAULT_SAVE_DIR):
        """Create a downloader.

        Args:
            save_dir: Directory downloaded images are written to. It is
                created on first save if it does not exist.
        """
        self.save_dir = save_dir

    def tree(self, e):
        """Parse the HTML text ``e`` and return the lxml element tree root."""
        return etree.HTML(e)

    @staticmethod
    def _full_image_url(thumb_url, ext=None):
        """Derive the full-size image file name and URL from a thumbnail URL.

        Args:
            thumb_url: Thumbnail URL whose last path segment is
                ``<id>.<ext>``.
            ext: Extension (without the dot) to use instead of the
                thumbnail's own extension, e.g. ``'png'``.

        Returns:
            A ``(file_name, url)`` tuple. The URL's directory component is
            the first two characters of the image id.
        """
        file_name = thumb_url.rsplit('/', 1)[-1]
        stem, thumb_ext = os.path.splitext(file_name)
        suffix = '.' + ext if ext else thumb_ext
        file_name = stem + suffix
        url = BZ.FULL_IMAGE_BASE + stem[:2] + '/wallhaven-' + file_name
        return file_name, url

    def _download_image(self, thumb_url, headers):
        """Fetch the full-size image for one thumbnail and save it.

        The thumbnail's extension is tried first; on a 404 the same id is
        retried with a ``.png`` extension. Only a 200 response is written
        to disk, so error pages are never saved as image files.

        Returns:
            True if an image was saved, False otherwise.
        """
        candidates = [self._full_image_url(thumb_url)]
        png_candidate = self._full_image_url(thumb_url, ext='png')
        if png_candidate not in candidates:
            candidates.append(png_candidate)

        for file_name, image_url in candidates:
            try:
                response = requests.get(url=image_url, headers=headers, timeout=5.0)
            except requests.RequestException as exc:
                # One failed download must not abort the whole crawl.
                print('download failed: {} ({})'.format(image_url, exc))
                return False
            if response.status_code == 200:
                self.save_picture(file_name, response.content)
                print(image_url)
                return True
            if response.status_code != 404:
                # Only "not found" justifies trying the other extension.
                print('unexpected status {}: {}'.format(response.status_code, image_url))
                return False
        print('image not found: {}'.format(thumb_url))
        return False

    def getBZ(self, url, pages=10):
        """Crawl listing pages and download every wallpaper found on them.

        Args:
            url: Listing URL template containing one ``{}`` placeholder
                that receives the page number.
            pages: Number of listing pages to crawl, starting at page 1
                (pages ``1..pages`` inclusive).
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36',
        }
        for page in range(1, pages + 1):
            print(time.strftime("%H:%M:%S"))
            print("第{}页".format(page))
            page_url = url.format(page)
            # Request the listing page; skip it on any failure and move on.
            try:
                response = requests.get(url=page_url, headers=headers, timeout=5.0)
            except requests.RequestException as exc:
                print('page request failed: {} ({})'.format(page_url, exc))
                continue
            if response.status_code != 200:
                print('unexpected status {}: {}'.format(response.status_code, page_url))
                continue

            data = self.tree(response.text)
            # Each <li> under div#thumbs holds one thumbnail.
            for li in data.xpath('.//div[@id="thumbs"]//li'):
                thumbs = li.xpath('.//img[@class="lazyload"]/@data-src')
                if not thumbs:
                    continue
                self._download_image(thumbs[0], headers)

    def save_picture(self, img_end, content_picture):
        """Write image bytes to ``<save_dir>/<img_end>``.

        Args:
            img_end: File name (id plus extension) to save under.
            content_picture: Raw image bytes.
        """
        if self.save_dir:
            # Create the target directory on demand instead of assuming it exists.
            os.makedirs(self.save_dir, exist_ok=True)
        with open(os.path.join(self.save_dir, img_end), 'wb') as file:
            file.write(content_picture)
        print('保存完成' + time.strftime("%H:%M:%S"))
if __name__ == '__main__':
    # Listing URL template; getBZ fills "{}" with the page number.
    url = 'https://wallhaven.cc/hot?page={}'
    bz = BZ()
    bz.getBZ(url)
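# NOTE: the lines below are web page text captured together with the script;
# they are not part of the program.
# Python爬取wallhaven壁纸脚本
# python教程
# 51源码
# 2022-11-01
# 共人阅读
# 下一篇: 返回列表
# 热门推荐
# -
# 01利用Python开发个简单版的磁力搜索工具源码 0
# -
# 02批量转存百度网盘分享资源Python脚本 0
# -
# 03Python爬取豆瓣电影top250排行榜 0
# -
# 04微信小程序羊了个羊python全自动刷榜脚本方法 0
# -
# 05羊了个羊刷通关次数python版本 0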
