python 只下载福吧汇总第三页的美女图片

import os
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

# 获取页面源代码

def get_source(url):
    """Fetch a page and return its source decoded as UTF-8 text.

    The HTTP status code is not checked: the body of an error response is
    returned like any other body.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string, decoded as UTF-8.

    Raises:
        requests.RequestException: If the request cannot be completed,
            including when the timeout expires.
    """
    headers = {
        # Browser-like User-Agent sent with the request.
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    }

    # The timeout keeps a stalled server from blocking the script forever.
    # The ``with`` block releases the connection on every exit path, and the
    # text is read before the response is closed.
    with requests.get(url=url, headers=headers, timeout=30) as resp:
        resp.encoding = 'utf-8'
        return resp.text

# 源代码解析 要求只获取jpg格式的链接

def parse_source(html):
    """Extract the JPG image links from a page's HTML source.

    Image ``src`` attributes are selected with a fixed absolute XPath
    (``<img>`` elements below ``<p>`` inside the article container). Only
    links whose text after the last ``.`` is ``jpg`` are kept; the
    comparison ignores case, so ``.JPG`` links are kept too.

    Args:
        html: Page source as a string.

    Returns:
        A list of matching ``src`` values, in document order.
    """
    tree = etree.HTML(html)
    if tree is None:
        # NOTE(review): depending on the lxml version, an empty or unparsable
        # document may yield None instead of raising -- confirm. Treat it as
        # "no images" rather than failing on ``None.xpath``.
        return []

    imgs_url = tree.xpath("/html/body/section/div[1]/div/article/p//img/@src")

    return [
        img_url
        for img_url in imgs_url
        if img_url.rsplit('.', 1)[-1].lower() == 'jpg'
    ]

# 下载jpg图片

def download(jpg_url, save_dir='D:/jpg_download/'):
    """Download one image and save it under ``save_dir``.

    The file name is the part of ``jpg_url`` after its last ``/``.

    Args:
        jpg_url: Address of the image to download.
        save_dir: Folder the image is written to. It is created if it does
            not exist. Defaults to the folder the script always used.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the folder cannot be created or the file cannot be
            written.
    """
    # The timeout stops one stalled download from occupying a worker forever.
    with requests.get(url=jpg_url, timeout=30) as resp:
        # Do not save an HTTP error page to disk under an image file name.
        resp.raise_for_status()
        content = resp.content

    down_name = jpg_url.split('/')[-1]

    # Opening the file would fail if the folder is missing; exist_ok makes
    # this safe when several workers reach this line at the same time.
    os.makedirs(save_dir, exist_ok=True)

    with open(os.path.join(save_dir, down_name), mode='wb') as f:
        f.write(content)

    print(down_name, '下载完毕…')

# 主程序,线程池下载

def main(url='https://fuliba2021.net/2021177.html/3', max_workers=20):
    """Fetch a page, collect its JPG links and download them in parallel.

    Args:
        url: Page to scan for images. Defaults to the page the script
            originally targeted.
        max_workers: Size of the thread pool used for the downloads.
    """
    html = get_source(url)
    jpg_urls = parse_source(html)

    with ThreadPoolExecutor(max_workers) as t:
        futures = [(jpg_url, t.submit(download, jpg_url)) for jpg_url in jpg_urls]

    # Leaving the ``with`` block waits for every task to finish. An exception
    # raised inside a submitted task is stored on its future and would be
    # lost if the future were never inspected, so report failures here.
    for jpg_url, future in futures:
        error = future.exception()
        if error is not None:
            print(jpg_url, '下载失败:', error)

# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()

最近又学了线程池,于是汇总一下自己所学到的爬虫知识,写了一个脚本,用来批量下载 2022 第一期福利汇总第三页的图片。最近的三次元图片太给力啦,美胸美腿都有。

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注