Python 爬虫程序:weheartit.com 美女头像图片下载

 Pala   2017-10-14 14:07   51 人阅读   条评论
import requests, urllib, time, random, re
from bs4 import BeautifulSoup
def get_page(url, save_dir=r'D:\python3\pratice\weheartit_imgas', timeout=30):
    """Download every 300px-wide image referenced on one listing page.

    The page at *url* is fetched and parsed; for each ``<img width="300">``
    element the ``src`` URL is printed and the image is saved in *save_dir*
    as ``<id>.jpg``, where ``<id>`` is the first all-digit path segment
    (``/123456/``) of the image URL.

    Images without a ``src`` attribute, or whose URL has no all-digit path
    segment, are skipped instead of aborting the rest of the page.

    Args:
        url: Address of the listing page to scrape.
        save_dir: Directory that receives the images; created if it does
            not exist. Defaults to the path that was previously hard-coded.
        timeout: Timeout in seconds passed to ``requests.get`` for the
            listing-page request, so a stalled server cannot hang the crawl.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.request`` submodule has been loaded.
    import os
    from urllib.request import urlretrieve

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # Compiled once per page rather than once per image.
    id_pattern = re.compile(r'/(\d+)/')
    os.makedirs(save_dir, exist_ok=True)

    for img in soup.select('img[width="300"]'):
        src = img.get('src')
        if not src:
            # <img> without a usable src: nothing to download.
            continue
        print(src)

        match = id_pattern.search(src)
        if match is None:
            # No numeric segment to name the file after; skip this image.
            continue

        # The file name previously ended in a stray dot ('.jpg.'), which
        # produced a broken extension; save with a plain '.jpg' suffix.
        target = os.path.join(save_dir, '{}.jpg'.format(match.group(1)))
        urlretrieve(src, target)
def get_more_page(urls):
    """Scrape each listing page in *urls*, one after another.

    After every page the function sleeps for a random whole number of
    seconds between 1 and 5 (inclusive) before moving on.
    """
    for page_url in urls:
        get_page(page_url)
        pause_seconds = random.randint(1, 5)
        time.sleep(pause_seconds)
# Listing pages 1 through 19 of the "taylorswift" inspiration feed, crawled in order.
urls = [
    'http://weheartit.com/inspirations/taylorswift?page={}'.format(page_number)
    for page_number in range(1, 20)
]
get_more_page(urls)


本文地址:http://chenxm.cc/post/377.html
版权声明:本文为原创文章,版权归 Pala 所有,欢迎分享本文,转载请保留出处!