from bs4 import BeautifulSoup
import requests
import os
import shutil
# Browser-like request headers sent with every page fetch.  The site
# rejects obviously non-browser clients, so we mimic Chrome on Windows
# (User-Agent) and provide a plausible Referer and cookies.
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "zh-CN,zh;q=0.8",
"Connection": "close",
"Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",
"Referer": "http://www.infoq.com",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER"
}
# url = 'https://www.infoq.com/presentations/'
def download_jpg(imageurl, image_location_path):
    """Download the image at *imageurl* and save it to *image_location_path*.

    The response body is streamed straight to disk via
    ``shutil.copyfileobj`` so large images are never held fully in
    memory.  Nothing is written when the server does not answer 200.
    """
    response = requests.get(imageurl, stream=True)
    try:
        if response.status_code == 200:
            # Tell urllib3 to undo any gzip/deflate transfer encoding so
            # the raw stream yields actual image bytes.  The original
            # code misspelled this as ``deconde_content``, which set an
            # unrelated attribute and silently did nothing.
            response.raw.decode_content = True
            with open(image_location_path, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
    finally:
        # Release the connection back to the pool even on non-200 or error.
        response.close()
def craw3(url):
    """Fetch *url*, locate every ``<img>`` inside ``div.card__content``
    cards, and download each image into ``./download_pic``.

    The target directory is created on first use.  Images are saved
    under the basename of their ``src`` URL.
    """
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    # exist_ok replaces the original racy exists()-then-mkdir check.
    os.makedirs('./download_pic', exist_ok=True)
    # Loop-invariant; also renamed from ``dir``, which shadowed the builtin.
    target_dir = os.path.abspath('./download_pic')
    for card in soup.find_all('div', class_='card__content'):
        print(card.find_all('img'))
        for pic in card.find_all('img'):
            imgurl = pic.get('src')
            # Skip <img> tags without a src attribute; the original would
            # crash in os.path.basename(None) with a TypeError.
            if not imgurl:
                continue
            filename = os.path.basename(imgurl)
            imgpath = os.path.join(target_dir, filename)
            print('开始下载 %s' % imgurl)
            download_jpg(imgurl, imgpath)
# Crawl the paginated presentation listing: pages 12, 24 and 36
# (the site appears to paginate in steps of twelve items).
# NOTE(review): there is no '/' between 'presentations' and the page
# number in the generated URL — confirm the target site accepts this form.
for page in (12, 24, 36):
    craw3('http://www.infoq.com/cn/presentations' + str(page))