本帖最后由 ivor 于 2016-3-9 20:51 编辑
回复 1# wzf1024
- # coding:utf-8
- # 功能:自动检测下载文章中的图片http://pmmp.cnki.net/Operation/Details.aspx?id=xxxx
-
- import bs4
- import urllib.request as url
- import re
- import os
-
# Site root that image paths are appended to.
website = 'http://pmmp.cnki.net'
# Article page URL prefix; a four-digit suffix is appended to form the id.
link = 'http://pmmp.cnki.net/Operation/Details.aspx?id=0075'


def page_id(index):
    """Return the zero-padded four-digit article suffix for *index*.

    ``page_id(1)`` is ``'0001'`` and ``page_id(9998)`` is ``'9998'``.
    """
    return '%04d' % index


def image_path(src):
    """Return the site-relative path for an ``<img src>`` value.

    The first two characters of *src* are dropped.
    NOTE(review): presumably every src on these pages starts with '..'
    (e.g. '../Upload/x.jpg') -- confirm against real pages.
    """
    return src[2:]


def image_url(src):
    """Return the absolute, percent-encoded download URL for *src*.

    The path is unquoted before it is quoted so that a src which is
    already percent-encoded is not encoded a second time.
    """
    # Local import: quote/unquote live in urllib.parse; reaching them via
    # urllib.request only works through an undocumented re-export.
    from urllib.parse import quote, unquote

    return '%s%s' % (website, quote(unquote(image_path(src))))


def image_filename(src):
    """Return the last '/'-separated component of the image path."""
    return image_path(src).rsplit('/', 1)[-1]


def download_page_images(article_id):
    """Download every image referenced by one article page.

    Images are written into a directory named *article_id* under the
    current working directory. The directory is created only after the
    page has been fetched and parsed. A failure on a single image is
    reported and skipped so the remaining images are still attempted.

    Raises whatever ``urlopen`` or the HTML parser raises when the page
    itself cannot be fetched or parsed.
    """
    page = link + article_id
    print(page)
    with url.urlopen(page) as response:
        data = bs4.BeautifulSoup(response, 'html.parser')

    os.makedirs(article_id, exist_ok=True)

    for img in data.find_all('img'):
        src = img.get('src')
        if not src:
            # <img> without a src attribute: nothing to download.
            continue
        name = image_filename(src)
        if not name:
            # Path ends with '/', so there is no file name to save under.
            continue
        try:
            # Read the whole image before opening the output file so a
            # failed download does not leave an empty file behind.
            with url.urlopen(image_url(src)) as remote:
                payload = remote.read()
            with open(os.path.join(article_id, name), 'wb') as pic_file:
                pic_file.write(payload)
        except Exception as exc:
            # Best effort per image; KeyboardInterrupt still propagates.
            print('skipped image %s: %s' % (src, exc))


def main():
    """Walk article suffixes 0001-9998 and download each page's images.

    Pages that fail are reported and skipped; the crawl continues.
    """
    for index in range(1, 9999):
        article_id = page_id(index)
        try:
            download_page_images(article_id)
        except Exception as exc:
            # Best effort per page, but unlike a bare ``except`` this
            # lets Ctrl-C (KeyboardInterrupt) stop the crawl.
            print('skipped page %s: %s' % (article_id, exc))


if __name__ == '__main__':
    main()
复制代码
|