本帖最后由 ivor 于 2020-5-27 12:58 编辑
#! /usr/bin/env python3
# coding:utf-8
"""Download every chapter of a ting89.com audiobook as MP3 files.

The script reads the book index page configured in ``site``, follows each
chapter link, extracts the MP3 address embedded in the chapter page and saves
the file as ``<book title>/<chapter>.mp3`` under the current directory.
Chapters whose file already exists are skipped, so the script can be re-run
to resume an interrupted download.
"""

import os
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Index page of the audiobook to download; edit this to fetch another book.
site = "http://www.ting89.com/books/68.html"

# Seconds to wait on the server before a request counts as failed.
REQUEST_TIMEOUT = 30
# Seconds to pause between retries so a dead link is not hammered.
RETRY_DELAY = 2

# Network-level failures that are worth retrying. Other request errors
# (malformed URL, HTTP error status, ...) will not fix themselves.
RETRYABLE_ERRORS = (
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    requests.exceptions.ChunkedEncodingError,
)

# MP3 address that follows `datas=("` in the chapter page source.
MP3_URL_RE = re.compile(r'(?<=datas=\(")https?.*\.mp3')
# Path segment of the MP3 address that precedes the numbered file name;
# it is used as the output directory name.
BOOK_TITLE_RE = re.compile(r'(?<=/)[\u4E00-\u9FA5_\d]*?(?=/\d)')
# Numbered file name, optionally carrying the "(完)" marker.
MP3_NAME_RE = re.compile(r"\d+(\(完\))?\.mp3")


def fetch(url):
    """Return the body of *url* as bytes.

    Connection resets and timeouts are retried indefinitely (the script is
    meant to ride out a flaky connection). Only ``RETRYABLE_ERRORS`` are
    caught, so Ctrl-C still stops the script.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.RequestException: any other non-retryable request failure.
    """
    while True:
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except RETRYABLE_ERRORS:
            print("网络重置,继续尝试访问。")
            time.sleep(RETRY_DELAY)
            continue
        # Refuse error pages: they must not be saved as audio.
        response.raise_for_status()
        return response.content


def download_chapter(chapter_url):
    """Download the MP3 referenced by the chapter page at *chapter_url*.

    The chapter is skipped (with a message) when the page holds no
    recognisable MP3 address or when the target file already exists.
    """
    # The page is decoded as GBK; undecodable bytes are replaced rather than
    # aborting, since only the ASCII/CJK MP3 address is needed.
    chapter_html = fetch(chapter_url).decode('gbk', errors='replace')

    url_match = MP3_URL_RE.search(chapter_html)
    if url_match is None:
        print('{0}: no mp3 address found. skip.'.format(chapter_url))
        return
    mp3_site = url_match.group(0)

    title_match = BOOK_TITLE_RE.search(mp3_site)
    name_match = MP3_NAME_RE.search(mp3_site)
    # An empty title would make the directory name '' and mkdir would fail.
    if title_match is None or not title_match.group(0) or name_match is None:
        print('{0}: unrecognised mp3 address. skip.'.format(mp3_site))
        return
    title = title_match.group(0)
    mp3_name = name_match.group(0)

    os.makedirs(title, exist_ok=True)
    target = '{0}/{1}'.format(title, mp3_name)
    if os.path.exists(target):
        print('{0}/{1} exist. pass!!!'.format(title, mp3_name))
        return

    print('{0}/{1} is downloading.'.format(title, mp3_name))
    mp3 = fetch(mp3_site)

    # Write to a temporary name first: if the script dies mid-write, no
    # truncated file is left under the final name to be skipped next run.
    partial = target + '.part'
    with open(partial, 'wb') as file:
        file.write(mp3)
    os.replace(partial, target)


def main():
    """Download all chapters linked from the book index page ``site``."""
    soup = BeautifulSoup(fetch(site), "html.parser")
    chapter_list = soup.find("div", class_="compress")
    if chapter_list is None:
        raise SystemExit('{0}: chapter list not found.'.format(site))

    # href=True drops anchors without a link target.
    for link in chapter_list.find_all("a", href=True):
        # urljoin handles both absolute and root-relative chapter links.
        chapter_url = urljoin(site, link['href'])
        try:
            download_chapter(chapter_url)
        except requests.RequestException as err:
            # One broken chapter must not abort the remaining downloads.
            print('{0} failed: {1}. skip.'.format(chapter_url, err))


if __name__ == "__main__":
    main()
说明:脚本会跳过已下载的章节。运行环境:Python 3(这是 Python 脚本,不是批处理文件)!
结果演示:
三体/001.mp3 exist. pass!!!
三体/002.mp3 exist. pass!!!
三体/003.mp3 exist. pass!!!
三体/004.mp3 is downloading.
...... |