标题: [原创代码] [Perl]P站视频下载 [打印本页]
作者: 523066680 时间: 2021-10-10 16:16 标题: [Perl]P站视频下载
P站大家懂的
用到的模块:
JE (Javascript解析库)
Mojolicious (网络)
使用方法,将指定视频网址末尾15位的keyword复制放到 @list 数组中。以及找到Agent的地址更新到proxy设置- use utf8;
- use Encode;
- use JE;
- use Modern::Perl;
- use File::Slurp;
- use Mojo::UserAgent;
- use Win32::Unicode::File;
- use JSON qw/from_json to_json/;
- STDOUT->autoflush(1);
-
- my $JE = new JE;
- my $ua = Mojo::UserAgent->new();
- init_ua();
-
- my %headers;
- init_headers(\%headers);
-
- # 这里放准备下载的 keyword 清单
- my @list = qw/
- ph60e6662d3221e
- ph5dc9ba7f1b...
- /;
-
- grep { getVideo($_) } @list;
-
- sub getVideo
- {
- my $viewkey = shift;
- my $url = "https://cn.pornhub.com/view_video.php?viewkey=${viewkey}";
-
- my $res = $ua->get( $url )->result;
- #print $html->body;
- my $js = $res->dom->at(".video-wrapper script")->all_text;
- my $id = $res->dom->at("#player")->attr("data-video-id");
- my $title = $res->dom->at("title")->text; #unicode
- $title =~s/\s+- Pornhub\.com//i;
- $title =~s/[\\\/:*?"<>|]/ /g; #替换部分windows文件名不支持的字符
- printf "%s %s\n", $id, gbk($title);
-
- my $file = "E:/迅雷下载/${viewkey} ${title}.mp4";
- if ( file_type( 'e' => $file ) ) {
- printf "%s: file already exists\n", $viewkey;
- return;
- }
-
- write_file("src.js", $js);
-
- $JE->eval( $js );
- my $x = $JE->value;
- # 第三个节点对应 720P,->value 转换为Perl字符串
- my $videolink = $x->{"qualityItems_${id}"}[2]{"url"}->value;
- say $videolink;
-
- my $tx = $ua->get( $videolink );
- print $tx->error ? "\nDownloading failed: ".$tx->error->{message} : "\nDownloading finished!\n";
- my $fh = Win32::Unicode::File->new("wb", $file);
- $fh->write( $tx->result->body );
- $fh->close();
- }
-
- sub init_headers
- {
- my $ref = shift;
- %$ref =
- (
- 'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng',
- 'accept-encoding' => 'gzip, deflate, br',
- 'accept-language' => 'zh-CN,zh;q=0.9,zh-TW;q=0.8',
- 'user-agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
- );
- }
-
- sub init_ua
- {
- $ua = $ua->max_redirects(5);
- $ua = $ua->connect_timeout(10);
-
- # Proxy server to use for HTTPS and WebSocket requests.
- # https://docs.mojolicious.org/Mojo/UserAgent/Proxy#https
- $ua->proxy->https("http://sri:secret\@127.0.0.1:10809")->http("http://sri:secret\@127.0.0.1:10809");
- # 代理的具体地址,在代理工具设置、菜单中寻找
-
- # 进度显示
- $ua->on(start => sub {
- my ($ua, $tx) = @_;
- $tx->req->once(finish => sub {
- $tx->res->on(progress => sub {
- state $prev = 0;
- my $msg = shift;
- return unless my $len = $msg->headers->content_length;
- my $size = $msg->content->progress;
- my $progress = int($size / ($len / 100));
- if ( $progress ne $prev ) {
- printf "%d%%\t", $progress;
- }
- $prev = $progress;
- });
- });
- });
- }
-
-
- sub gbk { encode('gbk', $_[0]) }
- sub utf8 { encode('utf8', $_[0]) }
- sub u2gbk { encode('gbk', decode('utf8', $_[0])) }
- sub uni { decode('utf8', $_[0]) }
复制代码
作者: slimay 时间: 2021-10-10 16:58
B站视频怎么搞.
作者: 523066680 时间: 2021-10-10 17:41
本帖最后由 523066680 于 2021-10-10 17:42 编辑
回复 2# slimay
B站没试过,之前用手机保存本地(格式好像要手动改一下,音频也是独立的),从手机复制出来
油猴好像有现成脚本
https://greasyfork.org/zh-CN/scripts/413228-bilibili视频下载
作者: 老刘1号 时间: 2021-10-10 19:02
回复 2# slimay
安卓缓存目录,拿ffmpeg合并一下音视频流就可以了
作者: slimay 时间: 2021-10-10 22:24
回复 4# 老刘1号
老刘, 你多会成了音视频专业户了, 厉害啊
作者: slimay 时间: 2021-10-10 22:24
回复 3# 523066680
脚本不错
作者: 523066680 时间: 2021-10-10 23:09
回复 6# slimay
小小脚本,拿来冒泡,不足挂齿
作者: slimay 时间: 2021-10-10 23:32
回复 7# 523066680
外贸赚钱吗
作者: 523066680 时间: 2021-10-11 00:28
本帖最后由 523066680 于 2021-10-11 15:46 编辑
回复 8# slimay
外贸赚钱,我不赚钱
跑题跑远了。
众所周知P站是一个学习网站,大胆补一张P站的截图
作者: 523066680 时间: 2021-12-12 23:57 标题: 2021-11 更新 m3u8 片段下载+合并的代码 RE: [Perl]P站视频下载
本帖最后由 523066680 于 2021-12-12 23:59 编辑
- =info
- Author: 523066680
- Date: 2021-11
- P站m3u8视频片段批量下载+合并
- 11月更新后不再出现直接的视频地址,所以只能从m3u8清单下载了。
-
- 需要用到的重点模块:
- Mojolicious
- JE
- Win32::Unicode:File
-
- 需要有第三方命令工具:
- ffmpeg.exe
- =cut
-
- use utf8;
- use Encode;
- use JE;
- use Modern::Perl;
- use File::Slurp;
- use Mojo::UserAgent -signatures;
- use Win32::Unicode::File;
- use JSON qw/from_json to_json/;
- STDOUT->autoflush(1);
-
- my $JE = new JE;
- my $ua = Mojo::UserAgent->new()->with_roles('+Queued');
- my $loop = Mojo::IOLoop->singleton;
- $loop = $loop->max_accepts(3);
- $loop = $loop->max_connections(3);
- $ua->max_active(5); # 最大在线同时请求数量
-
- init_ua();
-
- my %headers;
- init_headers(\%headers);
-
- my @list = qw/
- ph61668f9c9____视频ID1
- ph61668f9c9____视频ID2
- /;
-
- grep { getVideo($_) } @list;
-
- sub getVideo
- {
- my $viewkey = shift;
- my $url = "https://cn.{P站}.com/view_video.php?viewkey=${viewkey}";
-
- my ( $title, $major, $m3u8_content ) = get_m3u8( $url );
-
- my $cache = gbk("E:/temp/ts");
- my $file = "E:/temp/${viewkey} ${title}.mp4"; #unicode
- if ( file_type( 'e' => $file ) ) {
- printf "%s: file already exists\n", $viewkey;
- return;
- }
- mkdir $cache unless -e $cache;
-
- my $buff = ""; #保存清单,用于ffmpeg合成视频
- for my $e ( grep { /^seg/ } split(/\r?\n/, $m3u8_content) )
- {
- $e=~/(seg.*\.ts)/;
- $buff .= "file ${1}\r\n";
-
- my $tsfile = $cache ."/". $1;
-
- # 考虑个别片段下载失败的情况,临时判断是10240byte.
- # 最好是用 head 请求获取文件的实际大小做判断
- #unless ( -e $tsfile and ( (-s $tsfile) > 10240 ) ) {
- $ua->get( $major . $e, closure->($tsfile) );
- #} else {
- # printf "File already exists: %s\n", $e;
- #}
- }
-
- $loop->start unless $loop->is_running;
- write_file( $cache ."/". "content.txt", $buff);
-
- if ( -e "$cache/temp.mp4" )
- {
- printf "temp.mp4 already exists ? \n";
- unlink "$cache/temp.mp4" if -e "$cache/temp.mp4";
- }
-
- system("ffmpeg -y -f concat -i \"$cache/content.txt\" -vcodec h264_nvenc -vb 2M \"$cache/temp.mp4\"");
- moveW decode('gbk', "$cache/temp.mp4"), $file or warn $!;
-
- # 基于某种原因,temp.mp4可能没有移除?下载多个视频时,前面视频的ffmpeg操作被忽略,直接进行了 moveW
- # 可能是判断问题或者上一次中断导致没有删除temp.mp4
- # if ( ! -e "$cache/temp.mp4" ) {
- # system("ffmpeg -y -f concat -i \"$cache/content.txt\" -vcodec h264_nvenc -vb 2M \"$cache/temp.mp4\"");
- # }
- # moveW decode('gbk', "$cache/temp.mp4"), $file or warn $!;
- }
-
- sub get_m3u8
- {
- my ($url) = @_;
-
- my $res = $ua->get( $url )->result;
- #print $html->body;
- my $js = $res->dom->at(".video-wrapper #player script")->all_text;
- my $id = $res->dom->at("#player")->attr("data-video-id");
- my $title = $res->dom->at("title")->text; #unicode
- $title =~s/\s+- P...hub\.com//i;
- $title =~s/[\\\/:*?"<>|]/ /g; #替换部分windows文件名不支持的字符
- printf "%s %s\n", $id, gbk($title);
-
- # 获取 m3u8 实际地址
- $JE->eval( $js );
- # media_3 对应 720P,->value 转换为Perl字符串
- my ($master_m3u8_url) = $JE->{"media_3"}->value;
- $res = $ua->get( $master_m3u8_url )->result;
- my ($major) = ($master_m3u8_url =~ /(.+\/)[^\/]+$/); # m3u8 链接主地址 末尾已经包含/
- write_file("master.m3u8", $res->body);
-
- my ($index_m3u8) = ($res->body =~ /(index.+)\s/i); # 获取索引文件
- printf "%s\n", $major . $index_m3u8; # 主地址+尾部详细地址
- $res = $ua->get( $major . $index_m3u8 )->result;
- write_file("index.m3u8", $res->body);
-
- return ($title, $major, $res->body);
- }
-
- sub closure ($file)
- {
- return
- sub ($ua, $tx) {
- printf "%s\n", $file;
- if ( $tx->result->is_success ) { # 如果失败则不写入文件
- write_file( $file, {binmode=>":raw"}, $tx->result->body );
- } else {
- die "Failed to get segment $file\n";
- }
- }
- }
-
- sub init_headers
- {
- my $ref = shift;
- %$ref =
- (
- 'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
- 'accept-encoding' => 'gzip, deflate, br',
- 'accept-language' => 'zh-CN,zh;q=0.9,zh-TW;q=0.8',
- 'upgrade-insecure-requests' => '1',
- 'user-agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
- );
- }
-
- sub init_ua
- {
- $ua = $ua->max_redirects(5);
- $ua = $ua->connect_timeout(10);
-
- # 设置代理
- # Proxy server to use for HTTPS and WebSocket requests.
- # https://docs.mojolicious.org/Mojo/UserAgent/Proxy#https
- $ua->proxy->https("http://sri:secret\@127.0.0.1:10809")->http("http://sri:secret\@127.0.0.1:10809");
- #$ua->proxy(["http", "https"], "socks://127.0.0.1:1080");
- }
-
-
- sub gbk { encode('gbk', $_[0]) }
- sub utf8 { encode('utf8', $_[0]) }
- sub u2gbk { encode('gbk', decode('utf8', $_[0])) }
- sub uni { decode('utf8', $_[0]) }
复制代码
为了避嫌,其中的关键网址 用 {P站} 代替,所以脚本并不能直接运行,需要自行替换。
但是我猜很少有人会真的下载,所以就当是刷一下存在感了。
欢迎光临 批处理之家 (http://www.bathome.net/) |
Powered by Discuz! 7.2 |