标题: [原创教程] Mojo::UserAgent 批量获取 nes 游戏资源 [打印本页]
作者: 523066680 时间: 2019-1-15 14:40 标题: Mojo::UserAgent 批量获取 nes 游戏资源
本帖最后由 523066680 于 2019-1-15 14:46 编辑
获取midi(背景音乐)、nes、图片、作弊码、以及PDF(通常是手册)
运行环境:Strawberry Perl v5.26
路径设置:our $wdir = "F:/temp/nesgames";- =info
- Mojo::UserAgent 批量获取 nes 游戏资源
- 523066680/vicyang
- 2018-12
- =cut
-
- use File::Slurp;
- use File::Path;
- use File::Basename;
- use Mojo::UserAgent;
- use Mojo::DOM;
- use Try::Tiny;
- STDOUT->autoflush(1);
-
- our $wdir = "F:/temp/nesgames";
- our $main = "http://www.nesfiles.com";
- our $games = "http://www.nesfiles.com/Games";
-
- mkpath $wdir unless -e $wdir;
- chdir $wdir;
-
- our $ua = Mojo::UserAgent->new();
- our @headers = ( "User-Agent" => "Firefox/63.0" );
-
- get_games_list($games);
-
- sub get_games_list
- {
- our ($ua, $main, @heaee);
- my ($link) = @_;
- my $res = $ua->get( $link, \@headers )->res;
- my $dom = $res->dom;
-
- for my $e ( $dom->find(".nesfilesTable a")->each )
- {
- #printf "%s %s\n", $e->attr("href"), $e->text;
- get_files( $main .$e->attr("href"), $e->text );
- }
- }
-
- sub get_files
- {
- our ($main, $ua, @headers);
- my ($link, $name) = @_;
- my $title = basename($link);
- my ($res, $dom);
-
- my $fname = "${title}.html";
- if ( -e $fname ) {
- my $html = read_file($fname);
- $dom = Mojo::DOM->new( $html );
- } else {
- $res = $ua->get( $link, \@headers )->res;
- $dom = $res->dom;
- write_file( $fname, {binmode=>":raw"}, $res->body );
- }
-
- # 获取资源明细,略过 Ebay 相关的条目
- mkdir $title unless -e $title;
- my ($head, $list, $res2);
- for my $section ($dom->find(".GameSection")->each)
- {
- $head = $section->at("header")->text;
- last if $head=~/Ebay$/i;
-
- # 秘籍/代码
- if ($head=~/Codes/i) {
- write_file( $title ."/Codes_Cheats.txt", $section->all_text );
- next;
- }
-
- if ($head=~/Screenshots/i) {
- # 如果是屏幕截图
- $list = $section->find("img")->map(attr=>"src");
- } else {
- # 其他情况获取 href
- $list = $section->find("a")->map(attr=>"href");
- }
-
- printf "%s\n", $head;
- for my $href ( $list->each )
- {
- printf "%s\n", $href;
- $fname = $title ."/". basename($href);
-
- next if -e $fname; # 跳过已经存在的文件
- $res2 = try_to_get( "${main}$href" );
- next unless defined $res2; # 如果获取失败
-
- write_file( $fname, {binmode=>":raw"}, $res2->body);
- }
- }
- }
-
- sub try_to_get
- {
- our ($ua, @headers);
- my ($link) = @_;
- my $res;
- my $times = 0;
-
- while (1)
- {
- try { $res = $ua->get( $link )->result; }
- catch { printf "Error %s, retry: %d\n", $_, $times; };
- $times++;
- last if (defined $res and $res->is_success);
- return undef if ( $times > 5 );
- }
- return $res;
- }
复制代码
欢迎光临 批处理之家 (http://www.bathome.net/) |
Powered by Discuz! 7.2 |