| |
| |
| |
| |
| |
| use Modern::Perl; |
| use threads; |
| use threads::shared; |
| use File::Slurp; |
| use File::Path qw/make_path/; |
| use File::Basename; |
| use Mojo::UserAgent; |
| use Mojo::DOM; |
| use Try::Tiny; |
| use Time::HiRes qw/sleep time/; |
| use Term::ReadKey; |
| use IO::Handle; |
| STDOUT->autoflush(1); |
| |
| our $main = "http://www.elitebabes.com/model/katherine-a"; |
| our $workdir = "D:/Hex/w4b_models/". basename($main); |
| make_path $workdir unless ( -e $workdir ); |
| chdir $workdir; |
| mkdir "links" unless ( -e "links" ); |
| |
| our $progress :shared; |
| our $total :shared; |
| our @ths; |
| our @mission :shared; |
| our %headers = ( |
| 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0', |
| 'Referer' => 'http://www.elitebabes.com', |
| 'Connection' => 'keep-alive', |
| ); |
| |
| say "Step1"; |
| get_models_data($main); |
| |
| say "Step2"; |
| @mission = glob "'$workdir/links/*.txt'"; |
| |
| $progress = 0; |
| $total = scalar( @mission ); |
| |
| |
| grep { push @ths, threads->create( \&thread_func, $_ ) } ( 0 .. 3 ); |
| |
| |
| while ( threads->list(threads::running) ) { sleep 0.2 }; |
| |
| |
| grep { $_->detach() } threads->list(threads::all); |
| |
| quit(); |
| |
| sub get_models_data |
| { |
| my ( $page ) = @_; |
| my ($title, $subpg, $count); |
| |
| my $ua = Mojo::UserAgent->new(); |
| my $res; |
| |
| $res = $ua->get( $page, \%headers )->result; |
| my $dom = $res->dom; |
| get_info( $page, $dom, "info.txt" ); |
| |
| $count = 0; |
| for my $e ( $dom->find("ul.gallery-a a")->each ) |
| { |
| $count++; |
| $subpg = $e->attr("href"); |
| $title = $e->attr("title"); |
| $title = basename( $subpg ) if ( $title eq "" ); |
| $title =~s/\s+$//; |
| |
| get_piclinks_of_subpage( $ua, $title, $subpg, $count ); |
| } |
| } |
| |
| sub get_info |
| { |
| my ( $page, $dom, $file ) = @_; |
| my @data = ($page); |
| my ($like, $unlike); |
| |
| return if ( -e $file ); |
| |
| for my $e ( $dom->at("ul.list-a")->find('li')->each ) { |
| push @data, $e->at('span')->text . $e->text ; |
| } |
| |
| $like = $dom->at("span#thelike")->text; |
| $unlike = $dom->at("span#thedown")->text; |
| push @data, "like: $like"; |
| push @data, "unlike: $unlike"; |
| write_file( $file, join("\n", @data) ); |
| } |
| |
| sub get_piclinks_of_subpage |
| { |
| my ($ua, $title, $subpage, $count) = @_; |
| |
| my ( $res, $dom, $href ); |
| my ($times); |
| my @links; |
| my $file = "./links/${title}.txt"; |
| |
| if ( -e $file ) |
| { |
| printf "%03d - %s file already exists\n", $count, $title; |
| return; |
| } |
| |
| $times = 0; |
| while (1) |
| { |
| try { $res = $ua->get($subpage)->result } |
| catch { printf "getting subpage, retry: %d\n", $times++; }; |
| last if ( defined $res and $res->is_success ); |
| return if ( $times > 10 ); |
| } |
| |
| $dom = $res->dom; |
| |
| for my $e ( $dom->find(".gallery-b a")->each ) { |
| push @links, $e->attr("href"); |
| } |
| |
| |
| for my $e ( $dom->find("video,.my_video*")->each ) { |
| push @links, $e->at("source")->attr("src"); |
| } |
| |
| if ( $#links < 0 ) { printf "fail to get media\n" } |
| else |
| { |
| printf "%03d - %s\n", $count, $title; |
| write_file( $file, join("\n", @links) ); |
| } |
| } |
| |
| sub thread_func |
| { |
| our (@mission, @headers, $progress, $total); |
| my $idx = shift; |
| my $time_a; |
| my $target; |
| my $subfold; |
| my @links; |
| my $ua = Mojo::UserAgent->new(); |
| $ua = $ua->max_redirects(5); |
| |
| $SIG{'BREAK'} = sub { threads->exit() }; |
| |
| while ( $#mission >= 0 ) |
| { |
| $progress++; |
| $target = shift @mission; |
| @links = read_file( $target ); |
| |
| |
| ($subfold, undef, undef) = fileparse($target, qr/\.[^.]*$/); |
| printf "[%d] [%03d/%03d] %s\n", threads->tid(), $progress, $total, $subfold; |
| mkdir $subfold unless -e $subfold; |
| |
| get_pics( threads->tid(), $ua, $subfold, \@links ); |
| } |
| } |
| |
| sub get_pics |
| { |
| our %headers; |
| my ($id, $ua, $fold, $links) = @_; |
| my $res; |
| my $filepath; |
| my $times; |
| |
| for my $e ( @$links ) |
| { |
| $e=~s/\r?\n//; |
| next if ( $e !~ /(jpg|png|bmp|gif)/i ); |
| $filepath = $fold ."/". basename($e); |
| if ( -e $filepath and ( check_jpg_file_tail( $filepath ) == 1 ) ) |
| { |
| |
| next; |
| } |
| |
| printf " [%d] %s\n", $id, $filepath; |
| $times = 0; |
| while (1) |
| { |
| try { $res = $ua->get($e, \%headers)->result; } |
| catch { printf "getting pics, retry: %d\n", $times++; }; |
| last if ( defined $res and $res->is_success ); |
| return if ( $times > 10 ); |
| } |
| |
| $res->content->asset->move_to( $filepath ); |
| } |
| } |
| |
| sub check_jpg_file_tail |
| { |
| my $file = shift; |
| my ($fh, $buff); |
| open $fh, "<:raw", $file or warn "$!"; |
| seek($fh, -2, 2); |
| read($fh, $buff, 2); |
| if ( $buff eq "\xFF\xD9" ) { return 1 } |
| else { return 0 } |
| } |
| |
| sub quit |
| { |
| print "Press Any Key to Continue ..."; |
| ReadKey -1; |
| }COPY |