Board logo

标题: [原创代码] [Perl]批量抓取(非阻塞)跨境电商平台市场大盘数据 [打印本页]

作者: 523066680    时间: 2021-12-18 22:30     标题: [Perl]批量抓取(非阻塞)跨境电商平台市场大盘数据

本帖最后由 523066680 于 2021-12-18 22:31 编辑

环境 Win10, Strawberry Perl
Mojolicious 的非阻塞请求是不限制请求量的,为了避免同时发出过多请求造成拥堵,额外安装模块 Mojo::UserAgent::Role::Queued,用于限制同一时间的请求数
因为非阻塞请求是给 $ua 传一个回调函数,这种情况参数是固定的($ua, $tx),为了给回调函数增加一个参数(文件名),用了闭包实现匿名函数传参
$ua->get( $url, form => $args, closure->($file) ) ;
  # Build the completion callback for one non-blocking request.
  #
  # The user agent invokes a callback as ($ua, $tx), so the target file name
  # is carried into the callback by closing over $file.
  #
  #   $file - path of the JSON file to write for this request
  #
  # Returns a code reference to pass as the last argument of $ua->get.
  sub closure ($file)
  {
      return sub ($ua, $tx) {
          # A transport failure or an HTTP error status must not leave a file
          # behind: get_and_dump skips every file that already exists, so a
          # bogus file would never be fetched again.
          if ( my $err = $tx->error ) {
              my $code = defined $err->{code} ? "$err->{code} " : '';
              warn sprintf "%s: request failed: %s%s\n", $file, $code, $err->{message};
              return;
          }

          # A body that does not decode as JSON is reported, not stored.
          my $json = $tx->res->json;
          unless ( defined $json ) {
              warn sprintf "%s: response body is not JSON, nothing written\n", $file;
              return;
          }

          printf "%s\n", $file;
          write_file( $file, to_json( $json, {pretty => 1, utf8 => 1} ) );
          return;
      };
  }
复制代码
Login 模块在这里当作黑盒,总之是登录平台用的(代码中调用的是 Login::login_by_cookies)。
  1. use Login;
复制代码
  1. =info
  2.     AliExpress 市场大盘数据
  3.     Author: 523066680/vicyang
  4.     2020-04
  5. =cut
  6. use File::Slurp;
  7. use utf8;
  8. use Encode;
  9. use Modern::Perl;
  10. use Mojo::UserAgent -signatures;
  11. use JSON qw/from_json to_json/;
  12. use List::Util qw/sum/;
  13. use Try::Tiny;
  14. use DateTime;
  15. STDOUT->autoflush(1);
  16. use FindBin;
  17. use lib encode('gbk',"D:/发货辅助/lib");
  18. use Login;
  # HTTP client with the Queued role applied; max_active caps how many
  # requests are processed at the same time.
  my $ua = Mojo::UserAgent->new()->with_roles('+Queued');
  my $loop = Mojo::IOLoop->singleton;
  $loop   = $loop->max_accepts(5);
  $loop   = $loop->max_connections(5);
  $ua->max_active(3);
  $ua->request_timeout(10);
  Login::login_by_cookies($ua);

  # Reporting month. $firstday is the single source of truth; $lastday and the
  # output directory name are derived from it so they cannot drift apart.
  our $firstday = DateTime->new(year => 2021, month => 11, day => 1);
  our $lastday = DateTime->last_day_of_month(year => $firstday->year(), month => $firstday->month());
  #our $lastday = DateTime->last_day_of_month(year => 2021, month => 10, day => 26);
  our $wdir = sprintf("./Data_%d%02d_Month", $firstday->year(), $firstday->month());

  # Fail early when the output directory cannot be created; otherwise every
  # later file write would fail one by one.
  unless ( -d $wdir ) {
      mkdir $wdir or die "Cannot create directory $wdir: $!\n";
  }

  my $content = read_file( "Category.json" );
  my $data = from_json( $content );

  # Queue the requests for all sub-categories of category 44.
  recur_tree( $data->{44}{child}, 44, 1 );
  # Queue the requests for the first-level category itself.
  getjson( 0, 44, $data->{44}{label}, 0 );
  # Walk the category tree depth-first, printing each category and queueing
  # its requests through getjson.
  #
  #   $node     - hash reference mapping category id => { label => ..., child => ... }
  #   $parentID - id of the category that owns $node
  #   $lv       - depth of $node's entries; also drives the print indentation
  sub recur_tree
  {
      my ( $node, $parentID, $lv ) = @_;
      my $indent = " " x ( $lv * 2 );

      for my $id ( keys %$node ) {
          my $entry = $node->{$id};
          my $label = $entry->{label};

          printf "%s%s %s\n", $indent, u2gbk($label), $id;
          getjson( $parentID, $id, $label, $lv );

          # Leaf categories carry no "child" key.
          next unless exists $entry->{child};
          recur_tree( $entry->{child}, $id, $lv + 1 );
      }
  }
  # The recur_tree/getjson calls above have queued the requests; run the
  # event loop, unless it is already running, so they get processed.
  if ( !$loop->is_running ) {
      $loop->start;
  }
  # Queue the three market-dashboard requests for one category.
  #
  #   $parentID - id of the parent category; 0 is sent to the API as -9999
  #   $cateID   - id of the category being fetched
  #   $name     - category label (accepted but not used here)
  #   $level    - category level sent as cateLevel
  #
  # Each response goes to "<$wdir>/<$cateID>_<report>.json" via get_and_dump.
  sub getjson
  {
      my ($parentID, $cateID, $name, $level) = @_;

      # Parameters shared by all three reports.
      my %common = (
          'dateType' => 'month',
          'dateRange' => join( '|', $firstday->ymd('-'), $lastday->ymd('-') ),
          'country' => 'ALL',
          'platform' => 'ALL',
          'cateId' => $cateID,
          'cateLevel' => $level,
          'parentCateId' => $parentID == 0 ? -9999 : $parentID,
          #'_' => time(),
      );

      # One row per report: file suffix, endpoint, report-specific parameters.
      my @reports = (
          # Core indicators (data summary)
          [
              'core',
              'https://sycm.aliexpress.com/api/market-dashboard/core-indicators',
              {},
          ],
          # Trend data
          [
              'trend',
              'https://sycm.aliexpress.com/api/market-dashboard/indicator-trend',
              {},
          ],
          # Breakdown by country
          [
              'country_cst',
              'https://sycm.aliexpress.com/api/market-dashboard/country-constitute/core-indicators',
              {
                  'orderBy' => "uvIndex",
                  'indexCode' => "uvIndex,visitedItemCnt,vstItemPercent,supplyDemandIndex,payPerBuyerAmt,itemAddCartBuyerCnt,wishlistBuyerCnt",
              },
          ],
      );

      for my $report (@reports) {
          my ( $suffix, $url, $extra ) = @$report;
          my $file = "${wdir}/${cateID}_${suffix}.json";
          get_and_dump( $ua, $url, { %common, %$extra }, $file );
      }
  }
  # Queue one non-blocking GET whose JSON response is stored in $file.
  #
  #   $ua   - user agent used to send the request
  #   $url  - endpoint URL
  #   $args - hash reference handed to the request as form parameters
  #   $file - output path; nothing is requested when this file already exists
  sub get_and_dump
  {
      my ($ua, $url, $args, $file) = @_;

      # Files from an earlier run are kept, which makes the script resumable.
      return if -e $file;

      # Call closure() with explicit parentheses. A bareword in front of
      # "->(...)" is parsed differently depending on whether the sub has
      # already been declared at that point, so that form breaks as soon as
      # the definitions are reordered.
      $ua->get( $url, form => $args, closure($file) );
  }
  # Build the completion callback for one non-blocking request.
  #
  # The user agent invokes a callback as ($ua, $tx), so the target file name
  # is carried into the callback by closing over $file.
  #
  #   $file - path of the JSON file to write for this request
  #
  # Returns a code reference to pass as the last argument of $ua->get.
  sub closure ($file)
  {
      return sub ($ua, $tx) {
          # A transport failure or an HTTP error status must not leave a file
          # behind: get_and_dump skips every file that already exists, so a
          # bogus file would never be fetched again.
          if ( my $err = $tx->error ) {
              my $code = defined $err->{code} ? "$err->{code} " : '';
              warn sprintf "%s: request failed: %s%s\n", $file, $code, $err->{message};
              return;
          }

          # A body that does not decode as JSON is reported, not stored.
          my $json = $tx->res->json;
          unless ( defined $json ) {
              warn sprintf "%s: response body is not JSON, nothing written\n", $file;
              return;
          }

          printf "%s\n", $file;
          write_file( $file, to_json( $json, {pretty => 1, utf8 => 1} ) );
          return;
      };
  }
  # Encode a character string as GBK bytes.
  sub gbk
  {
      my ($text) = @_;
      return encode( 'gbk', $text );
  }
  # Encode a character string as UTF-8 bytes.
  sub utf8
  {
      my ($text) = @_;
      return encode( 'utf8', $text );
  }
  # Transcode UTF-8 encoded bytes into GBK encoded bytes.
  sub u2gbk
  {
      my ($utf8_bytes) = @_;
      my $chars = decode( 'utf8', $utf8_bytes );
      return encode( 'gbk', $chars );
  }
复制代码





欢迎光临 批处理之家 (http://www.bathome.net/) Powered by Discuz! 7.2