Perl - 画像ダウンロード
「いかにしておっぱい画像をダウンロードするか〜2012 - ゆーすけべー日記」のスクリプト
を、Windows Azure Marketplace上のAPIを使って検索するように変えてみた。
追記:AnyEventで並列化したバージョンも書いてみた -> Perl - 画像ダウンロード 2 - It'll be
#!/usr/bin/env perl
#
# Download the JPEG images returned by a Bing image search (Windows Azure
# Marketplace API) into ./data.
#
# Usage: perl this_script.pl [QUERY]
#   QUERY  search phrase, expected as UTF-8 bytes on the command line
#          (defaults to 'AKB48').
#
# Each image is stored as <md5 of its URL>.jpg, so re-running the script
# skips images that were already fetched.  The script dies if a search
# request fails; a failed image download only discards that one file.
use strict;
use warnings;
use Encode qw{encode_utf8 decode_utf8};
use utf8;
use LWP::UserAgent;
use HTTP::Request;
use Digest::MD5 qw/md5_hex/;
use Path::Class qw/dir file/;
use URI;
use JSON qw{decode_json};

# Number of results requested per API call; also the paging stride.
my $PAGE_SIZE = 50;

# @ARGV holds raw bytes.  Decode them into characters and let URI perform
# the UTF-8 percent-encoding; calling encode_utf8 on the raw bytes would
# double-encode any non-ASCII query.
my $query       = @ARGV ? decode_utf8(shift @ARGV) : 'AKB48';
my $account_key = 'YOUR ACCOUNT KEY';
my $url = URI->new('https://api.datamarket.azure.com/Bing/Search/Image');
my $ua  = LWP::UserAgent->new;
my $dir = dir('./data');

# ':content_file' does not create missing directories, so make sure the
# target directory exists before the first download.
$dir->mkpath;

my $page_count     = 0;
my $download_count = 0;

PAGE:
while (1) {
    $url->query_form(
        'Query'   => qq{'$query'},
        '$top'    => $PAGE_SIZE,                  # number of results
        '$skip'   => $page_count * $PAGE_SIZE,    # offset of the first result
        '$format' => 'json',
    );

    my $req = HTTP::Request->new(GET => $url);
    $req->authorization_basic('', $account_key);
    my $res = $ua->request($req);
    die $res->status_line if !$res->is_success;

    my $json    = decode_json $res->content;
    my $results = $json->{d}{results};

    # Stop when the result list is missing or empty; testing only for
    # definedness would keep requesting pages forever once the results
    # are exhausted.
    last PAGE if ref($results) ne 'ARRAY' || !@{$results};

    ENTRY:
    for my $entry (@{$results}) {
        my $media_url = $entry->{MediaUrl};
        next ENTRY if !defined $media_url || $media_url !~ /\.jpg$/;

        # Counts every JPEG candidate seen, including ones already on disk.
        $download_count++;

        my $filename = md5_hex(encode_utf8($media_url)) . '.jpg';
        my $filepath = $dir->file($filename)->stringify;
        next ENTRY if -f $filepath;

        print encode_utf8("$download_count : download... $media_url\n");

        # Use a separate variable so the search response is not clobbered.
        my $image_res = $ua->get($media_url, ':content_file' => $filepath);

        # Discard anything that is not a completely transferred image:
        # HTTP errors, transfers aborted mid-way (X-Died), or non-image
        # payloads such as HTML error pages.
        my $is_image = $image_res->is_success
            && !$image_res->header('X-Died')
            && $image_res->content_type =~ m/^image/;
        unlink $filepath if !$is_image;
    }

    $page_count++;
}