查看文章
 
perl 百度MP3_歌曲TOP500 自动下载
2008-03-29 12:02

偶然看到ubuntu论坛上有人发百度新歌100的下载程序,但只看到python的,看来还是python比perl要阳春白雪啊。索性写个perl的吧。

tips:改成多线程的了,默认为5,只下载mp3(可改),只下载文件大于3M的(可改),wget太慢换成mytget,以后再想到什么再改吧:)

原始地址:http://hi.baidu.com/ximix/blog/item/7dd00c24d4513e37c995593a.html

—————以下代码—————

#!/usr/bin/perl
#Author: yisudong
#Date: Sat Mar 29 12:10:57 CST 2008
#Contact: yisudong at gmail dot com or http://hi.baidu.com/ximix

use strict;
use Thread;
use HTTP::Request::Common;
use HTTP::Status qw(is_client_error is_server_error is_redirect);
use Data::Dumper;
require LWP::UserAgent;

my $down_PATH = '/usr/tmp/mp3';
#my $url = "http://list.mp3.baidu.com/list/newhits.html?top1";
my $url = "http://list.mp3.baidu.com/topso/mp3topsong.html?top2";
my $MAX_THREADS = 5;

my $ua = new LWP::UserAgent;
$ua->agent('yisuD_Robot');

my $request = HTTP::Request->new(GET => "$url");
my $response = $ua->simple_request( $request );
my $str = $response->content;
#print "$str";

my @aa = $str =~ m/class=\"border\"\>(\d+)\.\<\/td\>[\s\S]+?href\=\"(.*)\"\starget\=_blank\>/mg;
my %bb = @aa;


foreach my $n (sort { $a <=> $b } keys %bb)
{
        print "$n.$bb{$n}\n";
        my $url2 = $bb{$n};
        my $request = HTTP::Request->new(GET => "$url2");
        my $response = $ua->simple_request( $request );
        my $str = $response->content;

        my @cc = $str =~ m/\<td\sclass\=tdn\>\d+\<\/td\>[\s\S]+?href\=\"(.*)\"\s+title[\s\S]+?\<td\>(\d+\.\d)\sM\<\/td\>/mg;
        my %t ;
        foreach my $n(0..$#cc)
        {
                next if($n % 2 == 0);
                push @{$t{$cc[$n]}},$n-1;
        }

#       print Dumper(\%t);

        my @t_arr = Thread->list();
        my $t_num = @t_arr;
        print "[$t_num]\n";
        if($t_num < $MAX_THREADS)
                {
                        Thread->new(\&down,\%t,\@cc,$n);
                }
        sleep(5);

}


sub down
{
my ($tt_p,$ccc_p,$n) = @_;
my %tt = %$tt_p;
my @ccc = @$ccc_p;

L:{
        foreach my $nn (reverse sort keys %tt)
        {
                next if($nn < 3);
                foreach my $nnn (@{$tt{$nn}})
                {
                        my $url3 = $ccc[$nnn];
                        my $request = HTTP::Request->new(GET => "$url3");
                        my $response = $ua->simple_request( $request );
                        my $str = $response->content;

                        my ($song_url) = $str =~ m/\<a\shref\=\"(.*)\"\>/m;
                        next if($song_url !~ m/\.mp3/i);#just wanna mp3
                        print "$n.$nn.$nnn.$song_url\n";

#                       my $wget = "wget -T 300 -t 3 -q $song_url -O $down_PATH/$n.mp3";#wget单线程太慢了……
                        my $wget = "mytget -n 10 -c 3 -d $down_PATH -f $n.mp3 '$song_url'";
                        print "\t$wget\n";
                        if(system($wget) == 0 and (-s "$down_PATH/$n.mp3") >3000000 )
                        {
                                last L;
                        }
                        else
                        {
                                system("rm -rf $down_PATH/$n.mp3");
                        }

                }
        }
}

my $retval= Thread->self->eval();
if ($@) { warn "thread failed: $@"; } else { print "thread returned $retval\n"; }

}

exit;


类别:Perl||添加到搜藏 |分享到i贴吧|浏览(915)|评论 (0)
 
最近读者:
 
网友评论:
发表评论:
姓 名:
网址或邮箱: (选填)
内 容:
     

   
帮助中心 | 空间客服 | 投诉中心 | 空间协议
©2012 Baidu