download author.pl
Language: Perl
LOC: 46
Project Info
test-akihito-project - This is Test Project(test-akihito-project)
Server: Google
Type: svn
...ito‑project\trunk\perl\bot\
   author.pl
   flickr.pl
   ikea-xpath.pl
   ikea.pl
   japanese_kotowaza.pl
   nirayama.pl
   nitori.pl
   webscpaper_filter.pl

#!/usr/bin/perl

use strict;
use warnings;

use Web::Scraper;
use LWP::UserAgent;
use YAML;
use YAML::Dumper;

# Scrape the Japanese Wikiquote "authors" category page, then visit every
# author page it links to and collect the list items found in the page body.
# The result is printed to STDOUT as a YAML sequence of mappings with the
# keys "name", "link" and "phrase".

# Pages are decoded to Perl characters before scraping (see $fetch_html), so
# STDOUT has to encode them again on the way out.
binmode STDOUT, ':encoding(UTF-8)';

my $dumper = YAML::Dumper->new;

my $base_url     = 'http://ja.wikiquote.org';
my $category_url = $base_url . '/wiki/Category:%E4%BD%9C%E5%AE%B6';

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
#$ua->proxy(['http', 'ftp'], 'http://proxy:8000/');

# Fetch one page and return its body decoded to characters.
# Warns and returns undef when the request fails or the body cannot be
# decoded, so that callers decide whether the failure is fatal.
my $fetch_html = sub {
    my ($page_url) = @_;

    my $response = $ua->get($page_url);
    if ( !$response->is_success ) {
        warn "GET $page_url failed: " . $response->status_line . "\n";
        return;
    }

    # Web::Scraper only guesses the charset when it is handed a URI or an
    # HTTP::Response; a plain string must already be decoded.  A string is
    # passed on purpose: without a base URL the '@href' values stay exactly
    # as written in the HTML, which the URL concatenation below relies on.
    my $html = $response->decoded_content;
    warn "GET $page_url: could not decode the response body\n"
        if !defined $html;
    return $html;
};

# Without the category page there is nothing to iterate over.
my $category_html = $fetch_html->($category_url);
die "Cannot read category page $category_url; nothing to scrape\n"
    if !defined $category_html;

# Extracts the page title and, for every list item in the "mw-pages" block,
# the text and href of its direct child link.
my $category_scraper = scraper {
    process 'title', 'title[]' => 'TEXT';
    process 'div#mw-pages li', 'terms[]' => scraper {
        process 'li>a', 'name' => 'TEXT';
        process 'li>a', 'link' => '@href';
    };
    result qw/title terms/;
};
my $category = $category_scraper->scrape($category_html);

# Extracts the page title and the text of every top-level list item of the
# body content.  Built once: the definition does not depend on the page.
my $author_scraper = scraper {
    process 'title', 'title[]' => 'TEXT';
    process 'div#bodyContent>ul>li', 'phrase[]' => 'TEXT';
    result qw/title phrase/;
};

my @authors;
for my $term ( @{ $category->{terms} || [] } ) {

    # A list item without a direct child link has no page to visit.
    next if !defined $term->{link};

    my $author_url = $base_url . $term->{link};

    # Stays undef when the page cannot be fetched or has no matching items;
    # the entry is still recorded, with an empty phrase.
    my $phrases;
    my $author_html = $fetch_html->($author_url);
    if ( defined $author_html ) {
        $phrases = $author_scraper->scrape($author_html)->{phrase};
    }

    push @authors, {
        name   => $term->{name},
        link   => $author_url,
        phrase => $phrases || '',
    };

    # Pause between requests to avoid hammering the server.
    sleep(3);
}

my $yaml = $dumper->dump( \@authors );
print "$yaml\n";


__END__



About Koders | Resources | Downloads | Support | Black Duck | Submit Project | Terms of Service | DMCA | Privacy Policy | Site Map | Contact Us