A
download japanese_kotowaza.pl
Language: Perl
LOC: 46
Project Info
test-akihito-project - This is Test Project(test-akihito-project)
Server: Google
Type: svn
...ito‑project\trunk\perl\bot\
   author.pl
   flickr.pl
   ikea-xpath.pl
   ikea.pl
   japanese_kotowaza.pl
   nirayama.pl
   nitori.pl
   webscpaper_filter.pl

#!/usr/bin/perl

use strict;
use warnings;

use Web::Scraper;
use LWP::UserAgent;
use YAML;
use YAML::Dumper;

# Wikiquote (ja) page that lists Japanese proverbs; the last path segment
# is the percent-encoded page title.
my $url = 'http://ja.wikiquote.org/wiki/%E6%97%A5%E6%9C%AC%E3%81%AE%E8%AB%BA';

# A single user agent is shared by every request in this script:
# 10 second timeout, http/ftp traffic routed through the proxy below.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->proxy(['http', 'ftp'], 'http://proxy:8099/');

# Fetch the index page.  Nothing useful can be done without it, so stop
# with the HTTP status line rather than carrying on with undefined content.
my $response = $ua->get($url);
die "Failed to fetch $url: " . $response->status_line . "\n"
    unless $response->is_success;

# NOTE(review): content() is the raw, undecoded response body.  Web::Scraper's
# documentation asks for decoded text when scraping a string; switching to
# decoded_content would also need the sub-page fetches and the output layer
# changed to match, so the raw body is kept here -- TODO confirm.
my $content = $response->content;


# Scraper applied to each matched list item of the index page:
#   phrase - text of the 'li' match
#   link   - href of an a.extiw anchor that is a direct child of the item
#            (the key is absent when no such anchor matches)
my $term_scraper = scraper {
    process 'li',         'phrase' => 'TEXT';
    process 'li>a.extiw', 'link'   => '@href';
};

# Scraper for the index page as a whole.  Its result is a hash reference
# with the keys 'title' (list of <title> texts) and 'terms' (one
# $term_scraper result per <li> found under div#bodyContent).
my $index_scraper = scraper {
    process 'title',              'title[]' => 'TEXT';
    process 'div#bodyContent li', 'terms[]' => $term_scraper;
    result 'title', 'terms';
};

my $scraper_wiki = $index_scraper->scrape($content);


# For every term scraped from the index page, follow its link (when it has
# one), collect the text of the 'ol li' items on the linked page, and
# finally print all entries as a single YAML document on STDOUT.

# Built once up front: the scraper does not depend on the term being
# processed, so there is no need to rebuild it on every iteration.
my $description_scraper = scraper {
    process 'title', 'title[]'       => 'TEXT';
    process 'ol li', 'description[]' => 'TEXT';
    result qw/title description/
};

# Starts as an empty list so that a page with no terms is dumped as an
# empty sequence rather than as a null value.
my $data   = [];
my $dumper = YAML::Dumper->new;

for my $term ( @{ $scraper_wiki->{terms} || [] } ) {
    my $link        = $term->{link} || '';
    my $description = '';

    # Only terms with a link can be looked up; skipping the others avoids
    # a request with an undefined URL and a pointless 3 second pause.
    if ($link) {
        my $page = $ua->get($link);
        if ( $page->is_success ) {
            my $scraped = $description_scraper->scrape( $page->content );

            # Array reference of list-item texts when anything matched.
            $description = $scraped->{description}
                if $scraped->{description};
        }
        else {
            # Best effort: report the failure on STDERR and keep the term
            # with an empty description instead of aborting the whole run.
            warn "Failed to fetch $link: " . $page->status_line . "\n";
        }

        # Pause between requests to the remote site.
        sleep(3);
    }

    push @$data, {
        phrase      => $term->{phrase},
        link        => $link,
        description => $description,
    };
}

my $yaml = $dumper->dump($data);
print "$yaml \n";


__END__



About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us