#!/usr/bin/perl
use strict;
use warnings;

use LWP::UserAgent;
use URI;
use Web::Scraper;
use YAML;
# Scrape the IKEA Japan "news/range" catalog page and dump the result as YAML
# on STDOUT.
#
# Output structure:
#   title    - array of <title> texts found on the start page
#   products - one entry per category link on the start page, each holding
#                category - the link text
#                list     - array of items scraped from the category page
#                           (link, image, name, dec, price), or undef when the
#                           link has no href or the category page could not
#                           be scraped
#
# The item key 'dec' (taken from span.prodDesc) is kept as spelled so the
# emitted YAML schema stays unchanged.

# Web::Scraper returns decoded character strings; encode them on output so the
# Japanese text does not trigger "Wide character in print".
binmode STDOUT, ':encoding(UTF-8)';

my $ua = LWP::UserAgent->new;

# Plain-HTTP requests are routed through this fixed proxy.
$ua->proxy('http', 'http://proxy.internal:8080');

# Make Web::Scraper use the proxied agent for the pages it fetches.
$Web::Scraper::UserAgent = $ua;

my $url = 'http://www.ikea.com/jp/ja/catalog/news/range/';

# One product tile on a category page.
my $item_scraper = scraper {
    process 'a',              'link'  => '@HREF';
    process 'img',            'image' => '@SRC';
    process 'span.prodName',  'name'  => 'TEXT';
    process 'span.prodDesc',  'dec'   => 'TEXT';
    process 'span.prodPrice', 'price' => 'TEXT';
};

# A whole category page; yields just the array of product tiles.
my $category_page_scraper = scraper {
    process 'div.productPadding ', 'data[]' => $item_scraper;
    result qw/data/;
};

# One category entry in the start page navigation: its label plus the items
# found by following its link.
my $category_scraper = scraper {
    process 'a', 'category' => 'TEXT';
    process 'a', 'list' => sub {
        my $anchor = $_;

        # Without an href there is nothing to follow.
        my $href = $anchor->attr_get_i('href');
        return unless defined $href && length $href;

        # Resolve against the start page so absolute, root-relative and
        # page-relative hrefs all produce a valid URL.
        my $cat_url = URI->new_abs($href, $url);

        # Progress goes to STDERR so STDOUT carries nothing but the YAML.
        print STDERR $cat_url . " < \n";

        # A single unreachable category must not discard everything that was
        # already collected: report it and leave this category's list undef.
        my $items;
        my $ok = eval {
            $items = $category_page_scraper->scrape($cat_url);
            1;
        };
        if (!$ok) {
            my $error = $@ || "unknown error\n";
            warn "Skipping $cat_url: $error";
            return;
        }

        return $items;
    };
};

my $catalog = scraper {
    process 'title', 'title[]' => 'TEXT';
    process 'div.productNavigation div.productItem span.prodName',
        'products[]' => $category_scraper;
    result qw/title products/;
}->scrape(URI->new($url));

print YAML::Dump($catalog);
__END__