Python: How to scrape allegro.pl with scrapy
Here is example code that scrapes a few product pages:
# date: 2017.12.10
# https://stackoverflow.com/a/47744135/1832058

import scrapy

#from allegro.items import AllegroItem

#class AllegroItem(scrapy.Item):
#    product_name = scrapy.Field()
#    product_sale_price = scrapy.Field()
#    product_seller = scrapy.Field()

class AllegroPrices(scrapy.Spider):

    name = "AllegroPrices"
    allowed_domains = ["allegro.pl"]

    start_urls = [
        "http://allegro.pl/diablo-ii-lord-of-destruction-2-pc-big-box-eng-i6896736152.html",
        "http://allegro.pl/diablo-ii-2-pc-dvd-box-eng-i6961686788.html",
        "http://allegro.pl/star-wars-empire-at-war-2006-dvd-box-i6995651106.html",
        "http://allegro.pl/heavy-gear-ii-2-pc-eng-cdkingpl-i7059163114.html"
    ]

    def parse(self, response):
        # extract product name, price and seller from the page
        title = response.xpath('//h1[@class="title"]//text()').extract()
        sale_price = response.xpath('//div[@class="price"]//text()').extract()
        seller = response.xpath('//div[@class="btn btn-default btn-user"]/a/span/text()').extract()

        title = title[0].strip()
        print(title, sale_price, seller)

        # yield a plain dict so no Item class is needed
        yield {'title': title, 'price': sale_price, 'seller': seller}

        # the same result using an Item class instead of a dict:
        #items = AllegroItem()
        #items['product_name'] = ''.join(title).strip()
        #items['product_sale_price'] = ''.join(sale_price).strip()
        #items['product_seller'] = ''.join(seller).strip()
        #yield items
# --- run it as a standalone script, without a project, and save the results to CSV ---

from scrapy.crawler import CrawlerProcess

#c = CrawlerProcess()
c = CrawlerProcess({
    #'USER_AGENT': 'Mozilla/5.0',
    'FEED_FORMAT': 'csv',
    'FEED_URI': 'output.csv'
})
c.crawl(AllegroPrices)
c.start()
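
When the crawl finishes, the scraped items end up in output.csv, one row per yielded dict. A minimal sketch for reading that file back, assuming the crawl ran in the current directory and actually produced the file:

import csv

# print the columns written by the CSV feed exporter (title, price, seller)
with open('output.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row['title'], row['price'], row['seller'])

Alternatively, if Scrapy is installed, a self-contained spider file like this one can also be run with `scrapy runspider yourfile.py -o output.csv` (the filename here is just a placeholder), without the CrawlerProcess block at the bottom.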
