Search on blog:

Python: How to scrape fundrazr.com

It is example code to scrape it:

#
# https://stackoverflow.com/a/47495628/1832058
#

import scrapy
import pyquery

class MySpider(scrapy.Spider):

    name = 'myspider'

    start_urls = ['https://fundrazr.com/find?category=Health']

    def parse(self, response):
        print('--- css 1 ---')
        for title in response.css('h2'):
            print('>>>', title)

        print('--- css 2 ---')
        for title in response.css('h2'):
            print('>>>', title.extract()) # without _first())
            print('>>>', title.css('a').extract_first())
            print('>>>', title.css('a ::text').extract_first())
            print('-----')

        print('--- css 3 ---')
        for title in response.css('h2 a ::text'):
            print('>>>', title.extract()) # without _first())

        print('--- pyquery 1 ---')
        p = pyquery.PyQuery(response.body)
        for title in p('h2'):
            print('>>>', title, title.text, '<<<') # `title.text` gives "\n"

        print('--- pyquery 2 ---')
        p = pyquery.PyQuery(response.body)
        for title in p('h2').text():
            print('>>>', title)
        print(p('h2').text())

        print('--- pyquery 3 ---')
        p = pyquery.PyQuery(response.body)
        for title in p('h2 a'):
            print('>>>', title, title.text)

# ---------------------------------------------------------------------

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess({
    'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
})

process.crawl(MySpider)
process.start()
If you like it
Buy a Coffee