Python: How to scrape bluebet.com.au with scrapy

Example code to scrape it:

#
# https://stackoverflow.com/a/47679861/1832058
#

import json
import scrapy

class BlueBet(scrapy.Spider):
    name = "BlueBet"
    start_urls = ['https://www.bluebet.com.au/api/sports/SportsMasterCategory?withLevelledMarkets=true&id=100']

    custom_settings = {
        'FEED_FORMAT': 'csv',
        'FEED_URI': 'odds.csv',
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse(self, response):
        data = json.loads(response.body)

        for …
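
Since this spider consumes a JSON API rather than HTML, the core of `parse` is plain `json.loads`. A minimal offline sketch (the payload structure below is hypothetical, not the real bluebet.com.au schema):

```python
import json

# hypothetical payload shaped like a sports-odds API response
body = b'{"Events": [{"Name": "Team A vs Team B", "Odds": 1.95}]}'

# json.loads accepts bytes, just like Scrapy's response.body
data = json.loads(body)

for event in data['Events']:
    print(event['Name'], event['Odds'])
```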

Python: How to scrape booksy.com with requests

Example code to scrape it:

#!/usr/bin/env python3 

# date: 2019.11.21
# https://stackoverflow.com/questions/58964487/beautifulsoup-scraping-other-pages-if-there-is-no-change-in-link-or-href-avail

import requests

headers = {
    'X-Api-Key': 'web-e3d812bf-d7a2-445d-ab38-55589ae6a121'
}

url = 'https://booksy.com/api/pl/2/customer_api/businesses/17101/reviews?reviews_page={}&reviews_per_page=5'

for x in range(1, 6):
    print('--- page:', x, '---')

    r …
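
The pagination here relies only on `str.format` filling `reviews_page`, so that part can be checked without touching the API:

```python
url = 'https://booksy.com/api/pl/2/customer_api/businesses/17101/reviews?reviews_page={}&reviews_per_page=5'

# build the URL for every page before requesting it
for x in range(1, 6):
    page_url = url.format(x)
    print(page_url)
```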

Python: How to scrape cafe.daum.net with selenium

Example code to scrape it:

#!/usr/bin/env python3

# date: 2020.02.23
# https://stackoverflow.com/questions/60362610/python-selenium-click-a-button/

import selenium.webdriver

url = 'http://cafe.daum.net/WekiMeki'

driver = selenium.webdriver.Chrome()
#driver = selenium.webdriver.Firefox()
driver.get(url)

frame = driver.find_element_by_id('down')
driver.switch_to.frame(frame)

driver …

Python: How to scrape cargurus.com with requests, BS

Example code to scrape it:

#
# https://stackoverflow.com/a/47933667/1832058
#

from bs4 import BeautifulSoup
import requests

params = {
    'zip': '03062',
    'address': 'Nashua,+NH',
    'latitude': "42.73040008544922",
    'longitude': '-71.49479675292969',
    'distance': 50000,
    'selectedEntity': 'c24578',
    'entitySelectingHelper.selectedEntity2': 'c25202',
    'minPrice': '',
    'maxPrice': '', 
    'minMileage': '',   
    'maxMileage': '',   
    'transmission': 'ANY',
    'bodyTypeGroup': '',    
    'serviceProvider': '',  
    'page': 1,
    'filterBySourcesString': '',
    'filterFeaturedBySourcesString …
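
`requests` encodes such a `params` dict into the query string itself; the same encoding can be reproduced offline with the stdlib (dict shortened here to a few of the values above):

```python
from urllib.parse import urlencode

# a shortened version of the params dict above
params = {
    'zip': '03062',
    'distance': 50000,
    'transmission': 'ANY',
    'page': 1,
}

query = urlencode(params)
print(query)  # zip=03062&distance=50000&transmission=ANY&page=1
```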

Python: How to scrape ceneo.pl with scrapy

Example code to scrape it:

#!/usr/bin/env python3

#
# https://stackoverflow.com/a/47888293/1832058
# 

import scrapy

data = '''https://www.ceneo.pl/48523541, 1362
https://www.ceneo.pl/46374217, 2457'''


class MySpider(scrapy.Spider):

    name = 'myspider'

    start_urls = ['https://www.ceneo.pl/33022301']

    def start_requests(self):
        # get data from …
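
`start_requests` first has to turn the `data` string into URLs; that split can be sketched without Scrapy:

```python
data = '''https://www.ceneo.pl/48523541, 1362
https://www.ceneo.pl/46374217, 2457'''

# each line holds "url, expected_price"
for line in data.splitlines():
    url, price = line.split(', ')
    print(url, price)
```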

Python: How to scrape cgtrader.com with scrapy

Example code to scrape it:

#!/usr/bin/env python3

import scrapy
#from scrapy.commands.view import open_in_browser
#import json

class FileDownloaderItem(scrapy.Item):
    file_urls = scrapy.Field()
    files = scrapy.Field()
    full_urls = scrapy.Field()

class MySpider(scrapy.Spider):

    name = 'myspider'

    allowed_domains = ['www.cgtrader.com']
    start_urls = ['https://www.cgtrader.com/free-3d-print-models …

Python: How to scrape claytoncountyga.gov with selenium with iframe

Example code to scrape it:

import selenium.webdriver

url = "https://www.claytoncountyga.gov/government/sheriff/inmate-search"
driver = selenium.webdriver.Firefox()
driver.get(url)

iframes = driver.find_elements_by_tag_name('iframe')
print('iframes:', iframes)

driver.switch_to.frame(iframes[0])

item = driver.find_element_by_id('name')
print('name:', item)
item.send_keys("John")

item = driver.find_element_by_name …

Python: How to scrape cnbc.com with requests

Example code to scrape it:

#
# https://stackoverflow.com/a/47744797/1832058
#

from bs4 import BeautifulSoup
import requests

html = requests.get("https://www.cnbc.com/2017/12/07/pinterest-hires-former-facebook-exec-gary-johnson-to-run-corporate-dev.html").text
soup = BeautifulSoup(html, 'html5lib')

all_paragraphs = soup.find_all('p')

for p in all_paragraphs:
    #print(p) # all HTML
    print(p …

Python: How to scrape cnmv.es with requests, BS

Example code to scrape it:

# author: https://blog.furas.pl
# date: 2020.08.04
# link: https://stackoverflow.com/questions/63246707/python-scraping-create-payload-cnmv-es-and-render-javascript/

import requests
from bs4 import BeautifulSoup

url = 'https://www.cnmv.es/portal/Consultas/BusquedaPorEntidad.aspx' # '?lang=en'
search_text = 'aaa' # 'abc'

r = requests.get(url)
#print(r.text …

Python: How to scrape cnnvd.org.cn with requests, BS

Example code to scrape it:

#!/usr/bin/env python3

#
# https://stackoverflow.com/a/47940659/1832058
#

from bs4 import BeautifulSoup
import requests

link = "http://www.cnnvd.org.cn/web/vulnerability/querylist.tag"

req = requests.get(link)
web = req.text
soup = BeautifulSoup(web, "lxml")

cve_name = []
cve_link = []

for par_ in soup …

Python: How to scrape coinbase.com with requests, BS

Example code to scrape it:

#!/usr/bin/env python3 

# date: 2019.12.02
# https://stackoverflow.com/questions/59132449/what-is-the-proper-syntax-for-find-in-bs4

import requests
from bs4 import BeautifulSoup

url = 'https://www.coinbase.com/charts'
headers = {'User-Agent': 'Mozilla/5.0'}  # hypothetical; the excerpt used `headers` without defining it
r = requests.get(url, headers=headers)

soup = BeautifulSoup(r.text, 'html.parser')

all_tr = soup.find_all('tr')

data …

Python: How to scrape coinmarketcap.com (1) with requests

Example code to scrape it:

import requests
import datetime
import csv

start_date = '2016.01.01'
finish_date = '2017.01.01'

start_date = datetime.datetime.strptime(start_date, '%Y.%m.%d')
finish_date = datetime.datetime.strptime(finish_date, '%Y.%m.%d')

start_timestamp = int(start_date.timestamp() * 1000)
one_day = datetime.timedelta(days=1)
finish_timestamp = int(finish_date …
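
The millisecond timestamps this snippet builds can be computed like so (pinned to UTC here for reproducibility; the original snippet uses the local timezone):

```python
import datetime

start_date = datetime.datetime.strptime('2016.01.01', '%Y.%m.%d')
start_date = start_date.replace(tzinfo=datetime.timezone.utc)

# coinmarketcap's historical endpoints take timestamps in milliseconds
start_timestamp = int(start_date.timestamp() * 1000)
print(start_timestamp)  # 1451606400000
```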

Python: How to scrape coinmarketcap.com (2) with requests, lxml

Example code to scrape it:

# date: 2019.05.09
# author: Bartłomiej 'furas' Burek
# https://stackoverflow.com/questions/56059703/how-can-i-make-lxml-save-two-pages-to-the-pages-so-it-can-be-read-by-the-tree

from lxml import html
import requests

data = {
    'BTC': 'id-bitcoin',
    'TRX': 'id-tron',
    # ...
    'HC': 'id-hypercash',
    'XZC': 'id-zcoin',
}

all_results = {}

for url in ('https://coinmarketcap.com/', 'https://coinmarketcap.com/2'):
    page = requests.get …

Python: How to scrape coinmarketcap.com (3) with pandas

Example code to scrape it:

# author: https://blog.furas.pl
# date: 2020.07.25
# link: https://stackoverflow.com/questions/63075215/read-html-where-required-table-needs-users-input/

import pandas as pd

all_dfs = pd.read_html('https://coinmarketcap.com/exchanges/bitfinex/')

df = all_dfs[2]

df[ df['Pair'].str.endswith('USD') ]
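
The last line filters rows by the `Pair` column; the same idiom works on any DataFrame (small made-up frame below, requires pandas):

```python
import pandas as pd

# made-up data in the same shape as the scraped table
df = pd.DataFrame({
    'Pair': ['BTC/USD', 'ETH/BTC', 'LTC/USD'],
    'Volume': [10, 20, 30],
})

# boolean mask keeps only rows whose Pair ends with 'USD'
usd_pairs = df[df['Pair'].str.endswith('USD')]
print(usd_pairs)
```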

Python: How to scrape collegiate-ac.com with scrapy

Example code to scrape it:

#!/usr/bin/env python3

#
# https://stackoverflow.com/a/47729218/1832058
#

import scrapy

class CollegiateSpider(scrapy.Spider):

    name = 'Collegiate'

    allowed_domains = ['collegiate-ac.com']

    start_urls = ['https://collegiate-ac.com/uk-student-accommodation/']

    # Step 1 - Get the area links

    def parse(self, response):
        for url in response.xpath('//*[@id="top …

Python: How to scrape comics.panini.it with scrapy

Example code to scrape it:

#!/usr/bin/env python3

# date: 2019.08.06
# https://stackoverflow.com/questions/57366488/how-to-pass-the-single-link-in-a-nested-url-scrape

import scrapy

def clean(text):
    text = text.replace('\xa0', ' ')
    text = text.strip().split('\n')
    text = ' '.join(x.strip() for x in text)
    return text

class PaniniSpider(scrapy.Spider):

    name …
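
The `clean()` helper above is self-contained, so it can be exercised without Scrapy:

```python
def clean(text):
    # replace non-breaking spaces, then collapse multi-line whitespace
    text = text.replace('\xa0', ' ')
    text = text.strip().split('\n')
    text = ' '.join(x.strip() for x in text)
    return text

print(clean('Tytu\u0142\xa0serii\n   Marvel '))  # Tytuł serii Marvel
```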

Python: How to scrape corporate.dow.com with selenium

Example code to scrape it:

#!/usr/bin/env python3 

# date: 2019.11.24
# https://stackoverflow.com/questions/59019810/python-web-scraping-ahref-link-and-articles-not-showing-up-in-source-code

import selenium.webdriver

url = 'https://corporate.dow.com/en-us/news.html'
driver = selenium.webdriver.Firefox()
driver.get(url)

all_items = driver.find_elements_by_xpath('//ul[@class="results__list"]/li')
for item in all_items …

Python: How to scrape coursetalk.com with scrapy

Example code to scrape it:

#!/usr/bin/env python3

#
# https://stackoverflow.com/a/48017689/1832058
#

import scrapy

class MySpider(scrapy.Spider):

    name = 'myspider'

    start_urls = ['https://www.coursetalk.com/subjects/data-science/courses']

    def parse(self, response):
        print('url:', response.url)

        for item in response.xpath('.//*[@class="as-table-cell"]/a/@href …
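
The relative hrefs extracted by that XPath have to be joined with the page URL before following them; Scrapy's `response.urljoin` does this, and the stdlib equivalent can be shown offline (the href below is a made-up example):

```python
from urllib.parse import urljoin

base = 'https://www.coursetalk.com/subjects/data-science/courses'
href = '/providers/coursera/courses/machine-learning'  # hypothetical href

# a root-relative href replaces the path of the base URL
full_url = urljoin(base, href)
print(full_url)  # https://www.coursetalk.com/providers/coursera/courses/machine-learning
```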

Python: How to scrape craigslist.org with requests

Example code to scrape it:

#!/usr/bin/env python3

#
# https://stackoverflow.com/a/47720827/1832058
# 

import requests
from bs4 import BeautifulSoup
import csv

filename = "output.csv"

f = open(filename, 'w', newline="", encoding='utf-8')

csvwriter = csv.writer(f)

csvwriter.writerow( ["Date", "Location", "Title", "Price"] )

offset = 0

while True:
    print …
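
The CSV setup above can be verified without hitting craigslist by writing into an in-memory buffer (the sample row is made up):

```python
import csv
import io

f = io.StringIO()  # stands in for open('output.csv', 'w', newline='', encoding='utf-8')
csvwriter = csv.writer(f)
csvwriter.writerow(["Date", "Location", "Title", "Price"])
csvwriter.writerow(["2017-12-09", "Portland", "Old bike", "25"])

print(f.getvalue())
```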
