Python – Scrapy with Selenium

Create Folder coinmarketcap & cd coinmarketcap

Install Scrapy

pip install scrapy

Create Project

scrapy startproject coinmarket

cd coinmarket

Install Selenium

pip install selenium

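Download the Driver & Put It in the Project Folder (make sure that folder is on your PATH, because Example 1 locates the driver with shutil.which("chromedriver"))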

Chrome :
https://sites.google.com/a/chromium.org/chromedriver/downloads

Generate Spiders

scrapy genspider coins coinmarketcap.com
Example 1 :

import scrapy
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which


class CoinSpiderSelenium(scrapy.Spider):
    """Scrape currency pairs and 24h volume from livecoin.net.

    The page is loaded once in headless Chrome while the spider is being
    constructed; the rendered HTML is stored on ``self.html`` and parsed
    in :meth:`parse` instead of the response body Scrapy downloads.
    """

    name = 'coin_selenium'
    # Domain names only: an entry containing a path (such as
    # 'www.livecoin.net/en') can never match a request's hostname.
    allowed_domains = ['www.livecoin.net']
    start_urls = [
        'https://www.livecoin.net/en'
    ]

    def __init__(self, *args, **kwargs):
        """Render the start page with Selenium and keep its HTML.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            **kwargs: Keyword arguments forwarded to ``scrapy.Spider``
                (for example values passed with ``scrapy crawl -a``).
        """
        super().__init__(*args, **kwargs)

        # Imported here so this class does not depend on an extra
        # module-level import.
        from selenium.webdriver.common.by import By

        chrome_options = Options()
        chrome_options.add_argument("--headless")

        # Looks the driver binary up on PATH.
        chrome_path = which("chromedriver")

        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
        try:
            driver.set_window_size(1920, 1080)
            driver.get("https://www.livecoin.net/en")

            # Click the fifth filter tab before capturing the page.
            # NOTE(review): presumably this is the RUR market tab - confirm
            # against the live page.
            filter_tabs = driver.find_elements(By.CLASS_NAME, "filterPanelItem___2z5Gb")
            filter_tabs[4].click()

            self.html = driver.page_source
        finally:
            # quit() ends the whole browser session, not just the current
            # window, and runs even when loading or clicking fails.
            driver.quit()

    def parse(self, response):
        """Yield one item per table row found in the Selenium-rendered HTML.

        Args:
            response: The response Scrapy downloaded for the start URL. It
                is not read; the HTML captured in ``__init__`` is used.

        Yields:
            dict: ``'currency pair'`` and ``'volume(24h)'`` for each row.
        """
        rendered = Selector(text=self.html)
        for currency in rendered.xpath("//div[contains(@class, 'ReactVirtualized__Table__row tableRow___3EtiS ')]"):
            yield {
                'currency pair': currency.xpath(".//div[1]/div/text()").get(),
                'volume(24h)': currency.xpath(".//div[2]/span/text()").get()
            }
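Example 2 (pip install scrapy-selenium; also enable scrapy_selenium.SeleniumMiddleware in DOWNLOADER_MIDDLEWARES and set SELENIUM_DRIVER_NAME, SELENIUM_DRIVER_EXECUTABLE_PATH and SELENIUM_DRIVER_ARGUMENTS in settings.py) :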

import scrapy
from scrapy.selector import Selector
from scrapy_selenium import SeleniumRequest
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class ExampleSpider(scrapy.Spider):
    """Search DuckDuckGo through scrapy-selenium and yield result URLs."""

    name = 'example'
    # Upper bound, in seconds, on how long to wait for result links to
    # appear after the search is submitted.
    result_wait_seconds = 10

    def start_requests(self):
        """Open the DuckDuckGo home page in the Selenium-driven browser.

        Yields:
            SeleniumRequest: Request for the home page, handled by
            :meth:`parse`.
        """
        yield SeleniumRequest(
            url='https://duckduckgo.com',
            wait_time=3,
            screenshot=True,
            callback=self.parse
        )

    def parse(self, response):
        """Submit a search and yield the URL of every result link.

        Args:
            response: Response whose ``meta['driver']`` holds the Selenium
                WebDriver that loaded the page.

        Yields:
            dict: ``{'URL': href}`` for each result link found.
        """
        driver = response.meta['driver']
        search_input = driver.find_element(By.XPATH, "(//input[contains(@class, 'js-search-input')])[1]")
        search_input.send_keys('Hello World')

        search_input.send_keys(Keys.ENTER)

        result_links_xpath = "//div[@class='result__extras__url']/a"

        # Pressing ENTER returns before the results page has loaded, so
        # wait for the links before reading page_source.
        try:
            WebDriverWait(driver, self.result_wait_seconds).until(
                EC.presence_of_element_located((By.XPATH, result_links_xpath))
            )
        except TimeoutException:
            # Best effort: parse whatever is on the page, as before.
            self.logger.warning(
                "No result links appeared within %s seconds",
                self.result_wait_seconds,
            )

        html = driver.page_source
        response_obj = Selector(text=html)

        for link in response_obj.xpath(result_links_xpath):
            yield {
                'URL': link.xpath(".//@href").get()
            }

How to Run

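scrapy crawl coins

(The argument is the spider's `name` attribute: use `scrapy crawl coin_selenium` for Example 1 and `scrapy crawl example` for Example 2.)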

Leave a Reply

Your email address will not be published. Required fields are marked *