Python – Scrapy & Selenium: Storing Scraped Data in SQLite

Flashdeal.py (in the spiders folder):

import scrapy
from selenium import webdriver
from shutil import which
import time

class FlashdealSpider(scrapy.Spider):
    """Scrape the Tokopedia "kejar-diskon" page with a Selenium-driven Chrome.

    The Scrapy response handed to ``parse`` is not used; the page is loaded
    again in a Chrome instance, scrolled repeatedly, and the product data is
    then read from the browser's DOM.
    """

    name = 'flashdeal'
    allowed_domains = ['www.tokopedia.com']
    start_urls = ['http://www.tokopedia.com/discovery/kejar-diskon']

    def parse(self, response):
        """Yield one dict per product found on the flash-deal page.

        Args:
            response: The Scrapy response for the start URL (unused; the page
                is fetched again through Selenium).

        Yields:
            dict: ``product_name``, ``disc_price``, ``normal_price``,
            ``image_url`` and ``product_url`` for each product.
        """
        chrome_path = which("chromedriver")
        driver = webdriver.Chrome(executable_path=chrome_path)

        try:
            driver.get("https://www.tokopedia.com/discovery/kejar-diskon")

            # Scroll down step by step, pausing between steps, before the
            # elements are collected.
            times = 20
            for _ in range(times):
                time.sleep(3)
                driver.execute_script("window.scrollBy(400,600)")

            # ``find_elements("xpath", ...)`` is the generic lookup; the
            # ``find_elements_by_*`` shortcuts are gone in newer Selenium
            # releases.
            product_names = driver.find_elements(
                "xpath", "//div[@class='css-18c4yhp']")
            disc_prices = driver.find_elements(
                "xpath", "//div[@class='css-rhd610']")
            normal_prices = driver.find_elements(
                "xpath", "//div[@data-testid='lblProductSlashPrice']")
            img_urls = driver.find_elements(
                "xpath", "//img[@class='success fade']")
            links = driver.find_elements(
                "xpath", "//a[@class='pcv3__info-content css-1qnnuob']")

            columns = (product_names, disc_prices, normal_prices,
                       img_urls, links)
            sizes = [len(column) for column in columns]
            if len(set(sizes)) > 1:
                # Indexing every list by len(product_names) would raise
                # IndexError here; pair up only the rows all lists share.
                self.logger.warning(
                    "Element counts differ %s; only the first %d products "
                    "are exported", sizes, min(sizes))

            # Read every value while the browser session is still alive.
            items = [
                {
                    'product_name': name_el.text,
                    'disc_price': disc_el.text,
                    'normal_price': normal_el.text,
                    'image_url': img_el.get_attribute('src'),
                    'product_url': link_el.get_attribute('href'),
                }
                for name_el, disc_el, normal_el, img_el, link_el
                in zip(*columns)
            ]
        finally:
            # Always shut the browser down, even when scraping fails.
            driver.quit()

        yield from items

pipelines.py:

from itemadapter import ItemAdapter

import logging
import sqlite3

class TokopediaPipeline(object):
    """Item pipeline that stores each scraped item in a SQLite table.

    The ``flashdeal`` table is created on first use and reused afterwards;
    every item becomes one row and is committed immediately.
    """

    # SQLite database file; a relative path is resolved against the
    # directory the crawl is started from.
    db_path = "tokopedia.db"

    def open_spider(self, spider):
        """Open the database and make sure the ``flashdeal`` table exists.

        Args:
            spider: The spider being opened (unused).

        Raises:
            sqlite3.Error: If the database cannot be opened or the table
                cannot be created.
        """
        self.connection = sqlite3.connect(self.db_path)
        self.c = self.connection.cursor()
        # IF NOT EXISTS tolerates a table left by an earlier run without
        # hiding unrelated operational errors behind a blanket except.
        self.c.execute('''
            CREATE TABLE IF NOT EXISTS flashdeal(
                product_name TEXT,
                disc_price TEXT,
                normal_price TEXT,
                image_url TEXT,
                product_url TEXT
            )
        ''')
        self.connection.commit()

    def close_spider(self, spider):
        """Close the database connection.

        Args:
            spider: The spider being closed (unused).
        """
        self.connection.close()

    def process_item(self, item, spider):
        """Insert one item as a row of ``flashdeal`` and pass it on.

        Args:
            item: Mapping-like item; fields it does not provide are stored
                as NULL.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, unchanged.
        """
        row = (
            item.get('product_name'),
            item.get('disc_price'),
            item.get('normal_price'),
            item.get('image_url'),
            item.get('product_url'),
        )
        # The connection context manager commits on success and rolls back
        # if the insert raises.
        with self.connection:
            self.c.execute('''
                INSERT INTO flashdeal (product_name,disc_price,normal_price,image_url,product_url) VALUES(?,?,?,?,?)
            ''', row)
        return item

settings.py:

Uncomment the ITEM_PIPELINES setting so the pipeline is enabled:

# Enable TokopediaPipeline (defined in the tokopedia.pipelines module) so
# scraped items are handed to it. 300 is the pipeline's order value --
# presumably lower values run earlier; confirm against the Scrapy docs.
ITEM_PIPELINES = {
    'tokopedia.pipelines.TokopediaPipeline': 300,
}

Then add the following line:

# Encoding used for exported feeds. NOTE(review): presumably set so that
# non-ASCII text in feed output is written as UTF-8 rather than escaped --
# confirm against the Scrapy settings reference.
FEED_EXPORT_ENCODING = 'utf-8'

Leave a Reply

Your email address will not be published. Required fields are marked *