Python – Web Scraping Tokopedia Flashsale with Selenium Headless

import csv
import time
from shutil import which

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Full path of the chromedriver binary found on PATH (None if not found).
chrome_path = which("chromedriver")

# User-agent string handed to Chrome through the `user-agent` argument below.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"

# Command-line configuration for the Chrome instance.
options = webdriver.ChromeOptions()
# Pass the flag directly instead of `options.headless = True`: the property
# setter is deprecated in Selenium 4, while the raw argument works on both
# Selenium 3 and Selenium 4.
options.add_argument('--headless')
options.add_argument(f'user-agent={user_agent}')
options.add_argument("--window-size=1920,1080")
options.add_argument('--ignore-certificate-errors')
options.add_argument('--allow-running-insecure-content')
options.add_argument("--disable-extensions")
# NOTE(review): the single quotes are part of the argument value as written;
# confirm Chrome interprets this as intended.
options.add_argument("--proxy-server='direct://'")
options.add_argument("--proxy-bypass-list=*")
options.add_argument("--start-maximized")
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--no-sandbox')

# Launch Chrome, load the flash-sale page, scroll through it and collect one
# [name, discounted price, normal price, image URL, link] row per product.
driver = webdriver.Chrome(executable_path=chrome_path, options=options)

products = []

# Everything that touches the browser runs inside try/finally so the Chrome
# and chromedriver processes are always shut down, even when scraping fails.
try:
    driver.get("https://www.tokopedia.com/discovery/kejar-diskon")

    # Scroll down in steps, pausing before each one (presumably to give
    # lazily-loaded product cards time to render -- confirm against the page).
    times = 20
    for _ in range(times):
        time.sleep(3)
        driver.execute_script("window.scrollBy(400,600)")

    # `find_elements(By.XPATH, ...)` replaces the deprecated
    # `find_elements_by_xpath` helper and works on Selenium 3 and 4 alike.
    product_names = driver.find_elements(By.XPATH, "//div[@class='css-18c4yhp']")
    disc_prices = driver.find_elements(By.XPATH, "//div[@class='css-rhd610']")
    normal_prices = driver.find_elements(By.XPATH, "//div[@data-testid='lblProductSlashPrice']")
    img_urls = driver.find_elements(By.XPATH, "//img[@class='success fade']")
    links = driver.find_elements(By.XPATH, "//a[@class='pcv3__info-content css-1qnnuob']")

    columns = (product_names, disc_prices, normal_prices, img_urls, links)

    # The five lists are matched purely by position. If their lengths differ,
    # indexing by len(product_names) would raise IndexError; zip() below stops
    # at the shortest list instead, and this warning flags that rows may be
    # incomplete or misaligned.
    if len({len(column) for column in columns}) > 1:
        print(
            "Warning: element counts differ "
            f"(names={len(product_names)}, disc_prices={len(disc_prices)}, "
            f"normal_prices={len(normal_prices)}, images={len(img_urls)}, "
            f"links={len(links)}); rows may be incomplete or misaligned."
        )

    # Read text/attributes while the browser session is still alive.
    for name_el, disc_el, normal_el, img_el, link_el in zip(*columns):
        products.append([
            name_el.text,
            disc_el.text,
            normal_el.text,
            img_el.get_attribute('src'),
            link_el.get_attribute('href'),
        ])
finally:
    driver.quit()

#print(products)
# Write the scraped rows to products.csv with a header line.
# An explicit UTF-8 encoding keeps non-ASCII characters in product names from
# raising UnicodeEncodeError on platforms whose default encoding is not UTF-8.
with open('products.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # Header row, then one row per product.
    writer.writerow(["PRODUCT NAME", "DISC PRICE", "NORMAL PRICE", "IMAGE URL", "LINK"])
    writer.writerows(products)

print("Writing Done!")

Leave a Reply

Your email address will not be published. Required fields are marked *