Forums

Session Expires Automatically while scraping a website and running on PythonAnyWhere Server

I am scraping a website using Python and Selenium. My code was working absolutely fine on my local server. But when running on a remote server (PythonAnywhere), my session expires automatically and I get an "invalid session id" exception. This is my code (I have simplified it so that it is easier to understand):

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time
import csv
from datetime import date


# Log in to the target site with headless Chrome, then, for every row of the
# input CSV, navigate to that row's brand and product and read the style
# entries listed on the product page.
#
# NOTE: this is a simplified excerpt -- only the header row is written to the
# output CSV here, and several per-row fields are read but not used further.

today = date.today()
date_today = today.strftime("%Y-%b-%d")
file_name = 'name_of_file.csv'
headers = ['sku', 'brand', 'name', 'clrcode', 'color', 'barcode', 'size',
           'present', 'availability', 'available_by']

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
# NOTE(review): hosted/sandboxed environments often also require
# "--no-sandbox" for Chrome to stay alive -- confirm against the host's
# Selenium documentation before adding it.
browser = webdriver.Chrome(options=chrome_options)

# Everything that uses the driver lives inside try/finally so the browser
# process is always shut down, even when a lookup or the login fails.
# Leaked headless Chrome processes otherwise accumulate between runs.
try:
    browser.get('website to scrape')
    browser.find_element_by_class_name('userIDInput').send_keys('username')
    browser.find_element_by_class_name('passwordInput').send_keys('password')
    browser.find_element_by_class_name("signInButton").click()
    time.sleep(10)  # crude wait for the post-login page to finish loading

    # Open the navigation entry once before processing any row.
    element = browser.find_element_by_class_name("nav-link")
    browser.execute_script("arguments[0].click();", element)

    # Both files are opened only once the session is established, and are
    # closed together by the same `with`. newline='' is what the csv module
    # documentation asks for on files handed to its readers and writers.
    with open('filename.csv', newline='') as csvDataFile, \
            open(file_name, 'w', newline='') as my_file:
        csvReader = csv.DictReader(csvDataFile)
        writer = csv.DictWriter(my_file, fieldnames=headers)
        writer.writeheader()

        for row_index, row in enumerate(csvReader):
            sku_name = row['sku']
            brand_name = row['brand'].upper()
            name = row['name']
            color_code = row['clrcode']
            colour = row['color']
            barcode = row['barcode']
            size = row['size']
            present = row['present']
            availability = row['availability']
            available_by = row['available_by']

            # Every row after the first has to return to the navigation
            # entry, because the previous row left the browser on a product
            # page. (The reported "invalid session id" is raised here.)
            if row_index > 0:
                element = browser.find_element_by_class_name("nav-link")
                browser.execute_script("arguments[0].click();", element)

            try:
                # Brand link is clicked through JavaScript; product link is
                # clicked natively.
                element = browser.find_element_by_xpath("//*[contains(text(), '" + brand_name + "')]")
                browser.execute_script("arguments[0].click();", element)
                time.sleep(10)
                browser.find_element_by_xpath("//*[contains(text(), '" + name + "')]").click()
                time.sleep(10)

                style_rows = browser.find_elements_by_class_name("style-list__body.ng-star-inserted")
                for style_row in style_rows:
                    div = style_row.find_element_by_tag_name("div")
                    colour_code = div.find_element_by_tag_name("span").text
                    # Split the span text at its first space into two parts.
                    code_name, colour_name = colour_code.split(' ', 1)
                    colour = colour.upper()
            except Exception as e:
                # Best effort per row: report the failure and move on to the
                # next row instead of aborting the whole run.
                print(e)
finally:
    browser.quit()

On the first loop iteration there is no error. But when the loop runs a second time, the error occurs on this line: element = browser.find_element_by_class_name("nav-link") Error detail:

Traceback (most recent call last):
  File "/home/user/marchon.py", line 50, in <module>
    element = browser.find_element_by_class_name("nav-link")
  File "/home/user/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 564, in find_element_by_class_name
    return self.find_element(by=By.CLASS_NAME, value=name)
  File "/home/user/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 976, in find_element
    return self.execute(Command.FIND_ELEMENT, {
  File "/home/user/.local/lib/python3.8/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)
  File "/home/user/.local/lib/python3.8/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: invalid session id
  (Driver info: chromedriver=2.42.591071 (0b695ff80972cc1a65a5cd643186d2ae582cd4ac),platform=Linux 5.4.0-1029-aws x86_64)

I have tried many solutions but have been unable to fix this error. Please help.

https://stackoverflow.com/questions/56483403/selenium-common-exceptions-webdriverexception-message-invalid-session-id-using

Remove the try-except as it looks like you are hiding the errors from yourself. Prints go to the server log, not the error log.