I have a web-scraping script that works fine when I run it locally with Python 3.6 and Splinter. When I run it on PythonAnywhere in a Jupyter notebook, it throws an AttributeError after the second page of scraping (the script loops through a list of pages and goes "back" to the list after scraping each sub-page). The error is:
AttributeError: 'ElementList' object has no attribute 'value'
It doesn't even get that far when using the regular Python interpreter (also on PythonAnywhere).
Since I can't watch the page while it's being scraped (the browser runs headless under pyvirtualdisplay), my guess is that the site loads more slowly on PythonAnywhere than it does locally, so the element isn't in the DOM yet when the script looks for it.
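If I understand Splinter correctly, the find_by_* methods never raise when nothing matches; they just return an empty ElementList, and it's the later attribute access that fails with exactly this error. A minimal sketch of what I think is happening (the id here is made up):

from splinter import Browser

browser = Browser()
browser.visit("https://www.niche.com/k12/search/best-public-high-schools/")
missing = browser.find_by_id('no-such-id')  # empty ElementList, no exception yet
print(missing.value)  # AttributeError: 'ElementList' object has no attribute 'value'
browser.quit()

Here's the full script: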
from pyvirtualdisplay import Display
from splinter import Browser
import pandas as pd
import time

with Display():
    browser = Browser()
    url = "https://www.niche.com/k12/search/best-public-high-schools/"
    browser.visit(url)

    schpgs = browser.find_by_tag('h2')  # Get all links to school pages
    schpgs = list(schpgs)
    counter = 1

    # Total page count comes from the last <option> in the pagination dropdown
    pages = int(browser.find_by_tag('option')[-1].value)
    for p in range(pages):
        print(counter)
        if counter > 1:
            browser.visit(url + "?page=%d" % counter)
        for s in range(len(schpgs)):
            # Refresh the DOM (important!) -- the old element references
            # go stale after navigating back to the list page
            schpgs = browser.find_by_tag('h2')  # Get all links to school pages
            schpgs[s].click()
            print(browser.find_by_id('header').value)
            print(browser.find_by_tag('h1')[1].value)
            try:
                print(browser.find_by_id('ranking-tease').value)
            except Exception:
                print('n/a')
            try:
                print(browser.find_by_id('about').value)
            except Exception:
                print(browser.find_by_xpath('/html/body/div[1]/div/section/div[2]/div/div[2]/div/div[2]/section[4]').value)
            print(browser.find_by_id('academics').value)
            # May not work consistently: the span offsets shift depending on
            # whether the "Add to List" button is present
            if browser.find_by_tag('span')[5].value == 'Add to List':
                print(browser.find_by_tag('span')[4].value)
                print(browser.find_by_tag('span')[3].value)
            elif browser.find_by_tag('span')[6].value == 'Add to List':
                print(browser.find_by_tag('span')[5].value)
                print(browser.find_by_tag('span')[4].value)
            print(browser.find_by_id('students').value)
            print(browser.find_by_id('teachers').value)
            browser.back()
            time.sleep(1)
        counter += 1  # advance to the next results page
    browser.quit()
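The fix I'm considering (just a sketch, untested on PythonAnywhere; get_value_by_id is a helper I'd write myself, though is_element_present_by_id and its wait_time argument are Splinter built-ins) is to poll for each element before reading its .value instead of assuming the page has finished loading:

def get_value_by_id(browser, element_id, wait_time=10, default='n/a'):
    # Poll up to wait_time seconds for the element to appear, then read it;
    # fall back to a default rather than hitting an empty ElementList.
    if browser.is_element_present_by_id(element_id, wait_time=wait_time):
        return browser.find_by_id(element_id).value
    return default

# e.g. instead of print(browser.find_by_id('header').value):
# print(get_value_by_id(browser, 'header'))

That way a slow page load costs a few seconds of waiting instead of an AttributeError.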
Does that diagnosis (and the fix) sound reasonable?