Dear All,
I am trying to run the following code:
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
from selenium import webdriver
# Chrome configuration for running without a display (server / notebook host).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--no-sandbox")  # commonly required in containers / when running as root
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--headless")  # no visible browser window
chrome_options.add_argument("--ignore-certificate-errors")
# NOTE: no WebDriver is started here. The previous version also launched a
# Chrome instance named `browser` that was never used or quit, which leaked a
# browser process on every run. The `driver` created for the scrape is the
# only instance this script needs.
def scroll_page(driver, scroll_pause_time=1.5, max_scrolls=10):
    """Scroll to the bottom of the page repeatedly so lazy content can load.

    Args:
        driver: Object exposing ``execute_script(script)``; a Selenium
            WebDriver in this script.
        scroll_pause_time: Seconds to pause after each scroll.
        max_scrolls: Number of scroll-to-bottom steps to perform.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on a file-level one.
    import time

    scroll_count = 0
    while scroll_count < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scroll_count += 1
        # Actually pause between scrolls. driver.implicitly_wait() only
        # configures the timeout for element lookups; it never blocks, so the
        # page had no time to load additional content.
        time.sleep(scroll_pause_time)
driver = webdriver.Chrome(options=chrome_options)
url = 'https://bettingtips1x2.com/'
try:
    driver.get(url)
    # Scroll through the page to load all content
    scroll_page(driver)
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always end the session, even if loading or parsing fails. quit() closes
    # the browser as well as the driver service; service.stop() alone leaves
    # the browser session open.
    driver.quit()

# Find the table with class "results"
table = soup.find('table', class_='results')
if table is None:
    # soup.find() returns None when nothing matches. Fail with a clear message
    # instead of "'NoneType' object has no attribute 'find_all'".
    raise RuntimeError(
        f"No <table class='results'> found at {url}; the page may not have "
        "loaded fully or may be served differently in this environment."
    )

# One list of cell texts per table row. Rows without <td> cells (e.g. header
# rows) yield an empty list.
table_data = [
    [td.text for td in tr.find_all('td')]
    for tr in table.find_all('tr')
]
# Create a DataFrame from the scraped data (columns are positional: 0, 1, 2, ...)
df = pd.DataFrame(table_data)
# Drop rows with missing values (rows shorter than the widest row are padded
# with NaN by the DataFrame constructor)
df = df.dropna()
if not {3, 7}.issubset(df.columns):
    raise ValueError(
        "Scraped table does not have the expected columns 3 (teams) and 7 "
        f"(score); got columns {list(df.columns)}"
    )
# Column 3 is expected to look like "Home - Away" and column 7 like "H:A".
# reindex() guarantees parts 0 and 1 exist even if no row has the separator.
split_columns = df[3].str.split(' - ', expand=True).reindex(columns=[0, 1])
split_score = df[7].str.split(':', expand=True).reindex(columns=[0, 1])
# Assign the split parts to the desired column names
df['Home Team'] = split_columns[0]  # text before ' - '
df['Away Team'] = split_columns[1]  # text after ' - '
# Non-numeric or missing scores become NaN instead of crashing astype(int)
df['Home Score'] = pd.to_numeric(split_score[0], errors='coerce')  # value before ':'
df['Away Score'] = pd.to_numeric(split_score[1], errors='coerce')  # value after ':'
# Rows without both teams and both scores cannot produce a prediction
df = df.dropna(subset=['Home Team', 'Away Team', 'Home Score', 'Away Score'])
# Get the current system date
sysdate = datetime.now()
# Create the 'Date' column holding today's date as YYYY-MM-DD
df['Date'] = sysdate.strftime('%Y-%m-%d')
df["Home Score"] = df["Home Score"].astype(int)
df["Away Score"] = df["Away Score"].astype(int)
# Create the "Predicted Result" column based on the logic
def get_predicted_result(row):
    """Return the predicted outcome for one match row.

    Args:
        row: Mapping-like object (a DataFrame row in this script) with the
            keys "Home Score", "Away Score", "Home Team" and "Away Team".

    Returns:
        The home team's name if its score is higher, the away team's name if
        its score is higher, otherwise the string "Draw".
    """
    home_score = row["Home Score"]
    away_score = row["Away Score"]
    if home_score > away_score:
        return row["Home Team"]
    if away_score > home_score:
        return row["Away Team"]
    return "Draw"
# Compute the predicted outcome for every match, one row at a time.
df["Predicted Result"] = df.apply(get_predicted_result, axis=1)
# Discard the raw source columns and the helper score columns, then keep
# only the output columns in their final order:
# 'Date', 'Home Team', 'Away Team', 'Predicted Result'.
df = (
    df.drop(columns=[3, 7, 'Home Score', 'Away Score'])
    .loc[:, ['Date', 'Home Team', 'Away Team', 'Predicted Result']]
)
# Expose the finished table under its result name.
bet1x2_results = df
print("Success")
It works fine in my local notebook, but not here. Please help.