Hi,
I'm trying to run a scraper that takes data from ESPN's NBA page. I am unsure whether I need to configure the proxy or whether ESPN's NBA site is simply not whitelisted. I specifically use read_html from pandas because it makes it extremely easy to output the table. The ESPN website I'm using for this script is this. Running the script produces the following error:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/urllib/request.py", line 1348, in do_open h.request(req.get_method(), req.selector, req.data, headers,
File "/usr/local/lib/python3.10/http/client.py", line 1282, in request self._send_request(method, url, body, headers, encode_chunked)
File "/usr/local/lib/python3.10/http/client.py", line 1328, in _send_request self.endheaders(body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.10/http/client.py", line 1277, in endheaders self._send_output(message_body, encode_chunked=encode_chunked)
File "/usr/local/lib/python3.10/http/client.py", line 1037, in _send_output self.send(msg)
File "/usr/local/lib/python3.10/http/client.py", line 975, in send self.connect()
File "/usr/local/lib/python3.10/http/client.py", line 1447, in connect super().connect()
File "/usr/local/lib/python3.10/http/client.py", line 951, in connect self._tunnel()
File "/usr/local/lib/python3.10/http/client.py", line 924, in _tunnel raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
OSError: Tunnel connection failed: 403 Forbidden
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "mypath/myproject/myscript.py", line 129, in <module> table_roster_init = pd.read_html(roster_url)[0]
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/util/_decorators.py", line 331, in wrapper return func(*args, **kwargs)
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/html.py", line 1205, in read_html return _parse(
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/html.py", line 986, in _parse tables = p.parse_tables()
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/html.py", line 262, in parse_tables tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/html.py", line 821, in _build_doc raise e
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/html.py", line 802, in _build_doc with urlopen(self.io) as f:
File "mypath/.virtualenvs/myvirtualenv/lib/python3.10/site-packages/pandas/io/common.py", line 265, in urlopen return urllib.request.urlopen(*args, **kwargs)
File "/usr/local/lib/python3.10/urllib/request.py", line 216, in urlopen return opener.open(url, data, timeout)
File "/usr/local/lib/python3.10/urllib/request.py", line 519, in open response = self._open(req, data)
File "/usr/local/lib/python3.10/urllib/request.py", line 536, in _open result = self._call_chain(self.handle_open, protocol, protocol +
File "/usr/local/lib/python3.10/urllib/request.py", line 496, in _call_chain result = func(*args)
File "/usr/local/lib/python3.10/urllib/request.py", line 1391, in https_open return self.do_open(http.client.HTTPSConnection, req,
File "/usr/local/lib/python3.10/urllib/request.py", line 1351, in do_open raise URLError(err)
urllib.error.URLError: <urlopen error Tunnel connection failed: 403 Forbidden>