
Hmm... confused with my news scraper

First, thanks to the admins at PythonAnywhere for helping me set up a new MySQL table. I've not had much chance to work on it since I was made redundant.

Anyway, I created a Jupyter notebook in VS Code and want to transfer its output into a table on PythonAnywhere.

My web code looks like this. So far, so boring:

@app.route("/nationals")
def scrape_nationals():
    # pandas (as pd) and feedparser are already imported at this stage
    feeds = [
        {"type": "news", "title": "BBC", "url": "http://feeds.bbci.co.uk/news/uk/rss.xml"},
        {"type": "news", "title": "The Economist", "url": "https://www.economist.com/international/rss.xml"},
        {"type": "news", "title": "The New Statesman", "url": "https://www.newstatesman.com/feed"},
        {"type": "news", "title": "The New York Times", "url": "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml"},
        {"type": "news", "title": "Metro UK", "url": "https://metro.co.uk/feed/"},
        {"type": "news", "title": "Evening Standard", "url": "https://www.standard.co.uk/rss.xml"},
        {"type": "news", "title": "Daily Mail", "url": "https://www.dailymail.co.uk/articles.rss"},
        {"type": "news", "title": "Sky News", "url": "https://news.sky.com/feeds/rss/home.xml"},
        {"type": "news", "title": "The Mirror", "url": "https://www.mirror.co.uk/news/?service=rss"},
        {"type": "news", "title": "The Sun", "url": "https://www.thesun.co.uk/news/feed/"},
        {"type": "news", "title": "The Guardian", "url": "https://www.theguardian.com/uk/rss"},
        {"type": "news", "title": "The Independent", "url": "https://www.independent.co.uk/news/uk/rss"},
        # {"type": "news", "title": "The Telegraph", "url": "https://www.telegraph.co.uk/news/rss.xml"},
        {"type": "news", "title": "The Times", "url": "https://www.thetimes.co.uk/?service=rss"},
    ]
    # print(feeds)

    data = []                                     # <---- initialize empty list here
    for feed in feeds:
        parsed_feed = feedparser.parse(feed['url'])
        # print("Title:", feed['title'])
        # print("Number of Articles:", len(parsed_feed.entries))
        for entry in parsed_feed.entries:
            title = entry.title
            url = entry.link
            try:
                summary = entry.summary[:400] or "No summary available"
            except AttributeError:                # some feeds omit the summary field
                summary = "No summary available"
            try:
                date = pd.to_datetime(entry.published)
            except (AttributeError, ValueError):
                date = pd.to_datetime("1970-01-01")   # ISO date, so no day/month ambiguity
            data.append([title, url, summary, date])  # <---- append data from each entry here

    # Create dataframe with scraped data
    df = pd.DataFrame(data, columns=['title', 'url', 'summary', 'date'])
    # Load in existing data and drop duplicates
    articles = pd.read_sql('nationals', con=engine)
    articles = articles.drop_duplicates()
    # Append new articles, then drop duplicates again
    # (DataFrame.append was removed in pandas 2.x, so use pd.concat)
    df = pd.concat([df, articles], ignore_index=True)
    df = df.drop_duplicates()
    df.to_sql('nationals', con=engine, if_exists='replace', index=False)
    # Flask raises an error if a view returns None, so return something
    return "Scraped and saved"
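
For what it's worth, `engine` is a SQLAlchemy engine created near the top of the file. It's something like this sketch, with USERNAME, PASSWORD and DBNAME as placeholders rather than my real details (PythonAnywhere MySQL hostnames follow the username.mysql.pythonanywhere-services.com pattern, and database names are prefixed with username$):

import pandas as pd
import feedparser
from sqlalchemy import create_engine

# Placeholder credentials -- swap in your own username, password and database
engine = create_engine(
    "mysql+mysqldb://USERNAME:PASSWORD@USERNAME.mysql.pythonanywhere-services.com/USERNAME$DBNAME"
)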

Ultimately, I want to render a summary of the data, but I can't work out why the table doesn't update. I'm confused, and thanks in advance.
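
For the rendering side, I was picturing a second route that just reads the table back and dumps it as HTML while I get things working. A rough sketch (the route name and column selection are made up for now):

@app.route("/nationals/summary")
def nationals_summary():
    # Read the stored articles back and show the most recent ones first
    articles = pd.read_sql('nationals', con=engine)
    latest = articles.sort_values('date', ascending=False).head(50)
    # to_html is a quick way to eyeball the data before writing a proper template
    return latest[['title', 'url', 'summary', 'date']].to_html(index=False)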

PS: Yes, if admins want to explore my gory code, please go ahead. (One day I'll pay again.)

We can help you with what PythonAnywhere does, but not with general Python questions. You may need to ask on a more general forum.

Okay.