Forums

Getting a 429 load balancer error after a certain portion of the DB is committed; however, the DB keeps getting added to

So I am calling the Spotipy API to add a user's songs, albums, artists, and user_info to a db. The total number of records inserted is around 3,000-4,000. The db will update; however, after the songs have been committed the HTML page will throw a load balancing error. The db continues to update after this error, but my webpage is stuck on an error screen.

The order of data inserted is:

user
songs (429 error thrown on HTML after commit)
albums
artists
userInfo (closed pipe error after commit)

When the database is fully created I then get the following error:

2021-04-23 14:30:12 Fri Apr 23 14:30:12 2021 - SIGPIPE: writing to a closed pipe/socket/fd (probably the client disconnected) on request /uploadFile (ip 10.0.0.93) !!!
2021-04-23 14:30:12 Fri Apr 23 14:30:12 2021 - uwsgi_response_writev_headers_and_body_do(): Broken pipe [core/writer.c line 306] during POST /uploadFile (10.0.0.93)

As you can see, the error is thrown during uploadFile. I have tried setting requests_timeout and retries for the Spotipy API to 30 and 10 respectively.

I feel like I can figure out the Spotipy portion, but I am stumped on why I am receiving the 429 error.

My code:

# A very simple Flask Hello World app for you to get started with...

import os
import string
import random
import json
import urllib
# from pysnap import Snapchat

from flask import Flask, render_template, request, session, make_response
from data import getBasicInfo, countArtistsDesc, countSongsDesc, getHighestPlayedDays, getPlaylist, countSongsFull, countArtists
from spotifyApiGrabber import getSong, getArtist, getBigArtist, getBigSong
from werkzeug.utils import secure_filename

from instagrapi import Client
from instagrapi.types import Location, StoryMention, StoryLocation, StoryLink, StoryHashtag
from instagrapi.story import StoryBuilder

from sqlalchemy import Table, Column, Integer, ForeignKey, String, create_engine
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func

import datetime
import math
import array

from PIL import Image
from PIL import ImageFont
from PIL import ImageDraw

# Flask application object for this module.
app = Flask(__name__)
# NOTE(review): the session-signing key is a hard-coded placeholder string;
# presumably it should be loaded from configuration instead -- confirm.
app.secret_key = "SECRET_KEY"

# Lower-cased file extensions accepted by allowed_file().
ALLOWED_EXTENSIONS = {'json'}

# Module-level scratch state shared by every caller in this process.
# addToDB() clears shArray in place and rebinds the three dicts on each call.
# testFile is not referenced anywhere in the visible part of this file.
shArray = []
testFile = ""
artistDict = {}
songDict = {}
albumDict = {}

def allowed_file(filename):
    """Tell whether ``filename`` carries an extension from ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. Names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS

# Connection URL for the MySQL database, assembled from its four components.
# NOTE(review): the credentials are hard-coded in source; presumably they
# should come from the environment or a config file -- confirm.
SQLALCHEMY_DATABASE_URI = "mysql+mysqlconnector://{username}:{password}@{hostname}/{databasename}".format(
    username="Spotify365",
    password="A$troworld85",
    hostname="Spotify365.mysql.pythonanywhere-services.com",
    databasename="Spotify365$Spotify365DB",
)
app.config["SQLALCHEMY_DATABASE_URI"] = SQLALCHEMY_DATABASE_URI
app.config["SQLALCHEMY_POOL_RECYCLE"] = 299
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

Base = declarative_base()
# The engine is created directly with create_engine(), which never looks at
# app.config, so the recycle interval has to be handed over explicitly.
# Without it, pooled connections are kept indefinitely and can be used after
# the server has already dropped them.
engine = create_engine(
    SQLALCHEMY_DATABASE_URI,
    echo=True,
    pool_recycle=app.config["SQLALCHEMY_POOL_RECYCLE"],
)
Session = sessionmaker(bind=engine)
# Single module-level session shared by every function below.
dbSession = Session()

class User(Base):
    """ORM model mapped to the "Users" table.

    Each relationship gathers the rows of another table in this file whose
    ``user_id`` foreign key references ``Users.id``.
    """
    __tablename__ = "Users"
    id = Column(Integer, primary_key=True)
    username = Column(String(100))
    # Rows of User_Info, Artist_Library, Song_Library and Album_Library
    # that point back at this user.
    basicInfo = relationship("UserInfo")
    artistLibrary = relationship("Artist")
    songLibrary = relationship("Song")
    albumLibrary = relationship("Album")
    def __repr__(self):
        """Return a short debug string with the primary key and username."""
        return '<User %r, %s>' % (self.id, self.username)

class Artist(Base):
    """ORM model mapped to the "Artist_Library" table.

    Holds one aggregated listening record per artist for a given user.
    """
    __tablename__ = "Artist_Library"
    id = Column(Integer, primary_key=True)
    artistName = Column(String(100))
    # Stored as strings; the scrape functions convert time_played back
    # with int().
    time_played = Column(String(100))
    clicks = Column(String(100))
    # Songs and albums whose artist_id foreign key references this row.
    songLibrary = relationship("Song")
    albumLibrary = relationship("Album")
    user_id = Column(Integer, ForeignKey('Users.id'))
    # Receives the value returned by getArtist() when written by addToDB().
    artist_uri = Column(String(100))
    # def __repr__(self):
        # return '<Artist %r, %s, %r, %s>' % (self.id, self.artistName, self.time_played, self.artist_uri)


class Song(Base):
    """ORM model mapped to the "Song_Library" table.

    Holds one aggregated listening record per track for a given user.
    """
    __tablename__ = "Song_Library"
    id = Column(Integer, primary_key=True)
    trackName = Column(String(100))
    artist = Column(String(100))
    albumName = Column(String(100))
    # Stored as strings; scrapeSongDB() converts both back with int().
    time_played = Column(String(100))
    clicks = Column(String(100))
    user_id = Column(Integer, ForeignKey('Users.id'))
    # NOTE(review): addSong() in this file never sets artist_id.
    artist_id = Column(Integer, ForeignKey('Artist_Library.id'))
    # Receives the second element of getSong()'s result when written by
    # addToDB().
    song_uri = Column(String(100))
    # def __repr__(self):
        # return '<Song %r, %s, %s, %s, %r, %r, %r, %s>' % (self.id, self.trackName, self.artist, self.time_played, self.clicks, self.user_id, self.artist_id, self.song_uri)

class Album(Base):
    """ORM model mapped to the "Album_Library" table.

    Holds one aggregated listening record per album for a given user.
    """
    __tablename__ = "Album_Library"
    id = Column(Integer, primary_key=True)
    albumName = Column(String(100))
    artist = Column(String(100))
    # Stored as strings; scrapeAlbumDB() converts time_played with int()
    # and passes clicks through unchanged.
    time_played = Column(String(100))
    clicks = Column(String(100))
    user_id = Column(Integer, ForeignKey('Users.id'))
    # NOTE(review): addAlbum() in this file never sets artist_id.
    artist_id = Column(Integer, ForeignKey('Artist_Library.id'))
    album_uri = Column(String(100))

class UserInfo(Base):
    """ORM model mapped to the "User_Info" table (basic account summary).

    The abbreviated column names are filled by addToBI() and read back by
    scrapeBasicDB(); the comments below give the JSON key each one maps to.
    """
    __tablename__ = "User_Info"
    id = Column(Integer, primary_key=True)
    spotifyUn = Column(String(100))  # "Username"
    spotifyDn = Column(String(100))  # "Display Name"
    spotifyCD = Column(String(100))  # "Account Creation Date"
    numFwr = Column(Integer)  # "Followers"
    numFol = Column(Integer)  # "Following"
    nP = Column(Integer)  # "Number of playlists"
    dL = Column(String(100))  # "Data Lifecycle"
    nOS = Column(Integer)  # "Total Number of Songs Analyzed"
    tL = Column(String(100))  # "Time Listened"
    user_id = Column(Integer, ForeignKey('Users.id'))


def createDB():
    """Create the tables declared on ``Base``, skipping ones that exist."""
    metadata = Base.metadata
    metadata.create_all(engine, checkfirst=True)

def addAlbum(user, alN, aN, tP, tC, uri):
    """Insert one Album_Library row for ``user`` and commit it.

    Args:
        user: User whose ``id`` becomes the row's ``user_id``.
        alN: Album name.
        aN: Artist name.
        tP: Total time played.
        tC: Number of plays ("clicks").
        uri: Value stored in ``album_uri``.

    If the first add/commit raises, the session is rolled back and the
    insert is retried once; a failure of the retry propagates.
    """
    createdAlbum = Album(albumName=alN, artist=aN, time_played=tP, clicks=tC, album_uri=uri, user_id=user.id)
    try:
        dbSession.add(createdAlbum)
        dbSession.commit()
    except Exception:
        # Exception rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not intercepted and turned into a retry.
        dbSession.rollback()
        dbSession.add(createdAlbum)
        dbSession.commit()

def addSong(user, tN, aN, alN, tP, tC, uri):
    """Insert one Song_Library row for ``user`` and commit it.

    Args:
        user: User whose ``id`` becomes the row's ``user_id``.
        tN: Track name.
        aN: Artist name.
        alN: Album name.
        tP: Total time played.
        tC: Number of plays ("clicks").
        uri: Value stored in ``song_uri``.

    If the first add/commit raises, the session is rolled back and the
    insert is retried once; a failure of the retry propagates.
    """
    createdSong = Song(trackName=tN, artist=aN, albumName=alN, time_played=tP, clicks=tC, song_uri=uri, user_id=user.id)
    try:
        dbSession.add(createdSong)
        dbSession.commit()
    except Exception:
        # Exception rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not intercepted and turned into a retry.
        dbSession.rollback()
        dbSession.add(createdSong)
        dbSession.commit()

def addArtist(user, aN, tP, tC, uri):
    """Insert one Artist_Library row for ``user`` and commit it.

    Args:
        user: User whose ``id`` becomes the row's ``user_id``.
        aN: Artist name.
        tP: Total time played.
        tC: Number of plays ("clicks").
        uri: Value stored in ``artist_uri``.

    If the first add/commit raises, the session is rolled back and the
    insert is retried once; a failure of the retry propagates.
    """
    createdArtist = Artist(artistName=aN, time_played=tP, clicks=tC, artist_uri=uri, user_id=user.id)
    try:
        dbSession.add(createdArtist)
        dbSession.commit()
    except Exception:
        # Exception rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not intercepted and turned into a retry.
        dbSession.rollback()
        dbSession.add(createdArtist)
        dbSession.commit()


def addToDB():
    """Import the session user's streaming history into the database.

    Steps, in order:
      1. Insert a new User row for ``session["username"]``.
      2. Load the user's ``StreamingHistory<N>.json`` files into ``shArray``.
      3. Aggregate play time and play count per artist and per track into
         the module-level ``artistDict`` and ``songDict``.
      4. For every track, look up album name and image via ``getSong`` and
         insert a Song row; album totals accumulate in ``albumDict``.
      5. Insert one Album row per album and one Artist row per artist
         (artist image looked up via ``getArtist``).
      6. Call ``addToBI`` to store the basic-info row.

    NOTE(review): steps 4 and 5 make one blocking lookup and one commit per
    track and per artist inside the request, so the total run time grows
    with the size of the listening history.
    """
    global shArray, songDict, artistDict, albumDict
    shArray.clear()

    # Create the user first so that user.id is available for the child rows.
    un = session["username"]
    user = User(username=un)
    try:
        dbSession.add(user)
        dbSession.commit()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        dbSession.add(user)
        dbSession.commit()

    # The number of matching files decides how many numbered files are read;
    # the files themselves are expected to be numbered 0..count-1.
    jsonDir = '/home/Spotify365/mysite/users/' + str(un) + '/json'
    prefixed = [filename for filename in os.listdir(jsonDir) if filename.startswith("StreamingHistory")]
    for a in range(len(prefixed)):
        with open(jsonDir + '/StreamingHistory' + str(a) + ".json", encoding="utf8") as f:
            shArray.append(json.load(f))

    artistDict = {}  # artist name -> [total ms played, play count]
    songDict = {}    # track name  -> [artist name, total ms played, play count]
    albumDict = {}   # album name  -> [artist name, total ms played, play count, uri]
    for history in shArray:
        for play in history:
            # Special-case rewrite of one track title before it is used as
            # a key and as a search term.
            if play['trackName'] == "Vintage Chanel (feat. Joey Bada$$, Kirk Knight, Meechy Darko & Zombie Juice)":
                play['trackName'] = "Vintage Chanel (feat. Joey Badass, Kirk Knight, Meechy Darko & Zombie Juice)"
            trackName = play['trackName']
            artistName = play['artistName']
            timePlayed = int(play['msPlayed'])

            # Use the int-converted value everywhere so artist and song
            # totals are accumulated the same way.
            if artistName in artistDict:
                artistDict[artistName][0] += timePlayed
                artistDict[artistName][1] += 1
            else:
                artistDict[artistName] = [timePlayed, 1]

            if trackName not in songDict:
                songDict[trackName] = [artistName, timePlayed, 1]
            else:
                songDict[trackName][1] += timePlayed
                songDict[trackName][2] += 1

    # Most-played first.
    finalSList = sorted(songDict.items(), key=lambda item: item[1][1], reverse=True)
    finalAList = sorted(artistDict.items(), key=lambda item: item[1], reverse=True)

    for songName, (artistName, tP, tC) in finalSList:
        # getSong returns a JSON array: [album name, image url].
        songResponse = list(json.loads(getSong(artistName, songName)))
        albumName = str(songResponse[0])
        uri = songResponse[1]
        addSong(user, songName, artistName, albumName, tP, tC, uri)

        if albumName not in albumDict:
            albumDict[albumName] = [artistName, int(tP), int(tC), uri]
        else:
            albumDict[albumName][1] += int(tP)
            albumDict[albumName][2] += int(tC)

    finalAlbumList = sorted(albumDict.items(), key=lambda item: item[1][1], reverse=True)
    for albumName, (albumArtist, albumTime, albumClicks, albumUri) in finalAlbumList:
        addAlbum(user, str(albumName), str(albumArtist), str(albumTime), str(albumClicks), str(albumUri))

    for aN, (tP, tC) in finalAList:
        # getArtist returns a JSON-encoded string; it is stored as returned.
        uri = getArtist(aN)
        addArtist(user, aN, tP, tC, uri)

    addToBI(user)


def addToBI(user):
    """Store the basic-info (User_Info) row for ``user``.

    Reads the account summary from ``getBasicInfo(user.username)``, adds the
    summed ``time_played`` of the user's Song rows, and commits a UserInfo
    row linked to the user through ``user_id``.

    Args:
        user: A persisted User; ``user.id`` and ``user.username`` are read.

    Raises:
        KeyError: If the basic-info JSON lacks one of the expected keys.
    """
    session["username"] = str(user.username)

    bi = json.loads(getBasicInfo(user.username))

    spotifyUser = bi["Username"]
    displayName = bi["Display Name"]
    accountCreationDate = bi["Account Creation Date"]
    following = bi["Following"]
    followers = bi["Followers"]
    playlistCount = bi["Number of playlists"]

    timeStamp = bi["Data Lifecycle"]
    totalNumberSongs = bi["Total Number of Songs Analyzed"]

    # Use the id of the object we were given, as the add* helpers do,
    # instead of looking the user up again by username.
    userId = user.id
    totalTimeListened = dbSession.query(func.sum(Song.time_played)).filter(Song.user_id == userId).scalar()

    # user_id must be set: scrapeBasicDB() finds this row by user_id.
    usersBI = UserInfo(
        spotifyUn=spotifyUser,
        spotifyDn=displayName,
        spotifyCD=accountCreationDate,
        numFwr=followers,
        numFol=following,
        nP=playlistCount,
        dL=timeStamp,
        nOS=totalNumberSongs,
        tL=str(totalTimeListened),
        user_id=userId,
    )
    try:
        dbSession.add(usersBI)
        dbSession.commit()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        dbSession.add(usersBI)
        dbSession.commit()


def scrapeBasicDB():
    """Return the session user's basic-info row as a compact JSON object.

    The user is identified by ``session["username"]``. All nine UserInfo
    columns are fetched in a single query. When no row exists every value
    is ``None`` (rendered as ``"None"`` for the fields that are passed
    through ``str``).

    Returns:
        str: JSON object with the keys "Username", "Display Name",
        "Account Creation Date", "Following", "Followers",
        "Number of playlists", "Total Number of Songs Analyzed",
        "Data Lifecycle" and "Time Listened".
    """
    try:
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()

    row = (
        dbSession.query(
            UserInfo.spotifyUn,
            UserInfo.spotifyDn,
            UserInfo.spotifyCD,
            UserInfo.numFwr,
            UserInfo.numFol,
            UserInfo.nP,
            UserInfo.dL,
            UserInfo.nOS,
            UserInfo.tL,
        )
        .filter(UserInfo.user_id == sessionUserId)
        .one_or_none()
    )
    if row is None:
        row = (None,) * 9
    un, spotifyDn, spotifyCD, numFwr, numFol, nP, dL, nOS, tL = row

    basicInfoDict = {
        "Username": un,
        "Display Name": spotifyDn,
        "Account Creation Date": spotifyCD,
        "Following": str(numFol),
        "Followers": str(numFwr),
        "Number of playlists": str(nP),
        "Total Number of Songs Analyzed": str(nOS),
        "Data Lifecycle": dL,
        "Time Listened": tL,
    }
    return json.dumps(basicInfoDict, separators=(',', ':'))

def scrapeSongDB():
    """Return the session user's songs as a JSON array.

    Each element has the form
    ``[[trackName, artist], [time_played, song_uri, clicks]]`` with
    ``time_played`` and ``clicks`` converted to integers.

    Returns:
        str: JSON-encoded list, in the order the rows come back from the
        query.
    """
    try:
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()

    songs = dbSession.query(Song).filter_by(user_id=sessionUserId).all()
    songDBDict = [
        [[song.trackName, song.artist], [int(str(song.time_played)), song.song_uri, int(song.clicks)]]
        for song in songs
    ]
    return json.dumps(songDBDict)

def scrapeAlbumDB():
    """Return the session user's albums as a JSON array.

    Each element has the form
    ``[[albumName, artist], [time_played, album_uri, clicks]]``.
    ``time_played`` is converted to an integer; ``clicks`` is passed
    through as stored.

    Returns:
        str: JSON-encoded list, in the order the rows come back from the
        query.
    """
    try:
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()

    albums = dbSession.query(Album).filter_by(user_id=sessionUserId).all()
    albumDBDict = [
        [[album.albumName, album.artist], [int(album.time_played), album.album_uri, album.clicks]]
        for album in albums
    ]
    return json.dumps(albumDBDict)

def scrapeArtistDB():
    """Return the session user's artists as a compact JSON object.

    Maps each artist name to ``[time_played, artist_uri, clicks]``.
    ``time_played`` is converted to an integer; ``clicks`` is passed
    through as stored. If two rows share an artist name, the later row
    wins.

    Returns:
        str: JSON-encoded object keyed by artist name.
    """
    try:
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()
    except Exception:
        # Clear the failed transaction and retry once.
        dbSession.rollback()
        sessionUserId = dbSession.query(User.id).filter_by(username=session["username"]).scalar()

    artists = dbSession.query(Artist).filter_by(user_id=sessionUserId).all()
    artistDBDict = {
        artist.artistName: [int(artist.time_played), artist.artist_uri, artist.clicks]
        for artist in artists
    }
    return json.dumps(artistDBDict, separators=(',', ':'))

Finally, my spotifyApiGrabber code (requests_timeout and retries are specified when the Spotify object is created):

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import sys
import os
import json
from flask import Flask, request, jsonify

# Client-credentials auth for the Spotify Web API.
# NOTE(review): the client id and secret are hard-coded in source; presumably
# they should be loaded from the environment instead -- confirm.
client_credentials_manager = SpotifyClientCredentials(client_id="3ba622095a8d40fb9b88707d8b729e34", client_secret="005f6088d6474797bd26912f60863af5")
# Shared client used by getSong() and getArtist(), created with a
# requests_timeout of 30 and retries of 10.
spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager, requests_timeout=30, retries = 10)

# Names that getSong() and getArtist() look up as shows (podcasts) instead of
# as tracks or artists. The list currently holds eight entries.
top50Pods = [
    "The Joe Rogan Experience",
    "Crime Junkie",
    "Call Her Daddy",
    "The Daily",
    "Renegades: Born in the USA",
    "Pardon My Take",
    "Impaulsive with Logan Paul",
    "H3 Podcast"
]
def getSong(aN, sN):
    """Look up album name and cover image for a track (or podcast show).

    Exactly one Spotify search is issued per call: a show search when the
    artist name is listed in ``top50Pods``, otherwise a track search
    restricted to the given artist and track name.

    Args:
        aN: Artist (or show) name; converted with ``str``.
        sN: Track name; converted with ``str``.

    Returns:
        str: JSON array ``[albumName, imageUrl]``. ``albumName`` is "N/A"
        for shows and for anything that could not be resolved; in the
        latter case ``imageUrl`` is the site's "unknown" placeholder.
    """
    songN = str(sN)
    artistN = str(aN)
    unknownImage = "https://spotify365.pythonanywhere.com/static/unknown"

    if artistN in top50Pods:
        result = spotify.search(q=artistN, type="show", market="US")
        shows = result["shows"]["items"]
        # Fall back to the placeholder instead of raising when the search
        # yields nothing usable.
        if not shows or shows[0] is None or not shows[0]["images"]:
            return json.dumps(["N/A", unknownImage])
        return json.dumps(["N/A", shows[0]["images"][0]["url"]])

    result = spotify.search(q='artist:' + artistN + ' track:' + songN)
    items = result["tracks"]["items"]
    if not items or items[0] is None or not items[0]["album"]["images"]:
        return json.dumps(["N/A", unknownImage])

    album = items[0]["album"]
    return json.dumps([album["name"], album["images"][0]["url"]])


def getArtist(search):
    """Look up an image URL for an artist (or podcast show).

    A show search is used when the name is listed in ``top50Pods``,
    otherwise an artist search.

    Args:
        search: Artist or show name; converted with ``str``.

    Returns:
        str: The image URL as a JSON-encoded string (i.e. wrapped in double
        quotes). The site's "unknown" placeholder URL is returned when the
        search has no result or the first result has no images.
    """
    value = str(search)
    unknownImage = "https://spotify365.pythonanywhere.com/static/unknown"

    if value in top50Pods:
        result = spotify.search(q=value, type="show", market="US")
        candidates = result["shows"]["items"]
    else:
        result = spotify.search(q="artist:" + value, type="artist")
        candidates = result["artists"]["items"]

    # Fall back to the placeholder instead of raising when the search
    # yields nothing usable.
    if not candidates or candidates[0] is None or not candidates[0]["images"]:
        return json.dumps(unknownImage)
    return json.dumps(candidates[0]["images"][0]["url"])

I'd guess that spotify has some sort of timeout for the requests that it makes and that your view is taking longer than that. Try reducing your view to something that you can respond with very quickly to confirm.