Forums

Twitter Streamer as Task / Doesn't write file

Hey guys,

I would like to run the script below as a scheduled task (I am a noob, so don't be too hard on me). However, each time the script is executed as a task, the log says that everything went fine, but the script never appends anything to testfile.csv. If I run the script manually, everything works just fine. Any ideas? Thanks in advance! And yes, all the files are in the same folder.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/python3.7
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import pandas as pd
import json
import auth_data

# Module-level buffer shared between the classes below:
# MyStreamListener.on_data appends each stored tweet payload (as str) to it,
# and the __main__ block later passes it to AnalyzeTweets.createDataFrameFromTweets.
temp_list = []

#### TWITTER STREAMER CLASS ######
class CreateStreamListener():
    '''
    Class for the TwitterStreamer, based on MyStreamListener.

    Builds the OAuth handler from the credentials in ``auth_data`` and runs a
    filtered stream whose payloads are handled by a MyStreamListener.
    '''

    def streamTweets(self, fetched_tweets_filename, track_list):
        '''
        Stream tweets matching ``track_list`` until the listener stops the stream.

        fetched_tweets_filename -- path handed to MyStreamListener; raw payloads
                                   are appended to this file
        track_list              -- list of keywords passed to ``Stream.filter(track=...)``
        '''
        # The listener's counter starts at 0 for every new stream.
        listener = MyStreamListener(fetched_tweets_filename, 0)
        auth = OAuthHandler(auth_data.CONSUMER_KEY, auth_data.CONSUMER_SECRET)
        auth.set_access_token(auth_data.ACCESS_TOKEN, auth_data.ACCESS_TOKEN_SECRET)
        stream = Stream(auth, listener)
        try:
            # is_async=False: this call blocks until the listener returns False
            # (or an exception propagates).
            stream.filter(track=track_list, is_async=False)
        finally:
            # Disconnect even when filter() raised, so the connection is not leaked.
            stream.disconnect()

##### StreamListener Class from tweepy lib

class MyStreamListener(StreamListener):
    '''
    My personal Stream Listener; particularly overwriting the on_data() method.

    Every accepted payload is appended to ``fetched_data_file`` and to the
    module-level ``temp_list``; the stream is stopped once TWEET_LIMIT
    payloads have been stored.
    '''

    # Number of payloads stored before on_data() asks the stream to stop
    # (the counter runs 0..50, i.e. 51 payloads).
    TWEET_LIMIT = 51

    def __init__(self, fetched_data_file, n):
        '''
        fetched_data_file -- path of the file raw payloads are appended to
        n                 -- starting value of the stored-payload counter
        '''
        # Let the tweepy base class finish its own initialisation.
        super().__init__()
        self.fetched_data_file = fetched_data_file
        self.n = n

    def on_data(self, data):
        '''
        Store one raw payload unless it contains "RT @".

        Returns False once the limit has been reached (which stops the stream),
        True otherwise -- including after a handled error, so that one bad
        payload does not end the stream.
        '''
        try:
            if self.n >= self.TWEET_LIMIT:
                print("Shutting down Stream!")
                return False
            if "RT @" in data:
                # Payload looks like a retweet: skip it but keep streaming.
                return True
            with open(self.fetched_data_file, "a", encoding="utf-8") as file:
                file.write(data)
            # Only count the payload once it has actually been written.
            temp_list.append(str(data))
            self.n = self.n + 1
        except Exception as e:
            # Exception, not BaseException: KeyboardInterrupt / SystemExit must
            # still be able to stop the script.
            print("Something went wrong with on_data: %s" % str(e))
        return True

    def on_error(self, status):
        '''
        Print the HTTP status; return False on 420 so the stream disconnects
        instead of retrying while being rate limited.
        '''
        print(status)
        if status == 420:
            return False
        return True

#### Class with tools for preparation and examination of Tweets ####

class AnalyzeTweets():
    '''
    main class with several methods to store, prepare, and analyze tweets
    '''

    # Substrings (case-sensitive) of which at least one must occur in a
    # tweet's text for the tweet to be kept.
    KEYWORDS = ("holy", "Holy", "holiness", "Holiness", "heilig", "Heilig")

    def createDataFrameFromTweets(self, temp_list, output_file="testfile.csv"):
        '''
        Filter raw tweet payloads and append the matches to a ';'-separated CSV.

        temp_list   -- iterable of raw JSON strings, one per tweet
        output_file -- CSV path to append to; a relative path is resolved
                       against the current working directory of the process,
                       not against the folder containing this script

        A payload is kept when it parses as JSON, has the keys "text",
        "timestamp_ms" and "lang", and its text contains one of KEYWORDS.
        Returns None; the only result is the rows appended to ``output_file``.
        '''
        import os  # local import: only needed for the header check below

        dict_tweets = {"text": [], "timestamp": [], "lang": []}
        for tweet in temp_list:
            # Cheap pre-filter so obviously unrelated payloads are not parsed.
            if not ("{" in tweet and "text" in tweet
                    and "timestamp" in tweet and "lang" in tweet):
                continue
            try:
                json_tweet = json.loads(tweet)
                # Read all three fields before appending anything, so the
                # columns can never end up with different lengths.
                text = json_tweet["text"]
                timestamp = json_tweet["timestamp_ms"]
                lang = json_tweet["lang"]
            except (ValueError, KeyError, TypeError):
                # Malformed JSON or an unexpected structure: skip this payload.
                continue
            if any(keyword in text for keyword in self.KEYWORDS):
                # Keep the text as str; to_csv performs the UTF-8 encoding.
                dict_tweets["text"].append(text)
                dict_tweets["timestamp"].append(timestamp)
                dict_tweets["lang"].append(lang)

        df = pd.DataFrame(dict_tweets)
        # Write the header row only when the file is new or empty, so repeated
        # runs do not scatter header lines through the data.
        needs_header = (not os.path.exists(output_file)
                        or os.path.getsize(output_file) == 0)
        df.to_csv(output_file, sep=';', mode="a", encoding='utf-8',
                  header=needs_header)

#### Main Algorithm

if __name__ == "__main__":
    # Local import: time.sleep() is used below and the file's import block
    # does not provide the module.
    import time

    tracks_list = ["holy", "holiness", "heilig"]
    # Relative file names are resolved against the current working directory
    # of the process, which may differ from the folder containing this script.
    file_name = "tweet_archiv.txt"

    # Step 1: collect tweets (streamTweets runs the stream with is_async=False).
    newListener = CreateStreamListener()
    newListener.streamTweets(file_name, tracks_list)
    time.sleep(5)

    # Step 2: filter the collected payloads and append them to the CSV.
    analyzor = AnalyzeTweets()
    analyzor.createDataFrameFromTweets(temp_list)

I suspect it's writing the file, just not to the location that you expect. Check out this help page, in particular the section about half-way down with the title "Make sure you take account of the working directory".

That's exactly what was going on. Thanks for the help!

Excellent, thanks for confirming!