barchart
This commit is contained in:
parent
077dcc9f23
commit
1dfb6672fa
|
|
@ -14,9 +14,9 @@ def read_json(filename):
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main(arguments=list(sys.argv[1:])):
|
||||||
participants = []
|
participants = []
|
||||||
file_list = list(sys.argv[1:])
|
file_list = arguments
|
||||||
|
|
||||||
chat_data = read_json(file_list[0])
|
chat_data = read_json(file_list[0])
|
||||||
|
|
||||||
|
|
@ -36,15 +36,19 @@ def main():
|
||||||
|
|
||||||
WORDS_IN_CHAT = count_all_words(participants)
|
WORDS_IN_CHAT = count_all_words(participants)
|
||||||
for p in participants:
|
for p in participants:
|
||||||
|
print(p.name)
|
||||||
p.count_words()
|
p.count_words()
|
||||||
|
|
||||||
print("The participants of this chat:")
|
print("The participants of this chat:")
|
||||||
for p in participants:
|
for p in participants:
|
||||||
print(f"{p.name}\n")
|
print(f"{p.name}\n")
|
||||||
|
|
||||||
|
# print(participants[0].message_count)
|
||||||
|
|
||||||
analyzing.make_wordcloud(WORDS_IN_CHAT)
|
analyzing.make_wordcloud(WORDS_IN_CHAT)
|
||||||
# analyzing.make_timeline(participants)
|
analyzing.make_timeline(participants)
|
||||||
# make_final.assemble_image(participants)
|
analyzing.make_barchart(participants, WORDS_IN_CHAT)
|
||||||
|
make_final.assemble_image(participants)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,11 @@ def incidents_of_words(words):
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def set_sender(s, p):
    """Return the name of participant ``p``.

    Args:
        s: Ignored. The incoming value is never read; the parameter is kept
            only so existing call sites keep working.
        p: Object exposing a ``name`` attribute.

    Returns:
        The value of ``p.name``.
    """
    return p.name
|
||||||
|
|
||||||
|
|
||||||
def create_dataframe(participants):
|
def create_dataframe(participants):
|
||||||
skeleton = {"month": [],
|
skeleton = {"month": [],
|
||||||
"counts": [],
|
"counts": [],
|
||||||
|
|
@ -90,3 +95,51 @@ def make_timeline(participants):
|
||||||
})
|
})
|
||||||
|
|
||||||
fig.write_image(f"{RESOURCE_LOCATION}/timeline.png",format="png", width=1500, height=600, scale=3)
|
fig.write_image(f"{RESOURCE_LOCATION}/timeline.png",format="png", width=1500, height=600, scale=3)
|
||||||
|
|
||||||
|
|
||||||
|
def make_barchart(participants, WORDS_IN_CHAT):
    """Render a horizontal bar chart of word usage per sender to ``barchart.png``.

    The words shown are the first 50 keys of
    ``clean_text(WORDS_IN_CHAT, HUNGARIAN_STOPWORDS)``. For every participant
    the occurrences of those words in ``p.words['message']`` are counted, and
    the per-participant counts are drawn as bars coloured by sender.

    Args:
        participants: Iterable of objects exposing ``name`` and ``words``;
            ``words`` must be a DataFrame with a ``'message'`` column.
        WORDS_IN_CHAT: Word statistics handed to ``clean_text``.
            NOTE(review): presumably a word -> count mapping ordered by
            descending count, so that the first 50 keys are the most used
            words -- confirm against ``clean_text`` and the caller.

    Returns:
        None. The figure is written to ``{RESOURCE_LOCATION}/barchart.png``.
    """
    # Only the words themselves are needed for filtering, not their counts.
    top_words = list(clean_text(WORDS_IN_CHAT, HUNGARIAN_STOPWORDS).keys())[:50]

    # The empty skeleton fixes the column order and keeps the concat below
    # valid even when there are no participants.
    frames = [pd.DataFrame({"sender": [], "message": [], "counts": []})]

    for p in participants:
        used = p.words.loc[p.words['message'].isin(top_words)]
        per_word = used['message'].value_counts().sort_index()
        counts = per_word.rename_axis('message').reset_index(name='counts')
        counts['sender'] = p.name
        frames.append(counts)

    # Concatenate once instead of growing the frame on every iteration.
    df = pd.concat(frames, ignore_index=True)

    fig = px.bar(df, x="counts", y="message", color="sender", orientation='h', height=5000)
    fig.update_layout({
        "showlegend": False,
        "bargap": 0.5,
        "title": {
            "text": "MOST USED WORDS",
            "x": 0.5,
            "font": {"color": "white"},
        },
        "xaxis": {
            "showgrid": False,
            "title": "",
            "nticks": 5,
            "color": "white",
        },
        "yaxis": {
            "color": "white",
            "nticks": 50,
            # Largest total at the top of the horizontal chart.
            "categoryorder": "sum ascending",
            "title": {"text": ""},
        },
        # Fully transparent backgrounds.
        "paper_bgcolor": 'rgba(255,255,255,0)',
        "plot_bgcolor": 'rgba(255,255,255,0)',
    })

    # The width/height given here apply to the exported image.
    fig.write_image(f"{RESOURCE_LOCATION}/barchart.png",format="png", width=500, height=1200, scale=3)
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ def generate_images_from_text(x, y, txt):
|
||||||
return img
|
return img
|
||||||
|
|
||||||
|
|
||||||
def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="timeline.png"):
|
def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="timeline.png", barchartimg="barchart.png"):
|
||||||
if participants[0].chat_type == "Regular":
|
if participants[0].chat_type == "Regular":
|
||||||
names = []
|
names = []
|
||||||
for p in participants:
|
for p in participants:
|
||||||
|
|
@ -42,21 +42,24 @@ def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="time
|
||||||
|
|
||||||
number_of_messages = 0
|
number_of_messages = 0
|
||||||
for p in participants:
|
for p in participants:
|
||||||
number_of_messages += len(p.messages)
|
number_of_messages += p.message_count
|
||||||
|
|
||||||
n_o_m = generate_images_from_text(750, 350, str(number_of_messages))
|
n_o_m = generate_images_from_text(750, 350, str(number_of_messages))
|
||||||
# n_o_m.save(f'{RESOURCE_LOCATION}/number_of_messages.png')
|
# n_o_m.save(f'{RESOURCE_LOCATION}/number_of_messages.png')
|
||||||
n_o_m_text = generate_images_from_text(750, 400, "TOTAL NUMBER OF MESSAGES")
|
n_o_m_text = generate_images_from_text(750, 400, "TOTAL NUMBER OF MESSAGES")
|
||||||
# n_o_m_text.save(f'{RESOURCE_LOCATION}/number_of_messages_text.png')
|
# n_o_m_text.save(f'{RESOURCE_LOCATION}/number_of_messages_text.png')
|
||||||
|
|
||||||
final = Image.new('RGB', (5000, 8000), (0, 0, 0))
|
final = Image.new('RGBA', (5000, 8000), (35, 35, 35, 255))
|
||||||
final.paste(names, (0, 0))
|
final.paste(names, (0, 0),mask=names)
|
||||||
final.paste(n_o_m_text, (3250, 4400))
|
final.paste(n_o_m_text, (3250, 4400), mask=n_o_m_text)
|
||||||
final.paste(n_o_m, (3250, 4750))
|
final.paste(n_o_m, (3250, 4750), mask=n_o_m)
|
||||||
wordcloud = Image.open(f"{RESOURCE_LOCATION}/{wordcloudimg}")
|
wordcloud = Image.open(f"{RESOURCE_LOCATION}/{wordcloudimg}")
|
||||||
final.paste(wordcloud, (1500, 600))
|
# alpha = wordcloud.convert('RGBA').split()[-1]
|
||||||
|
final.paste(wordcloud, (1500, 600), mask=wordcloud)
|
||||||
timeline = Image.open(f"{RESOURCE_LOCATION}/{timelineimg}")
|
timeline = Image.open(f"{RESOURCE_LOCATION}/{timelineimg}")
|
||||||
final.paste(timeline, (250, 2600))
|
final.paste(timeline, (250, 2600), mask=timeline)
|
||||||
|
barchart = Image.open(f"{RESOURCE_LOCATION}/{barchartimg}")
|
||||||
|
final.paste(barchart, (1750, 4400), mask=barchart)
|
||||||
|
|
||||||
|
|
||||||
final.save(f"{RESOURCE_LOCATION}/final.png")
|
final.save(f"{RESOURCE_LOCATION}/final.png")
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,12 @@ def count_all_words(participants):
|
||||||
words_count = words['message'].value_counts().sort_index()
|
words_count = words['message'].value_counts().sort_index()
|
||||||
df_out = words_count.rename_axis('message').reset_index(name='counts')
|
df_out = words_count.rename_axis('message').reset_index(name='counts')
|
||||||
df_out['message'] = df_out.apply(lambda row : to_uppercase(row['message']), axis = 1)
|
df_out['message'] = df_out.apply(lambda row : to_uppercase(row['message']), axis = 1)
|
||||||
return df_out.sort_values(by="counts", ascending=True).set_index("message").to_dict()["counts"]
|
# print(df_out.sort_values(by="counts", ascending=True).set_index("message"))
|
||||||
|
d = df_out.sort_values(by="counts", ascending=True).set_index("message").to_dict()["counts"]
|
||||||
|
marklist = sorted(d.items(), key=lambda x:x[1], reverse=True)
|
||||||
|
sortdict = dict(marklist)
|
||||||
|
# print(sortdict)
|
||||||
|
return sortdict
|
||||||
|
|
||||||
class Participant:
|
class Participant:
|
||||||
def __init__(self, name, title, chat_type):
|
def __init__(self, name, title, chat_type):
|
||||||
|
|
@ -29,14 +34,17 @@ class Participant:
|
||||||
self.chat_type = chat_type
|
self.chat_type = chat_type
|
||||||
self.messages_df = pd.DataFrame({"timestamp": [], "message": []})
|
self.messages_df = pd.DataFrame({"timestamp": [], "message": []})
|
||||||
self.words = pd.DataFrame()
|
self.words = pd.DataFrame()
|
||||||
|
self.message_count = 0
|
||||||
|
|
||||||
def add_message(self, timestamp, message):
|
def add_message(self, timestamp, message):
|
||||||
self.messages_df = self.messages_df.append(dict(zip(self.messages_df.columns,[str(datetime.fromtimestamp(timestamp/1000)), ftfy.ftfy(message)])), ignore_index=True)
|
self.messages_df = self.messages_df.append(dict(zip(self.messages_df.columns,[str(datetime.fromtimestamp(timestamp/1000)), ftfy.ftfy(message)])), ignore_index=True)
|
||||||
|
|
||||||
def count_words(self):
|
def count_words(self):
|
||||||
words = self.messages_df.set_index(['timestamp']).apply(lambda x: x.str.split(' ').explode()).reset_index()
|
words = self.messages_df.set_index(['timestamp']).apply(lambda x: x.str.split(' ').explode()).reset_index()
|
||||||
words_count = words['message'].value_counts().sort_index()
|
# words_count = words['message'].value_counts().sort_index()
|
||||||
self.words = words_count.rename_axis('message').reset_index(name='counts')
|
words['message'] = words.apply(lambda row : to_uppercase(row['message']), axis = 1)
|
||||||
|
self.words = words.rename_axis('message')#.reset_index(name='counts')
|
||||||
|
self.message_count = self.messages_df.count()[0]
|
||||||
|
|
||||||
def get_words(self, longer_than=0):
|
def get_words(self, longer_than=0):
|
||||||
words = []
|
words = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue