barchart
This commit is contained in:
		
							parent
							
								
									077dcc9f23
								
							
						
					
					
						commit
						1dfb6672fa
					
				|  | @ -14,9 +14,9 @@ def read_json(filename): | ||||||
|         return json.load(f) |         return json.load(f) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def main(): | def main(arguments=list(sys.argv[1:])): | ||||||
|     participants = [] |     participants = [] | ||||||
|     file_list = list(sys.argv[1:]) |     file_list = arguments | ||||||
| 
 | 
 | ||||||
|     chat_data = read_json(file_list[0]) |     chat_data = read_json(file_list[0]) | ||||||
| 
 | 
 | ||||||
|  | @ -36,15 +36,19 @@ def main(): | ||||||
|      |      | ||||||
|     WORDS_IN_CHAT = count_all_words(participants) |     WORDS_IN_CHAT = count_all_words(participants) | ||||||
|     for p in participants: |     for p in participants: | ||||||
|  |         print(p.name) | ||||||
|         p.count_words() |         p.count_words() | ||||||
| 
 | 
 | ||||||
|     print("The participants of this chat:") |     print("The participants of this chat:") | ||||||
|     for p in participants: |     for p in participants: | ||||||
|         print(f"{p.name}\n") |         print(f"{p.name}\n") | ||||||
|  |      | ||||||
|  |     # print(participants[0].message_count) | ||||||
| 
 | 
 | ||||||
|     analyzing.make_wordcloud(WORDS_IN_CHAT) |     analyzing.make_wordcloud(WORDS_IN_CHAT) | ||||||
|     # analyzing.make_timeline(participants) |     analyzing.make_timeline(participants) | ||||||
|     # make_final.assemble_image(participants) |     analyzing.make_barchart(participants, WORDS_IN_CHAT) | ||||||
|  |     make_final.assemble_image(participants) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|  |  | ||||||
|  | @ -23,6 +23,11 @@ def incidents_of_words(words): | ||||||
|     return d |     return d | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def set_sender(s, p): | ||||||
|  |     s = p.name | ||||||
|  |     return s | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def create_dataframe(participants): | def create_dataframe(participants): | ||||||
|     skeleton = {"month": [], |     skeleton = {"month": [], | ||||||
|                 "counts": [], |                 "counts": [], | ||||||
|  | @ -90,3 +95,51 @@ def make_timeline(participants): | ||||||
|                         }) |                         }) | ||||||
| 
 | 
 | ||||||
|     fig.write_image(f"{RESOURCE_LOCATION}/timeline.png",format="png", width=1500, height=600, scale=3) |     fig.write_image(f"{RESOURCE_LOCATION}/timeline.png",format="png", width=1500, height=600, scale=3) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def make_barchart(participants, WORDS_IN_CHAT): | ||||||
|  |     words = {} | ||||||
|  |     df = pd.DataFrame({"sender": [], "message": [], "counts": []}) | ||||||
|  |      | ||||||
|  |     for key in list(clean_text(WORDS_IN_CHAT, HUNGARIAN_STOPWORDS).keys())[:50]: | ||||||
|  |         # print(key, WORDS_IN_CHAT[key]) | ||||||
|  |         words[key] = WORDS_IN_CHAT[key] | ||||||
|  |      | ||||||
|  |     for p in participants: | ||||||
|  |         df_c = pd.DataFrame({"message": [], "counts": [], "sender": []}) | ||||||
|  |         df_p = p.words.loc[p.words['message'].isin(list(words.keys()))] | ||||||
|  | 
 | ||||||
|  |         words_count = df_p['message'].value_counts().sort_index() | ||||||
|  |         df_c = words_count.rename_axis('message').reset_index(name='counts') | ||||||
|  |         # print(df_c) | ||||||
|  |         df_c['sender'] = p.name#df_c.apply(lambda row : set_sender(row['sender'], p), axis = 1) | ||||||
|  | 
 | ||||||
|  |         df = pd.concat([df, df_c], ignore_index=True) | ||||||
|  |         # print(df_c) | ||||||
|  |         # sender=[] | ||||||
|  |         # sender += [p.name]*df.count() | ||||||
|  |         # df["sender"] = sender | ||||||
|  |      | ||||||
|  |     fig = px.bar(df, x="counts", y="message", color="sender", orientation='h', height=5000) | ||||||
|  |     fig.update_layout({ "showlegend": False, | ||||||
|  |                         "bargap": 0.5, | ||||||
|  |                         "title": {"text": "MOST USED WORDS", | ||||||
|  |                                 "x": 0.5, | ||||||
|  |                                 "font": {"color": "white"}}, | ||||||
|  |                         "xaxis": {"showgrid": False, | ||||||
|  |                                     "title": "", | ||||||
|  |                                     "nticks": 5, | ||||||
|  |                                     "color": "white"}, | ||||||
|  |                         "yaxis": {"color": "white", | ||||||
|  |                                     "nticks": 50, | ||||||
|  |                                     "categoryorder": "sum ascending", | ||||||
|  |                                     "title": {"text": ""}, | ||||||
|  |                                     "color": "white"}, | ||||||
|  |                         "paper_bgcolor": 'rgba(255,255,255,0)', | ||||||
|  |                         "plot_bgcolor": 'rgba(255,255,255,0)' | ||||||
|  |                         }) | ||||||
|  |   | ||||||
|  |     fig.write_image(f"{RESOURCE_LOCATION}/barchart.png",format="png", width=500, height=1200, scale=3) | ||||||
|  |      | ||||||
|  |     # df = pd.DataFrame({"message": list(words.keys())}) | ||||||
|  |     # print(df) | ||||||
|  |  | ||||||
|  | @ -28,7 +28,7 @@ def generate_images_from_text(x, y, txt): | ||||||
|     return img |     return img | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="timeline.png"): | def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="timeline.png", barchartimg="barchart.png"): | ||||||
|     if participants[0].chat_type == "Regular": |     if participants[0].chat_type == "Regular": | ||||||
|         names = [] |         names = [] | ||||||
|         for p in participants: |         for p in participants: | ||||||
|  | @ -42,21 +42,24 @@ def assemble_image(participants, wordcloudimg="wordcloud.png", timelineimg="time | ||||||
| 
 | 
 | ||||||
|     number_of_messages = 0 |     number_of_messages = 0 | ||||||
|     for p in participants: |     for p in participants: | ||||||
|         number_of_messages += len(p.messages) |         number_of_messages += p.message_count | ||||||
|      |      | ||||||
|     n_o_m = generate_images_from_text(750, 350, str(number_of_messages)) |     n_o_m = generate_images_from_text(750, 350, str(number_of_messages)) | ||||||
|     # n_o_m.save(f'{RESOURCE_LOCATION}/number_of_messages.png') |     # n_o_m.save(f'{RESOURCE_LOCATION}/number_of_messages.png') | ||||||
|     n_o_m_text = generate_images_from_text(750, 400, "TOTAL NUMBER OF MESSAGES") |     n_o_m_text = generate_images_from_text(750, 400, "TOTAL NUMBER OF MESSAGES") | ||||||
|     # n_o_m_text.save(f'{RESOURCE_LOCATION}/number_of_messages_text.png') |     # n_o_m_text.save(f'{RESOURCE_LOCATION}/number_of_messages_text.png') | ||||||
| 
 | 
 | ||||||
|     final = Image.new('RGB', (5000, 8000), (0, 0, 0)) |     final = Image.new('RGBA', (5000, 8000), (35, 35, 35, 255)) | ||||||
|     final.paste(names, (0, 0)) |     final.paste(names, (0, 0),mask=names) | ||||||
|     final.paste(n_o_m_text, (3250, 4400)) |     final.paste(n_o_m_text, (3250, 4400), mask=n_o_m_text) | ||||||
|     final.paste(n_o_m, (3250, 4750)) |     final.paste(n_o_m, (3250, 4750), mask=n_o_m) | ||||||
|     wordcloud = Image.open(f"{RESOURCE_LOCATION}/{wordcloudimg}") |     wordcloud = Image.open(f"{RESOURCE_LOCATION}/{wordcloudimg}") | ||||||
|     final.paste(wordcloud, (1500, 600)) |     # alpha = wordcloud.convert('RGBA').split()[-1] | ||||||
|  |     final.paste(wordcloud, (1500, 600), mask=wordcloud) | ||||||
|     timeline = Image.open(f"{RESOURCE_LOCATION}/{timelineimg}") |     timeline = Image.open(f"{RESOURCE_LOCATION}/{timelineimg}") | ||||||
|     final.paste(timeline, (250, 2600)) |     final.paste(timeline, (250, 2600), mask=timeline) | ||||||
|  |     barchart = Image.open(f"{RESOURCE_LOCATION}/{barchartimg}") | ||||||
|  |     final.paste(barchart, (1750, 4400),  mask=barchart) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|     final.save(f"{RESOURCE_LOCATION}/final.png") |     final.save(f"{RESOURCE_LOCATION}/final.png") | ||||||
|  |  | ||||||
|  | @ -19,7 +19,12 @@ def count_all_words(participants): | ||||||
|     words_count = words['message'].value_counts().sort_index() |     words_count = words['message'].value_counts().sort_index() | ||||||
|     df_out = words_count.rename_axis('message').reset_index(name='counts') |     df_out = words_count.rename_axis('message').reset_index(name='counts') | ||||||
|     df_out['message'] = df_out.apply(lambda row : to_uppercase(row['message']), axis = 1) |     df_out['message'] = df_out.apply(lambda row : to_uppercase(row['message']), axis = 1) | ||||||
|     return df_out.sort_values(by="counts", ascending=True).set_index("message").to_dict()["counts"] |     # print(df_out.sort_values(by="counts", ascending=True).set_index("message")) | ||||||
|  |     d = df_out.sort_values(by="counts", ascending=True).set_index("message").to_dict()["counts"] | ||||||
|  |     marklist = sorted(d.items(), key=lambda x:x[1], reverse=True) | ||||||
|  |     sortdict = dict(marklist) | ||||||
|  |     # print(sortdict) | ||||||
|  |     return sortdict | ||||||
| 
 | 
 | ||||||
| class Participant: | class Participant: | ||||||
|     def __init__(self, name, title, chat_type): |     def __init__(self, name, title, chat_type): | ||||||
|  | @ -29,14 +34,17 @@ class Participant: | ||||||
|         self.chat_type = chat_type |         self.chat_type = chat_type | ||||||
|         self.messages_df = pd.DataFrame({"timestamp": [], "message": []}) |         self.messages_df = pd.DataFrame({"timestamp": [], "message": []}) | ||||||
|         self.words = pd.DataFrame() |         self.words = pd.DataFrame() | ||||||
|  |         self.message_count = 0 | ||||||
|      |      | ||||||
|     def add_message(self, timestamp, message): |     def add_message(self, timestamp, message): | ||||||
|         self.messages_df = self.messages_df.append(dict(zip(self.messages_df.columns,[str(datetime.fromtimestamp(timestamp/1000)), ftfy.ftfy(message)])), ignore_index=True) |         self.messages_df = self.messages_df.append(dict(zip(self.messages_df.columns,[str(datetime.fromtimestamp(timestamp/1000)), ftfy.ftfy(message)])), ignore_index=True) | ||||||
|      |      | ||||||
|     def count_words(self): |     def count_words(self): | ||||||
|         words = self.messages_df.set_index(['timestamp']).apply(lambda x: x.str.split(' ').explode()).reset_index() |         words = self.messages_df.set_index(['timestamp']).apply(lambda x: x.str.split(' ').explode()).reset_index() | ||||||
|         words_count = words['message'].value_counts().sort_index() |         # words_count = words['message'].value_counts().sort_index() | ||||||
|         self.words = words_count.rename_axis('message').reset_index(name='counts') |         words['message'] = words.apply(lambda row : to_uppercase(row['message']), axis = 1) | ||||||
|  |         self.words = words.rename_axis('message')#.reset_index(name='counts') | ||||||
|  |         self.message_count = self.messages_df.count()[0] | ||||||
|      |      | ||||||
|     def get_words(self, longer_than=0): |     def get_words(self, longer_than=0): | ||||||
|         words = [] |         words = [] | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue