From d2007a731ade6f1a469432c7745e407784d31440 Mon Sep 17 00:00:00 2001
From: retoor
Date: Sun, 24 Nov 2024 07:09:50 +0100
Subject: [PATCH] Updated dataset

---
Review notes (patch commentary below the three-dash line; not part of the
commit message):

 * dump(): the added `f.write(user, "...")` calls passed two arguments to a
   text file's write(), which accepts exactly one string and raises
   TypeError. Each call now writes a single formatted string, with the two
   parts separated by a space the way print() would join them.
 * dump(): the per-user export files are now opened with encoding="utf-8"
   so the result does not depend on the platform default encoding.
 * dump(): the fenced rant text passed to print() is written as an explicit
   `"```" + text + "```"` concatenation split over three lines.
 * Hunk line counts and the diffstat are unchanged by the fixes above. The
   `index` blob id of dataset.py still names the pre-review content.
 * Line breaks and indentation of this patch were reconstructed from a
   whitespace-collapsed copy; verify context-line whitespace against the
   repository before applying.
 * NOTE(review): export_statistics / export_mentions read
   export/dataset.txt, while export_dataset writes export/0_dataset.txt.
   Confirm which input file is intended.

 Makefile               | 16 +++++++++++++++-
 src/drstats/dataset.py | 36 ++++++++++++++++++++++++++----------
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index c5a4178..f421c65 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-all: build sync_excempt export_dataset export_stats merge_images
+all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
 
 build:
 	pip install build
@@ -27,6 +27,20 @@ export_dataset:
 	@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
 	dr.dataset > export/0_dataset.txt
 
+export_statistics:
+	@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
+	cat export/dataset.txt | grep "Statistics: "
+	cat export/dataset.txt | grep "Statistics: " > export/2_statistics.txt
+
+
+export_mentions:
+	@echo "Exporting mentions. Result will be ./export/3_mentions.txt"
+	cat export/dataset.txt | grep "times ment"
+	cat export/dataset.txt | grep "times ment" > export/3_mentions.txt
+
+
 merge_images:
 	@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
 	python merge_images.py
+
+
diff --git a/src/drstats/dataset.py b/src/drstats/dataset.py
index d407164..c1ded4a 100644
--- a/src/drstats/dataset.py
+++ b/src/drstats/dataset.py
@@ -1,20 +1,21 @@
 from drstats import db
 import functools
 import sys
+import pathlib
 
 printr = functools.partial(print, file=sys.stderr)
 
 
 def dump():
     statistics_text = [
-        f"devRant(developer community) haves {db.get_user_count()} active users(ranters)."
-        f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
-        f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
+        f"===devRant(developer community) haves {db.get_user_count()} active users(ranters)."
+        f"===All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
+        f"===The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
     ]
     printr(statistics_text)
     for contribution in db.get_contributions():
         statistics_text.append(
-            f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
+            f"===Statistics: User(ranter) {contribution['username']} is popularity rank {contribution['rank']} and made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
         )
         printr(statistics_text[-1])
     print("\n".join(statistics_text))
@@ -27,26 +28,41 @@ def dump():
         if text:
             total_text += text
             print(
-                "```",
-                f"All rants written by user(ranter) `{user}` on devRant(developer community)```.",
+                "===",
+                f"All rants written by user(ranter) `{user}` on devRant(developer community).",
+                "```"
+                + text
+                + "```"
             )
-            print(text, "```")
             printr(text)
+            with pathlib.Path("export/rants-" + user + ".txt").open("w", encoding="utf-8") as f:
+                f.write(f"{user} said:```")
+                f.write(text)
+                f.write(f"{user} ```")
         text = (
             db.get_all_posts_of_user(user).replace("\n", " ").replace("  ", " ").strip()
         )
         if text:
             total_text += text
             print(
+                "===",
                 "```",
                 f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.",
+                text,
+                "```"
             )
-            print(text, "```")
             printr(text)
+            with pathlib.Path("export/posts-" + user + ".txt").open("w", encoding="utf-8") as f:
+                f.write(f"{user} said:```")
+                f.write(text)
+                f.write(f"{user} ```")
         all_content += total_text
-
-    for user in db.get_users():
+    print("===Mentions of users:","```")
+    users = db.get_users()
+    users.sort()
+    for user in users:
         mention_text = f"@{user}"
         line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
         printr(line)
         print(line)
+    print("```")
\ No newline at end of file