Updated dataset

This commit is contained in:
retoor 2024-11-24 07:09:50 +01:00
parent f227f64e08
commit d2007a731a
2 changed files with 41 additions and 11 deletions

View File

@ -1,4 +1,4 @@
all: build sync_excempt export_dataset export_stats merge_images all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
build: build:
pip install build pip install build
@ -27,6 +27,20 @@ export_dataset:
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt" @echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
dr.dataset > export/0_dataset.txt dr.dataset > export/0_dataset.txt
# export_statistics: extract every per-user "Statistics: " line from the
# exported dataset into ./export/2_statistics.txt and show the result.
# The dataset is filtered once (instead of running the same pipeline twice)
# and the written file is then displayed.
# NOTE(review): the export_dataset target writes ./export/0_dataset.txt, while
# this target reads ./export/dataset.txt -- confirm which input is intended.
export_statistics:
	@echo "Exporting statisticts. Result will be ./export/2_statistics.txt"
	grep "Statistics: " export/dataset.txt > export/2_statistics.txt
	cat export/2_statistics.txt
# export_mentions: extract every "... times mentioned ..." line from the
# exported dataset into ./export/3_mentions.txt and show the result.
# The dataset is filtered once (instead of running the same pipeline twice)
# and the written file is then displayed.
# NOTE(review): the export_dataset target writes ./export/0_dataset.txt, while
# this target reads ./export/dataset.txt -- confirm which input is intended.
export_mentions:
	@echo "Exporting mentions. Result will be ./export/3_mentions.txt"
	grep "times ment" export/dataset.txt > export/3_mentions.txt
	cat export/3_mentions.txt
merge_images: merge_images:
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png." @echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
python merge_images.py python merge_images.py

View File

@ -1,20 +1,21 @@
from drstats import db from drstats import db
import functools import functools
import sys import sys
import pathlib
printr = functools.partial(print, file=sys.stderr) printr = functools.partial(print, file=sys.stderr)
def dump(): def dump():
statistics_text = [ statistics_text = [
f"devRant(developer community) haves {db.get_user_count()} active users(ranters)." f"===devRant(developer community) haves {db.get_user_count()} active users(ranters)."
f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total." f"===All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)." f"===The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
] ]
printr(statistics_text) printr(statistics_text)
for contribution in db.get_contributions(): for contribution in db.get_contributions():
statistics_text.append( statistics_text.append(
f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)." f"===Statistics: User(ranter) {contribution['username']} is popularity rank {contribution['rank']} and made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
) )
printr(statistics_text[-1]) printr(statistics_text[-1])
print("\n".join(statistics_text)) print("\n".join(statistics_text))
@ -27,26 +28,41 @@ def dump():
if text: if text:
total_text += text total_text += text
print( print(
"```", "===",
f"All rants written by user(ranter) `{user}` on devRant(developer community)```.", f"All rants written by user(ranter) `{user}` on devRant(developer community).",
"```"
text
"```"
) )
print(text, "```")
printr(text) printr(text)
with pathlib.Path("export/rants-" + user + ".txt").open("w") as f:
f.write(user,"said:```")
f.write(text)
f.write(user,"```")
text = ( text = (
db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip() db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip()
) )
if text: if text:
total_text += text total_text += text
print( print(
"===",
"```", "```",
f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.", f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.",
text,
"```"
) )
print(text, "```")
printr(text) printr(text)
with pathlib.Path("export/posts-" + user + ".txt").open("w") as f:
f.write(user,"said:```")
f.write(text)
f.write(user,"```")
all_content += total_text all_content += total_text
print("===Mentions of users:","```")
for user in db.get_users(): users = db.get_users()
users.sort()
for user in users:
mention_text = f"@{user}" mention_text = f"@{user}"
line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)." line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
printr(line) printr(line)
print(line) print(line)
print("```")