Updated dataset
This commit is contained in:
parent
f227f64e08
commit
d2007a731a
16
Makefile
16
Makefile
@ -1,4 +1,4 @@
|
||||
all: build sync_excempt export_dataset export_stats merge_images
|
||||
all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
|
||||
|
||||
build:
|
||||
pip install build
|
||||
@ -27,6 +27,20 @@ export_dataset:
|
||||
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
||||
dr.dataset > export/0_dataset.txt
|
||||
|
||||
# Extract every line containing "Statistics: " from the exported dataset.
# The matches are printed to stdout and then saved to ./export/2_statistics.txt.
# grep reads the file operand directly, so no `cat` pipeline is needed.
# NOTE(review): export_dataset writes export/0_dataset.txt, while this rule
# reads export/dataset.txt -- confirm which input file is intended.
export_statistics:
	@echo "Exporting statistics. Result will be ./export/2_statistics.txt"
	grep "Statistics: " export/dataset.txt
	grep "Statistics: " export/dataset.txt > export/2_statistics.txt
|
||||
|
||||
|
||||
# Extract every line containing "times ment" (the per-user mention counts)
# from the exported dataset. The matches are printed to stdout and then saved
# to ./export/3_mentions.txt.
# grep reads the file operand directly, so no `cat` pipeline is needed.
# NOTE(review): export_dataset writes export/0_dataset.txt, while this rule
# reads export/dataset.txt -- confirm which input file is intended.
export_mentions:
	@echo "Exporting mentions. Result will be ./export/3_mentions.txt"
	grep "times ment" export/dataset.txt
	grep "times ment" export/dataset.txt > export/3_mentions.txt
|
||||
|
||||
|
||||
# Announce the step, then run merge_images.py with the `python` on PATH.
# Per the echo text, the merged image is expected at
# ./export/1_graphs_compliation.png (the path is produced by the script,
# not by this rule -- presumably spelled this way in merge_images.py; verify).
merge_images:
|
||||
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
||||
python merge_images.py
|
||||
|
||||
|
||||
|
@ -1,20 +1,21 @@
|
||||
from drstats import db
|
||||
import functools
|
||||
import sys
|
||||
import pathlib
|
||||
|
||||
printr = functools.partial(print, file=sys.stderr)
|
||||
|
||||
|
||||
def dump():
|
||||
statistics_text = [
|
||||
f"devRant(developer community) haves {db.get_user_count()} active users(ranters)."
|
||||
f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
|
||||
f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
|
||||
f"===devRant(developer community) haves {db.get_user_count()} active users(ranters)."
|
||||
f"===All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
|
||||
f"===The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
|
||||
]
|
||||
printr(statistics_text)
|
||||
for contribution in db.get_contributions():
|
||||
statistics_text.append(
|
||||
f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
|
||||
f"===Statistics: User(ranter) {contribution['username']} is popularity rank {contribution['rank']} and made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
|
||||
)
|
||||
printr(statistics_text[-1])
|
||||
print("\n".join(statistics_text))
|
||||
@ -27,26 +28,41 @@ def dump():
|
||||
if text:
|
||||
total_text += text
|
||||
print(
|
||||
"```",
|
||||
f"All rants written by user(ranter) `{user}` on devRant(developer community)```.",
|
||||
"===",
|
||||
f"All rants written by user(ranter) `{user}` on devRant(developer community).",
|
||||
"```"
|
||||
text
|
||||
"```"
|
||||
)
|
||||
print(text, "```")
|
||||
printr(text)
|
||||
with pathlib.Path("export/rants-" + user + ".txt").open("w") as f:
|
||||
f.write(user,"said:```")
|
||||
f.write(text)
|
||||
f.write(user,"```")
|
||||
text = (
|
||||
db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip()
|
||||
)
|
||||
if text:
|
||||
total_text += text
|
||||
print(
|
||||
"===",
|
||||
"```",
|
||||
f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.",
|
||||
text,
|
||||
"```"
|
||||
)
|
||||
print(text, "```")
|
||||
printr(text)
|
||||
with pathlib.Path("export/posts-" + user + ".txt").open("w") as f:
|
||||
f.write(user,"said:```")
|
||||
f.write(text)
|
||||
f.write(user,"```")
|
||||
all_content += total_text
|
||||
|
||||
for user in db.get_users():
|
||||
print("===Mentions of users:","```")
|
||||
users = db.get_users()
|
||||
users.sort()
|
||||
for user in users:
|
||||
mention_text = f"@{user}"
|
||||
line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
|
||||
printr(line)
|
||||
print(line)
|
||||
print("```")
|
Loading…
Reference in New Issue
Block a user