Updated dataset
This commit is contained in:
parent
f227f64e08
commit
d2007a731a
16
Makefile
16
Makefile
@ -1,4 +1,4 @@
|
|||||||
all: build sync_excempt export_dataset export_stats merge_images
|
all: build sync_excempt export_dataset export_stats merge_images export_statistics export_mentions
|
||||||
|
|
||||||
build:
|
build:
|
||||||
pip install build
|
pip install build
|
||||||
@ -27,6 +27,20 @@ export_dataset:
|
|||||||
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
||||||
dr.dataset > export/0_dataset.txt
|
dr.dataset > export/0_dataset.txt
|
||||||
|
|
||||||
|
# Extract the per-user "Statistics: " lines from the exported dataset into
# ./export/2_statistics.txt and show them on stdout.
#
# Input is export/0_dataset.txt, the file the export_dataset recipe writes.
# The dataset is scanned once; the result file is then printed, instead of
# running the same grep twice. grep exits non-zero when nothing matches, so
# an empty result still fails the target.
.PHONY: export_statistics
export_statistics:
	@echo "Exporting statistics. Result will be ./export/2_statistics.txt"
	grep "Statistics: " export/0_dataset.txt > export/2_statistics.txt
	cat export/2_statistics.txt
|
||||||
|
|
||||||
|
|
||||||
|
# Extract the "... times mentioned ..." lines from the exported dataset into
# ./export/3_mentions.txt and show them on stdout.
#
# Input is export/0_dataset.txt, the file the export_dataset recipe writes.
# The dataset is scanned once; the result file is then printed, instead of
# running the same grep twice. grep exits non-zero when nothing matches, so
# an empty result still fails the target.
.PHONY: export_mentions
export_mentions:
	@echo "Exporting mentions. Result will be ./export/3_mentions.txt"
	grep "times ment" export/0_dataset.txt > export/3_mentions.txt
	cat export/3_mentions.txt
|
||||||
|
|
||||||
|
|
||||||
merge_images:
|
merge_images:
|
||||||
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
||||||
python merge_images.py
|
python merge_images.py
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,20 +1,21 @@
|
|||||||
from drstats import db
|
from drstats import db
|
||||||
import functools
|
import functools
|
||||||
import sys
|
import sys
|
||||||
|
import pathlib
|
||||||
|
|
||||||
printr = functools.partial(print, file=sys.stderr)
|
printr = functools.partial(print, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def dump():
|
def dump():
|
||||||
statistics_text = [
|
statistics_text = [
|
||||||
f"devRant(developer community) haves {db.get_user_count()} active users(ranters)."
|
f"===devRant(developer community) haves {db.get_user_count()} active users(ranters)."
|
||||||
f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
|
f"===All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total."
|
||||||
f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
|
f"===The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)."
|
||||||
]
|
]
|
||||||
printr(statistics_text)
|
printr(statistics_text)
|
||||||
for contribution in db.get_contributions():
|
for contribution in db.get_contributions():
|
||||||
statistics_text.append(
|
statistics_text.append(
|
||||||
f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
|
f"===Statistics: User(ranter) {contribution['username']} is popularity rank {contribution['rank']} and made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
|
||||||
)
|
)
|
||||||
printr(statistics_text[-1])
|
printr(statistics_text[-1])
|
||||||
print("\n".join(statistics_text))
|
print("\n".join(statistics_text))
|
||||||
@ -27,26 +28,41 @@ def dump():
|
|||||||
if text:
|
if text:
|
||||||
total_text += text
|
total_text += text
|
||||||
print(
|
print(
|
||||||
"```",
|
"===",
|
||||||
f"All rants written by user(ranter) `{user}` on devRant(developer community)```.",
|
f"All rants written by user(ranter) `{user}` on devRant(developer community).",
|
||||||
|
"```"
|
||||||
|
text
|
||||||
|
"```"
|
||||||
)
|
)
|
||||||
print(text, "```")
|
|
||||||
printr(text)
|
printr(text)
|
||||||
|
with pathlib.Path("export/rants-" + user + ".txt").open("w") as f:
|
||||||
|
f.write(user,"said:```")
|
||||||
|
f.write(text)
|
||||||
|
f.write(user,"```")
|
||||||
text = (
|
text = (
|
||||||
db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip()
|
db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip()
|
||||||
)
|
)
|
||||||
if text:
|
if text:
|
||||||
total_text += text
|
total_text += text
|
||||||
print(
|
print(
|
||||||
|
"===",
|
||||||
"```",
|
"```",
|
||||||
f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.",
|
f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.",
|
||||||
|
text,
|
||||||
|
"```"
|
||||||
)
|
)
|
||||||
print(text, "```")
|
|
||||||
printr(text)
|
printr(text)
|
||||||
|
with pathlib.Path("export/posts-" + user + ".txt").open("w") as f:
|
||||||
|
f.write(user,"said:```")
|
||||||
|
f.write(text)
|
||||||
|
f.write(user,"```")
|
||||||
all_content += total_text
|
all_content += total_text
|
||||||
|
print("===Mentions of users:","```")
|
||||||
for user in db.get_users():
|
users = db.get_users()
|
||||||
|
users.sort()
|
||||||
|
for user in users:
|
||||||
mention_text = f"@{user}"
|
mention_text = f"@{user}"
|
||||||
line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
|
line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
|
||||||
printr(line)
|
printr(line)
|
||||||
print(line)
|
print(line)
|
||||||
|
print("```")
|
Loading…
Reference in New Issue
Block a user