This commit is contained in:
retoor 2024-11-23 19:56:52 +01:00
parent 6a1233fc5c
commit 190456236a
14 changed files with 191 additions and 111 deletions

View File

@ -1,16 +1,28 @@
all: build sync export_stats export_dataset
all: build sync_excempt export_dataset export_stats merge_images
build:
time pip install build
time python -m build .
time pip install -e .
sync:
@echo "Synchronizing with devrant.com."
time dr.sync
sync_excempt:
@echo "Sync is not executed because it's a lengthy process ending with timeout error."
export_stats:
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
@echo "Exporting statisticts."
time dr.stats_all
export_dataset:
@echo "Exporting dataset to be used for LLM embedding."
@echo "Make sure you have ran 'make sync' first."
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
time dr.dataset > export/dataset.txt
merge_images:
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
python merge_images.py

View File

@ -28,7 +28,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c
```
1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
2. `dr.dataset` exports all data to be used for LLM embedding; don't forget to execute `dr.sync` first.
3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
4. `dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.

Binary file not shown.

44
merge_images.py Normal file
View File

@ -0,0 +1,44 @@
"""Merge all exported PNG graphs into one big grid image.

Reads every *.png from ./export/, resizes each to a fixed cell size and
pastes them into a 2-column grid, written to export/1_graphs_compliation.png
(filename kept byte-for-byte for compatibility with the Makefile target).
"""
from PIL import Image
from pathlib import Path
import functools
import sys

# Progress messages go to stderr so stdout stays clean.
printr = functools.partial(print, file=sys.stderr)

per_image_width = 480   # cell width in pixels
per_image_height = 320  # cell height in pixels
cols = 2                # grid columns

images = list(Path("./export/").glob("*.png"))
image_count = len(images)

# Rows rounded UP so an odd trailing image still gets a full row.
# (The original guard compared an expression against itself and could
# never fire, so the canvas was too short for odd image counts and the
# last image was cropped.)
rows = (image_count + cols - 1) // cols

resized_images = []
for path in images:
    image = Image.open(path)
    image = image.resize((per_image_width, per_image_height))
    resized_images.append((path, image))

new_image = Image.new(
    "RGB",
    (per_image_width * cols, per_image_height * rows),
    (250, 250, 250),  # near-white background
)

current_col = 0
current_image_number = 0
for path, image in resized_images:
    printr("Merging image {}".format(path))
    current_row = current_image_number // cols
    left = current_col * per_image_width
    top = current_row * per_image_height
    new_image.paste(image, (left, top))
    current_col += 1
    current_image_number += 1
    if current_col == cols:
        current_col = 0

# Save once after all tiles are pasted (the original re-saved the same
# file on every iteration, doing redundant disk writes).
new_image.save("export/1_graphs_compliation.png")
new_image.show()

View File

@ -27,5 +27,5 @@ console_scripts =
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
dr.rant_stats_all = drstats.statistics:rant_stats_all
dr.stats_all = drstats.statistics:rant_stats_all
dr.dataset = drstats.dataset:dump

View File

@ -41,7 +41,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c
```
1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
2. `dr.dataset` exports all data to be used for LLM embedding; don't forget to execute `dr.sync` first.
3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
4. `dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.

View File

@ -1,7 +1,7 @@
[console_scripts]
dr.dataset = drstats.dataset:dump
dr.rant_stats_all = drstats.statistics:rant_stats_all
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
dr.stats_all = drstats.statistics:rant_stats_all
dr.sync = drstats.sync:sync

42
src/drstats/dataset.py Normal file
View File

@ -0,0 +1,42 @@
from drstats import db
import functools
import sys

# Diagnostics go to stderr; stdout carries the dataset itself so it can
# be redirected to a file (see the Makefile's `dr.dataset > export/...`).
printr = functools.partial(print, file=sys.stderr)


def dump():
    """Dump the devRant dataset as plain text on stdout for LLM embedding.

    Emits global statistics, per-user contribution statistics, the full
    rant/post text of every user, and per-user mention counts. Progress
    copies of each line are mirrored to stderr via ``printr``.
    """
    # BUG FIX: the original list literal was missing the commas between
    # the three f-strings, so implicit concatenation fused them into one
    # element with no separator ("...(ranters).All users...").
    statistics_text = [
        f"devRant(developer community) haves {db.get_user_count()} active users(ranters).",
        f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total.",
        f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community).",
    ]
    printr(statistics_text)
    for contribution in db.get_contributions():
        statistics_text.append(
            f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
        )
        printr(statistics_text[-1])
    print("\n".join(statistics_text))

    all_content = ''
    for user in db.get_users():
        total_text = ""
        # Flatten newlines to spaces, then collapse the doubled spaces
        # that produces. (The original called .replace(" ", " "), a
        # no-op — single space replaced by single space.)
        text = db.get_all_rants_of_user(user).replace("\n", " ").replace("  ", " ").strip()
        if text:
            total_text += text
            print("```", f"All rants written by user(ranter) `{user}` on devRant(developer community)```.")
            print(text, "```")
        text = db.get_all_posts_of_user(user).replace("\n", " ").replace("  ", " ").strip()
        if text:
            total_text += text
            print("```", f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.")
            print(text, "```")
        all_content += total_text

    # Count how often each user is @-mentioned anywhere in the corpus.
    for user in db.get_users():
        mention_text = f"@{user}"
        # NOTE(review): "comminity" typo kept byte-for-byte — fix only if
        # no downstream consumer matches on this exact output line.
        line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
        printr(line)
        print(line)

View File

@ -5,13 +5,11 @@ from drstats.duration import Duration
def get_db():
db = dataset.connect(f"sqlite:///{db_path}")
db.query(
"""
db.query("""
DROP VIEW IF EXISTS score_ignored_most_last_7_days
"""
)
db.query(
"""
""")
db.query("""
CREATE VIEW score_ignored_most_last_7_days AS SELECT
user_username AS username,
COUNT(score) AS userscore
@ -20,24 +18,20 @@ WHERE score = 0
AND created >= DATE('now', '-7 days')
GROUP BY username
ORDER BY userscore DESC
"""
)
""")
db.query("DROP VIEW IF EXISTS score_last_7_days")
db.query(
"""
db.query("""
CREATE VIEW score_last_7_days AS SELECT
user_username AS username,
SUM(score) AS userscore
FROM comments
GROUP BY user_username
ORDER BY userscore DESC
"""
)
""")
db.query("DROP VIEW IF EXISTS rant_stats_per_day")
db.query(
"""
db.query("""
CREATE VIEW rant_stats_per_day AS SELECT
COUNT(0) AS count,
DATE(created) AS created_date,
@ -53,12 +47,10 @@ CREATE VIEW rant_stats_per_day AS SELECT
FROM rants
GROUP BY created_date
ORDER BY created_date
"""
)
""")
db.query("DROP VIEW IF EXISTS comment_stats_per_day")
db.query(
"""
db.query("""
CREATE VIEW comment_stats_per_day AS SELECT
COUNT(0) AS count,
DATE(created) AS created_date,
@ -74,12 +66,10 @@ CREATE VIEW comment_stats_per_day AS SELECT
FROM comments
GROUP BY created_date
ORDER BY created_date
"""
)
""")
db.query("DROP VIEW IF EXISTS rant_stats_per_weekday")
db.query(
"""
db.query("""
CREATE VIEW rant_stats_per_weekday AS SELECT
COUNT(0) AS count,
DATE(created) AS created_date,
@ -95,12 +85,10 @@ CREATE VIEW rant_stats_per_weekday AS SELECT
FROM rants
GROUP BY weekday
ORDER BY created_date
"""
)
""")
db.query("DROP VIEW IF EXISTS comment_stats_per_weekday")
db.query(
"""
db.query("""
CREATE VIEW comment_stats_per_weekday AS SELECT
COUNT(0) AS count,
DATE(created) AS created_date,
@ -116,41 +104,33 @@ CREATE VIEW comment_stats_per_weekday AS SELECT
FROM comments
GROUP BY weekday
ORDER BY created_date
"""
)
""")
db.query("DROP VIEW IF EXISTS comment_stats_per_hour")
db.query(
"""
db.query("""
CREATE VIEW comment_stats_per_hour AS SELECT
COUNT(0) AS count,
strftime('%H', created) AS hour
FROM comments
GROUP BY hour
ORDER BY hour
"""
)
""")
db.query("DROP VIEW IF EXISTS rant_stats_per_hour")
db.query(
"""
db.query("""
CREATE VIEW rant_stats_per_hour AS SELECT
COUNT(0) AS count,
strftime('%H', created) AS hour
FROM rants
GROUP BY hour
ORDER BY hour
"""
)
""")
db.query(
"""
db.query("""
DROP VIEW IF EXISTS user_stats
"""
)
""")
db.query(
"""
db.query("""
CREATE VIEW user_stats AS
SELECT
user_username AS username,
@ -172,17 +152,21 @@ SELECT
FROM comments
GROUP BY username, DATE(comments.created)
ORDER BY username ASC, date ASC;
"""
)
""")
db.query("DROP VIEW IF EXISTS contributions")
db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
db.query("""
CREATE VIEW contributions AS
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
union
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
group by username
order by contributions desc, username asc
""");
""")
db.query("DROP VIEW IF EXISTS contributions_extended")
db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average FROM contributions")
db.query("""
CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average
FROM contributions
""")
db.query("DROP VIEW IF EXISTS rants_of_user")
db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants")
db.query("DROP VIEW IF EXISTS posts_of_user")
@ -245,7 +229,6 @@ def get_contribution_average_per_user():
return round(get_contribution_count() / get_user_count(),2)
def get_all_rants_of_user(username):
with Db() as db:
try:
return db.db['rants_of_user'].find_one(username=username)['text']

View File

@ -1,4 +1,5 @@
import time
import sys
class Duration:
@ -12,5 +13,5 @@ class Duration:
def __exit__(self, exc_type, exc_val, exc_tb):
self.end = time.time()
self.duration = self.end - self.start
print(self.description,end=" ")
print("took {} seconds.".format(self.duration))
print(self.description,end=" ",file=sys.stderr)
print("took {} seconds.".format(self.duration),file=sys.stderr)

View File

@ -26,7 +26,6 @@ def timestamp_to_string(timestamp):
async def get_recent_rants(start_from=1, page_size=10):
page = 0
while True:
try:
rants = dr.get_rants("recent", page_size, start_from)["rants"]
page += 1
for rant in rants:
@ -38,9 +37,6 @@ async def get_recent_rants(start_from=1, page_size=10):
yield rant
start_from += page_size
except requests.exceptions.ConnectionError:
print("Rate limit of server exceeded.")
return
async def sync_rants():
count = 0
@ -48,13 +44,15 @@ async def sync_rants():
page_size = 20
try:
async for rant in get_recent_rants(start_from, page_size):
start_from += page_size
count += 1
rant["tags"] = json.dumps(rant["tags"])
db["rants"].upsert(rant, ["id"])
print(f"Upserted {count} rant(s).")
except:
print("Rate limit of server exceeded. That's normal.s")
async def sync_comments():
comments_synced = 0