diff --git a/Makefile b/Makefile index 76adc47..48686ca 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,28 @@ -all: build sync export_stats export_dataset +all: build sync_excempt export_dataset export_stats merge_images build: time pip install build time python -m build . time pip install -e . + + sync: - @echo "Synchronizing with devrant.com." time dr.sync + +sync_excempt: + @echo "Sync is not executed because it's a lengthy process ending with timeout error." + export_stats: + @echo "Make sure you have run 'make sync' first. Results will be in ./export/" @echo "Exporting statistics." time dr.stats_all + export_dataset: - @echo "Exporting dataset to be used for LLM embedding." + @echo "Make sure you have run 'make sync' first." + @echo "Exporting dataset to be used for LLM embedding. Result will be ./export/dataset.txt" time dr.dataset > export/dataset.txt + +merge_images: + @echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png." + python merge_images.py diff --git a/README.md b/README.md index 03e9762..295cbbf 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c ``` 1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited. 2. `dr.dataset` exports all data to be used for LLM embedding., don't forget to execute `dr.sync` first. -3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first. -4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first. -5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first. -6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first. +3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first. +4. `dr.rant_stats_per_day` exports graphs to export folder. 
don't forget to execute `dr.sync` first. +5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first. +6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first. diff --git a/drstats.db b/drstats.db index b5c0756..db8569d 100644 Binary files a/drstats.db and b/drstats.db differ diff --git a/merge_images.py b/merge_images.py new file mode 100644 index 0000000..ae1c7b9 --- /dev/null +++ b/merge_images.py @@ -0,0 +1,44 @@ +from PIL import Image +from pathlib import Path +import functools +import sys + +printr = functools.partial(print,file=sys.stderr) + +per_image_width = 480 +per_image_height = 320 +cols = 2 + +images = list(Path("./export/").glob("*.png")) +image_count = len(images) +total_image_height = (image_count / cols * per_image_height) +if(image_count / cols * per_image_height > total_image_height): + total_image_height += per_image_height +total_image_width = image_count / cols * per_image_width + +resized_images = [] + +for path in images: + image = Image.open(path) + image = image.resize((per_image_width, per_image_height)) + resized_images.append((path,image)) + +new_image = Image.new("RGB",(per_image_width * cols, int(per_image_height * image_count / cols)), (250,250,250)) + +current_col = 0 +current_row = 0 +current_image_number = 0 +for path, image in resized_images: + printr("Merging image {}".format(path)) + current_row = int(current_image_number / cols) + left = int((current_col) * per_image_width) + top = int(per_image_height * current_row ) + new_image.paste(image,(left,top)) + new_image.save("export/1_graphs_compliation.png") + + current_col += 1 + current_image_number += 1 + if current_col == cols: + current_col = 0 + +new_image.show() \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index b3c786b..a500e15 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,5 +27,5 @@ console_scripts = dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day 
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour - dr.rant_stats_all = drstats.statistics:rant_stats_all + dr.stats_all = drstats.statistics:rant_stats_all dr.dataset = drstats.dataset:dump \ No newline at end of file diff --git a/src/drstats.egg-info/PKG-INFO b/src/drstats.egg-info/PKG-INFO index f5b3833..0108d86 100644 --- a/src/drstats.egg-info/PKG-INFO +++ b/src/drstats.egg-info/PKG-INFO @@ -41,7 +41,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c ``` 1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited. 2. `dr.dataset` exports all data to be used for LLM embedding., don't forget to execute `dr.sync` first. -3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first. -4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first. -5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first. -6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first. +3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first. +4. `dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first. +5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first. +6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first. 
diff --git a/src/drstats.egg-info/entry_points.txt b/src/drstats.egg-info/entry_points.txt index a7e6769..f4206ab 100644 --- a/src/drstats.egg-info/entry_points.txt +++ b/src/drstats.egg-info/entry_points.txt @@ -1,7 +1,7 @@ [console_scripts] dr.dataset = drstats.dataset:dump -dr.rant_stats_all = drstats.statistics:rant_stats_all dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday +dr.stats_all = drstats.statistics:rant_stats_all dr.sync = drstats.sync:sync diff --git a/src/drstats/__pycache__/db.cpython-312.pyc b/src/drstats/__pycache__/db.cpython-312.pyc index db03e4b..02d4668 100644 Binary files a/src/drstats/__pycache__/db.cpython-312.pyc and b/src/drstats/__pycache__/db.cpython-312.pyc differ diff --git a/src/drstats/__pycache__/statistics.cpython-312.pyc b/src/drstats/__pycache__/statistics.cpython-312.pyc index 4a78407..2982cd9 100644 Binary files a/src/drstats/__pycache__/statistics.cpython-312.pyc and b/src/drstats/__pycache__/statistics.cpython-312.pyc differ diff --git a/src/drstats/__pycache__/sync.cpython-312.pyc b/src/drstats/__pycache__/sync.cpython-312.pyc index 0de808a..257c666 100644 Binary files a/src/drstats/__pycache__/sync.cpython-312.pyc and b/src/drstats/__pycache__/sync.cpython-312.pyc differ diff --git a/src/drstats/dataset.py b/src/drstats/dataset.py new file mode 100644 index 0000000..629d5c5 --- /dev/null +++ b/src/drstats/dataset.py @@ -0,0 +1,42 @@ +from drstats import db +import functools +import sys + +printr = functools.partial(print,file=sys.stderr) + +def dump(): + statistics_text = [ + f"devRant(developer community) haves {db.get_user_count()} active users(ranters)." + f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total." 
+ f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community)." + ] + printr(statistics_text) + for contribution in db.get_contributions(): + statistics_text.append( + f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The average post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)." + ) + printr(statistics_text[-1]) + print("\n".join(statistics_text)) + all_content = '' + for user in db.get_users(): + text = db.get_all_rants_of_user(user).replace("\n"," ").replace(" "," ").strip() + total_text = "" + if text: + total_text += text + print("```",f"All rants written by user(ranter) `{user}` on devRant(developer community)```.") + print(text,"```") + text = db.get_all_posts_of_user(user).replace("\n", " ").replace(" "," ").strip() + if text: + total_text += text + print("```",f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.") + print(text,"```") + all_content += total_text + + for user in db.get_users(): + mention_text = f"@{user}" + line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer community)."
+ printr(line) + print(line) + + + diff --git a/src/drstats/db.py b/src/drstats/db.py index 765550f..5829eb9 100644 --- a/src/drstats/db.py +++ b/src/drstats/db.py @@ -5,13 +5,11 @@ from drstats.duration import Duration def get_db(): db = dataset.connect(f"sqlite:///{db_path}") - db.query( - """ + db.query(""" DROP VIEW IF EXISTS score_ignored_most_last_7_days - """ - ) - db.query( - """ + """) + + db.query(""" CREATE VIEW score_ignored_most_last_7_days AS SELECT user_username AS username, COUNT(score) AS userscore @@ -20,24 +18,20 @@ WHERE score = 0 AND created >= DATE('now', '-7 days') GROUP BY username ORDER BY userscore DESC - """ - ) + """) db.query("DROP VIEW IF EXISTS score_last_7_days") - db.query( - """ + db.query(""" CREATE VIEW score_last_7_days AS SELECT user_username AS username, SUM(score) AS userscore FROM comments GROUP BY user_username ORDER BY userscore DESC - """ - ) + """) db.query("DROP VIEW IF EXISTS rant_stats_per_day") - db.query( - """ + db.query(""" CREATE VIEW rant_stats_per_day AS SELECT COUNT(0) AS count, DATE(created) AS created_date, @@ -53,12 +47,10 @@ CREATE VIEW rant_stats_per_day AS SELECT FROM rants GROUP BY created_date ORDER BY created_date - """ - ) + """) db.query("DROP VIEW IF EXISTS comment_stats_per_day") - db.query( - """ + db.query(""" CREATE VIEW comment_stats_per_day AS SELECT COUNT(0) AS count, DATE(created) AS created_date, @@ -74,12 +66,10 @@ CREATE VIEW comment_stats_per_day AS SELECT FROM comments GROUP BY created_date ORDER BY created_date - """ - ) + """) db.query("DROP VIEW IF EXISTS rant_stats_per_weekday") - db.query( - """ + db.query(""" CREATE VIEW rant_stats_per_weekday AS SELECT COUNT(0) AS count, DATE(created) AS created_date, @@ -95,12 +85,10 @@ CREATE VIEW rant_stats_per_weekday AS SELECT FROM rants GROUP BY weekday ORDER BY created_date - """ - ) + """) db.query("DROP VIEW IF EXISTS comment_stats_per_weekday") - db.query( - """ + db.query(""" CREATE VIEW comment_stats_per_weekday AS SELECT COUNT(0) AS 
count, DATE(created) AS created_date, @@ -116,73 +104,69 @@ CREATE VIEW comment_stats_per_weekday AS SELECT FROM comments GROUP BY weekday ORDER BY created_date - """ - ) + """) db.query("DROP VIEW IF EXISTS comment_stats_per_hour") - db.query( - """ + db.query(""" CREATE VIEW comment_stats_per_hour AS SELECT COUNT(0) AS count, strftime('%H', created) AS hour FROM comments GROUP BY hour ORDER BY hour - """ - ) + """) db.query("DROP VIEW IF EXISTS rant_stats_per_hour") - db.query( - """ + db.query(""" CREATE VIEW rant_stats_per_hour AS SELECT COUNT(0) AS count, strftime('%H', created) AS hour FROM rants GROUP BY hour ORDER BY hour - """ - ) + """) - db.query( - """ -DROP VIEW IF EXISTS user_stats - """ - ) + db.query(""" + DROP VIEW IF EXISTS user_stats + """) - db.query( - """ -CREATE VIEW user_stats AS -SELECT - user_username AS username, - COUNT(0) AS post_count, - (select count(0) from rants where rants.id = comments.rant_id and date(rants.created) = date(comments.created)) as rant_count, - DATE(comments.created) AS date, - (SELECT COUNT(0) - FROM comments AS comments2 - WHERE comments2.user_username = comments.user_username - AND comments2.score = 0 and date(comments2.created) = date(comments.created)) AS ignore_count, - (SELECT COUNT(0) - FROM comments AS comments2 - WHERE comments2.user_username = comments.user_username - AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_times, - (SELECT SUM(score) - FROM comments AS comments2 - WHERE comments2.user_username = comments.user_username - AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_total -FROM comments -GROUP BY username, DATE(comments.created) -ORDER BY username ASC, date ASC; - """ - ) + db.query(""" + CREATE VIEW user_stats AS + SELECT + user_username AS username, + COUNT(0) AS post_count, + (select count(0) from rants where rants.id = comments.rant_id and date(rants.created) = date(comments.created)) as rant_count, + 
DATE(comments.created) AS date, + (SELECT COUNT(0) + FROM comments AS comments2 + WHERE comments2.user_username = comments.user_username + AND comments2.score = 0 and date(comments2.created) = date(comments.created)) AS ignore_count, + (SELECT COUNT(0) + FROM comments AS comments2 + WHERE comments2.user_username = comments.user_username + AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_times, + (SELECT SUM(score) + FROM comments AS comments2 + WHERE comments2.user_username = comments.user_username + AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_total + FROM comments + GROUP BY username, DATE(comments.created) + ORDER BY username ASC, date ASC; + """) db.query("DROP VIEW IF EXISTS contributions") - db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants - union - select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments - group by username - order by contributions desc, username asc - """); + db.query(""" + CREATE VIEW contributions AS + select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants + union + select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments + group by username + order by contributions desc, username asc + """) db.query("DROP VIEW IF EXISTS contributions_extended") - db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from 
contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average FROM contributions") + db.query(""" + CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average + FROM contributions + """) db.query("DROP VIEW IF EXISTS rants_of_user") db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants") db.query("DROP VIEW IF EXISTS posts_of_user") @@ -245,7 +229,6 @@ def get_contribution_average_per_user(): return round(get_contribution_count() / get_user_count(),2) def get_all_rants_of_user(username): - with Db() as db: try: return db.db['rants_of_user'].find_one(username=username)['text'] diff --git a/src/drstats/duration.py b/src/drstats/duration.py index 7f12603..02ef7f6 100644 --- a/src/drstats/duration.py +++ b/src/drstats/duration.py @@ -1,4 +1,5 @@ import time +import sys class Duration: @@ -12,5 +13,5 @@ class Duration: def __exit__(self, exc_type, exc_val, exc_tb): self.end = time.time() self.duration = self.end - self.start - print(self.description,end=" ") - print("took {} seconds.".format(self.duration)) \ No newline at end of file + print(self.description,end=" ",file=sys.stderr) + 
print("took {} seconds.".format(self.duration),file=sys.stderr) \ No newline at end of file diff --git a/src/drstats/sync.py b/src/drstats/sync.py index 73bf4e4..b6b7f38 100644 --- a/src/drstats/sync.py +++ b/src/drstats/sync.py @@ -26,35 +26,33 @@ def timestamp_to_string(timestamp): async def get_recent_rants(start_from=1, page_size=10): page = 0 while True: - try: - rants = dr.get_rants("recent", page_size, start_from)["rants"] - page += 1 - for rant in rants: - if rant is None: - break - rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "") - rant["created"] = timestamp_to_string(rant["created_time"]) - rant = plain_object(rant) - - yield rant - start_from += page_size - except requests.exceptions.ConnectionError: - print("Rate limit of server exceeded.") - return + rants = dr.get_rants("recent", page_size, start_from)["rants"] + page += 1 + for rant in rants: + if rant is None: + break + rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "") + rant["created"] = timestamp_to_string(rant["created_time"]) + rant = plain_object(rant) + yield rant + start_from += page_size + async def sync_rants(): count = 0 start_from = 0 page_size = 20 - async for rant in get_recent_rants(start_from, page_size): - start_from += page_size - count += 1 - rant["tags"] = json.dumps(rant["tags"]) - db["rants"].upsert(rant, ["id"]) - print(f"Upserted {count} rant(s).") - + try: + async for rant in get_recent_rants(start_from, page_size): + start_from += page_size + count += 1 + rant["tags"] = json.dumps(rant["tags"]) + db["rants"].upsert(rant, ["id"]) + print(f"Upserted {count} rant(s).") + except: + print("Rate limit of server exceeded. That's normal.") async def sync_comments(): comments_synced = 0