Update
This commit is contained in:
parent
6a1233fc5c
commit
190456236a
18
Makefile
18
Makefile
@ -1,16 +1,28 @@
|
||||
all: build sync export_stats export_dataset
|
||||
all: build sync_excempt export_dataset export_stats merge_images
|
||||
|
||||
build:
|
||||
time pip install build
|
||||
time python -m build .
|
||||
time pip install -e .
|
||||
|
||||
|
||||
|
||||
sync:
|
||||
@echo "Synchronizing with devrant.com."
|
||||
time dr.sync
|
||||
|
||||
sync_excempt:
|
||||
@echo "Sync is not executed because it's a lengthy process ending with timeout error."
|
||||
|
||||
export_stats:
|
||||
@echo "Make sure you have ran 'make sync' first. Results will be in ./export/"
|
||||
@echo "Exporting statisticts."
|
||||
time dr.stats_all
|
||||
|
||||
export_dataset:
|
||||
@echo "Exporting dataset to be used for LLM embedding."
|
||||
@echo "Make sure you have ran 'make sync' first."
|
||||
@echo "Exporting dataset to be used for LLM embedding. Result will be ./export/0_dataset.txt"
|
||||
time dr.dataset > export/dataset.txt
|
||||
|
||||
merge_images:
|
||||
@echo "Merging images to one big image. Result will be ./export/1_graphs_compliation.png."
|
||||
python merge_images.py
|
||||
|
@ -28,7 +28,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c
|
||||
```
|
||||
1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
|
||||
2. `dr.dataset` exports all data to be used for LLM embedding; don't forget to execute `dr.sync` first.
|
||||
3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
|
||||
4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
|
||||
4. `dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
|
BIN
drstats.db
BIN
drstats.db
Binary file not shown.
44
merge_images.py
Normal file
44
merge_images.py
Normal file
@ -0,0 +1,44 @@
|
||||
from PIL import Image
from pathlib import Path
import functools
import math
import sys

# Progress goes to stderr so stdout stays clean for pipelines.
printr = functools.partial(print, file=sys.stderr)

# Size every tile is resized to, and how many tiles per row.
per_image_width = 480
per_image_height = 320
cols = 2

images = list(Path("./export/").glob("*.png"))
image_count = len(images)

# Rows needed for the grid. Ceiling so a partial last row still gets a
# full strip of canvas: the original computed
# int(per_image_height * image_count / cols), which truncates and
# cropped the final row whenever image_count was not a multiple of cols
# (its "add one more row" check compared an expression to itself and
# could never fire).
rows = math.ceil(image_count / cols)

# Resize every exported graph to a uniform tile size.
resized_images = []
for path in images:
    image = Image.open(path)
    image = image.resize((per_image_width, per_image_height))
    resized_images.append((path, image))

# Near-white canvas large enough for the whole grid.
new_image = Image.new(
    "RGB",
    (per_image_width * cols, per_image_height * rows),
    (250, 250, 250),
)

# Paste tiles left-to-right, top-to-bottom.
current_col = 0
for current_image_number, (path, image) in enumerate(resized_images):
    printr("Merging image {}".format(path))
    current_row = current_image_number // cols
    left = current_col * per_image_width
    top = current_row * per_image_height
    new_image.paste(image, (left, top))
    current_col += 1
    if current_col == cols:
        current_col = 0

# Save once after compositing; the original re-saved the file on every
# loop iteration, producing the same final image far more slowly.
new_image.save("export/1_graphs_compliation.png")
new_image.show()
|
@ -27,5 +27,5 @@ console_scripts =
|
||||
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
|
||||
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
|
||||
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
||||
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
||||
dr.stats_all = drstats.statistics:rant_stats_all
|
||||
dr.dataset = drstats.dataset:dump
|
@ -41,7 +41,7 @@ If you type `dr.` in terminal and press tab you'll see all available apps auto c
|
||||
```
|
||||
1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
|
||||
2. `dr.dataset` exports all data to be used for LLM embedding; don't forget to execute `dr.sync` first.
|
||||
3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
|
||||
4. dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
5.dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
6. dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
3. `dr.stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
|
||||
4. `dr.rant_stats_per_day` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
5. `dr.rant_stats_per_hour` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
6. `dr.rant_stats_per_weekday` exports graphs to export folder. don't forget to execute `dr.sync` first.
|
||||
|
@ -1,7 +1,7 @@
|
||||
[console_scripts]
|
||||
dr.dataset = drstats.dataset:dump
|
||||
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
||||
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
|
||||
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
||||
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
|
||||
dr.stats_all = drstats.statistics:rant_stats_all
|
||||
dr.sync = drstats.sync:sync
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
42
src/drstats/dataset.py
Normal file
42
src/drstats/dataset.py
Normal file
@ -0,0 +1,42 @@
|
||||
from drstats import db
|
||||
import functools
|
||||
import sys
|
||||
|
||||
printr = functools.partial(print,file=sys.stderr)
|
||||
|
||||
def dump():
    """Dump the devRant dataset to stdout as plain text for LLM embedding.

    Emits three sections: aggregate statistics, every user's rants and
    posts wrapped in ``` fences, and a per-user mention count. Progress
    is mirrored on stderr via printr. `dr.sync` must have populated the
    database first.
    """
    # NOTE(review): the original list literal had no commas, so the
    # three f-strings were implicitly concatenated into a SINGLE list
    # element ("...users(ranters).All users..."). They are clearly meant
    # to be three separate statements; commas added.
    statistics_text = [
        f"devRant(developer community) haves {db.get_user_count()} active users(ranters).",
        f"All users(ranters) of devRant together did contribute {db.get_contribution_count()} times in total.",
        f"The average user(ranter) of devrant(developer community) contributed {db.get_contribution_average_per_user()} times on devrant(developer community).",
    ]
    printr(statistics_text)
    # One verbose statistics line per contributor, echoed to stderr as we go.
    for contribution in db.get_contributions():
        statistics_text.append(
            f"Statistics: User(ranter) {contribution['username']} made {contribution['contributions']} contributions to devRant(developer community) what means {contribution['username']} owns {contribution['ownership']} percent of contributions on devRant(developer community). The avarage post length of {contribution['username']} is {contribution['post_length_average']} and total post length is {contribution['post_length_total']}. {contribution['username']} owns {contribution['ownership_content']} percent of content on devRant(developer community)."
        )
        printr(statistics_text[-1])
    print("\n".join(statistics_text))

    # Concatenate every user's content so mentions can be counted below.
    all_content = ''
    for user in db.get_users():
        # NOTE(review): replace(" "," ") is a no-op as written — it was
        # probably meant to collapse double spaces ("  " -> " ");
        # confirm against the repository before changing behavior.
        text = db.get_all_rants_of_user(user).replace("\n", " ").replace(" ", " ").strip()
        total_text = ""
        if text:
            total_text += text
            print("```", f"All rants written by user(ranter) `{user}` on devRant(developer community)```.")
            print(text, "```")
        text = db.get_all_posts_of_user(user).replace("\n", " ").replace(" ", " ").strip()
        if text:
            total_text += text
            print("```", f"All posts written by user(ranter) `{user}` on devRant(developer community): ```.")
            print(text, "```")
        all_content += total_text

    # Count how often each user is @-mentioned across all content.
    for user in db.get_users():
        mention_text = f"@{user}"
        line = f"{user} is {all_content.count(mention_text)} times mentioned on devRant(developer comminity)."
        printr(line)
        print(line)
@ -5,13 +5,11 @@ from drstats.duration import Duration
|
||||
def get_db():
|
||||
db = dataset.connect(f"sqlite:///{db_path}")
|
||||
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
DROP VIEW IF EXISTS score_ignored_most_last_7_days
|
||||
"""
|
||||
)
|
||||
db.query(
|
||||
"""
|
||||
""")
|
||||
|
||||
db.query("""
|
||||
CREATE VIEW score_ignored_most_last_7_days AS SELECT
|
||||
user_username AS username,
|
||||
COUNT(score) AS userscore
|
||||
@ -20,24 +18,20 @@ WHERE score = 0
|
||||
AND created >= DATE('now', '-7 days')
|
||||
GROUP BY username
|
||||
ORDER BY userscore DESC
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS score_last_7_days")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW score_last_7_days AS SELECT
|
||||
user_username AS username,
|
||||
SUM(score) AS userscore
|
||||
FROM comments
|
||||
GROUP BY user_username
|
||||
ORDER BY userscore DESC
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS rant_stats_per_day")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW rant_stats_per_day AS SELECT
|
||||
COUNT(0) AS count,
|
||||
DATE(created) AS created_date,
|
||||
@ -53,12 +47,10 @@ CREATE VIEW rant_stats_per_day AS SELECT
|
||||
FROM rants
|
||||
GROUP BY created_date
|
||||
ORDER BY created_date
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS comment_stats_per_day")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW comment_stats_per_day AS SELECT
|
||||
COUNT(0) AS count,
|
||||
DATE(created) AS created_date,
|
||||
@ -74,12 +66,10 @@ CREATE VIEW comment_stats_per_day AS SELECT
|
||||
FROM comments
|
||||
GROUP BY created_date
|
||||
ORDER BY created_date
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS rant_stats_per_weekday")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW rant_stats_per_weekday AS SELECT
|
||||
COUNT(0) AS count,
|
||||
DATE(created) AS created_date,
|
||||
@ -95,12 +85,10 @@ CREATE VIEW rant_stats_per_weekday AS SELECT
|
||||
FROM rants
|
||||
GROUP BY weekday
|
||||
ORDER BY created_date
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS comment_stats_per_weekday")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW comment_stats_per_weekday AS SELECT
|
||||
COUNT(0) AS count,
|
||||
DATE(created) AS created_date,
|
||||
@ -116,73 +104,69 @@ CREATE VIEW comment_stats_per_weekday AS SELECT
|
||||
FROM comments
|
||||
GROUP BY weekday
|
||||
ORDER BY created_date
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS comment_stats_per_hour")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW comment_stats_per_hour AS SELECT
|
||||
COUNT(0) AS count,
|
||||
strftime('%H', created) AS hour
|
||||
FROM comments
|
||||
GROUP BY hour
|
||||
ORDER BY hour
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query("DROP VIEW IF EXISTS rant_stats_per_hour")
|
||||
db.query(
|
||||
"""
|
||||
db.query("""
|
||||
CREATE VIEW rant_stats_per_hour AS SELECT
|
||||
COUNT(0) AS count,
|
||||
strftime('%H', created) AS hour
|
||||
FROM rants
|
||||
GROUP BY hour
|
||||
ORDER BY hour
|
||||
"""
|
||||
)
|
||||
""")
|
||||
|
||||
db.query(
|
||||
"""
|
||||
DROP VIEW IF EXISTS user_stats
|
||||
"""
|
||||
)
|
||||
db.query("""
|
||||
DROP VIEW IF EXISTS user_stats
|
||||
""")
|
||||
|
||||
db.query(
|
||||
"""
|
||||
CREATE VIEW user_stats AS
|
||||
SELECT
|
||||
user_username AS username,
|
||||
COUNT(0) AS post_count,
|
||||
(select count(0) from rants where rants.id = comments.rant_id and date(rants.created) = date(comments.created)) as rant_count,
|
||||
DATE(comments.created) AS date,
|
||||
(SELECT COUNT(0)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score = 0 and date(comments2.created) = date(comments.created)) AS ignore_count,
|
||||
(SELECT COUNT(0)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_times,
|
||||
(SELECT SUM(score)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_total
|
||||
FROM comments
|
||||
GROUP BY username, DATE(comments.created)
|
||||
ORDER BY username ASC, date ASC;
|
||||
"""
|
||||
)
|
||||
db.query("""
|
||||
CREATE VIEW user_stats AS
|
||||
SELECT
|
||||
user_username AS username,
|
||||
COUNT(0) AS post_count,
|
||||
(select count(0) from rants where rants.id = comments.rant_id and date(rants.created) = date(comments.created)) as rant_count,
|
||||
DATE(comments.created) AS date,
|
||||
(SELECT COUNT(0)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score = 0 and date(comments2.created) = date(comments.created)) AS ignore_count,
|
||||
(SELECT COUNT(0)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_times,
|
||||
(SELECT SUM(score)
|
||||
FROM comments AS comments2
|
||||
WHERE comments2.user_username = comments.user_username
|
||||
AND comments2.score > 0 and date(comments2.created) = date(comments.created)) AS upvote_total
|
||||
FROM comments
|
||||
GROUP BY username, DATE(comments.created)
|
||||
ORDER BY username ASC, date ASC;
|
||||
""")
|
||||
db.query("DROP VIEW IF EXISTS contributions")
|
||||
db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
|
||||
union
|
||||
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
|
||||
group by username
|
||||
order by contributions desc, username asc
|
||||
""");
|
||||
db.query("""
|
||||
CREATE VIEW contributions AS
|
||||
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
|
||||
union
|
||||
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
|
||||
group by username
|
||||
order by contributions desc, username asc
|
||||
""")
|
||||
db.query("DROP VIEW IF EXISTS contributions_extended")
|
||||
db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average FROM contributions")
|
||||
db.query("""
|
||||
CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average
|
||||
FROM contributions
|
||||
""")
|
||||
db.query("DROP VIEW IF EXISTS rants_of_user")
|
||||
db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants")
|
||||
db.query("DROP VIEW IF EXISTS posts_of_user")
|
||||
@ -245,7 +229,6 @@ def get_contribution_average_per_user():
|
||||
return round(get_contribution_count() / get_user_count(),2)
|
||||
|
||||
def get_all_rants_of_user(username):
|
||||
|
||||
with Db() as db:
|
||||
try:
|
||||
return db.db['rants_of_user'].find_one(username=username)['text']
|
||||
|
@ -1,4 +1,5 @@
|
||||
import time
|
||||
import sys
|
||||
|
||||
class Duration:
|
||||
|
||||
@ -12,5 +13,5 @@ class Duration:
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.end = time.time()
|
||||
self.duration = self.end - self.start
|
||||
print(self.description,end=" ")
|
||||
print("took {} seconds.".format(self.duration))
|
||||
print(self.description,end=" ",file=sys.stderr)
|
||||
print("took {} seconds.".format(self.duration),file=sys.stderr)
|
@ -26,35 +26,33 @@ def timestamp_to_string(timestamp):
|
||||
async def get_recent_rants(start_from=1, page_size=10):
|
||||
page = 0
|
||||
while True:
|
||||
try:
|
||||
rants = dr.get_rants("recent", page_size, start_from)["rants"]
|
||||
page += 1
|
||||
for rant in rants:
|
||||
if rant is None:
|
||||
break
|
||||
rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
|
||||
rant["created"] = timestamp_to_string(rant["created_time"])
|
||||
rant = plain_object(rant)
|
||||
|
||||
yield rant
|
||||
start_from += page_size
|
||||
except requests.exceptions.ConnectionError:
|
||||
print("Rate limit of server exceeded.")
|
||||
return
|
||||
rants = dr.get_rants("recent", page_size, start_from)["rants"]
|
||||
page += 1
|
||||
for rant in rants:
|
||||
if rant is None:
|
||||
break
|
||||
rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
|
||||
rant["created"] = timestamp_to_string(rant["created_time"])
|
||||
rant = plain_object(rant)
|
||||
|
||||
yield rant
|
||||
start_from += page_size
|
||||
|
||||
async def sync_rants():
|
||||
count = 0
|
||||
start_from = 0
|
||||
|
||||
page_size = 20
|
||||
|
||||
async for rant in get_recent_rants(start_from, page_size):
|
||||
start_from += page_size
|
||||
count += 1
|
||||
rant["tags"] = json.dumps(rant["tags"])
|
||||
db["rants"].upsert(rant, ["id"])
|
||||
print(f"Upserted {count} rant(s).")
|
||||
|
||||
try:
|
||||
async for rant in get_recent_rants(start_from, page_size):
|
||||
start_from += page_size
|
||||
count += 1
|
||||
rant["tags"] = json.dumps(rant["tags"])
|
||||
db["rants"].upsert(rant, ["id"])
|
||||
print(f"Upserted {count} rant(s).")
|
||||
except:
|
||||
print("Rate limit of server exceeded. That's normal.s")
|
||||
|
||||
async def sync_comments():
|
||||
comments_synced = 0
|
||||
|
Loading…
Reference in New Issue
Block a user