Backup before formatting
parent 9084ab596d
commit 6a1233fc5c
.gitignore (vendored): 3 changed lines
@@ -1,4 +1,5 @@
.history
dist
venv
export
src/drstats/__pycache__

Makefile: 18 changed lines
@@ -1,6 +1,16 @@
all: build
all: build sync export_stats export_dataset

build:
	pip install build
	python -m build .
	pip install -e .
	time pip install build
	time python -m build .
	time pip install -e .

sync:
	@echo "Synchronizing with devrant.com."
	time dr.sync

export_stats:
	@echo "Exporting statistics."
	time dr.stats_all

export_dataset:
	@echo "Exporting dataset to be used for LLM embedding."
	time dr.dataset > export/dataset.txt

README.md: 24 changed lines
@@ -3,8 +3,32 @@
## About

A simple project to determine the health of the devrant platform.
It will also generate a dataset to be used for machine learning.
Make Retoor9b great again!

## Credits
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/

## Using this project

### Prepare environment
Create a python3 environment:
```
python3 -m venv ./venv
```
Activate the python3 environment:
```
source ./venv/bin/activate
```

### Make
You don't need anything more than make: running `make` generates all statistics by executing the right apps for you.

### Applications
If you type `dr.` in a terminal and press Tab, you'll see all available apps auto-completed. These applications are also used by make.

1. `dr.sync` synchronizes the last two weeks of data from devrant (only two weeks because the API is rate limited).
2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
3. `dr.rant_stats_all` exports all graphs to the export folder. Don't forget to execute `dr.sync` first.
4. `dr.rant_stats_per_day` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
5. `dr.rant_stats_per_hour` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
6. `dr.rant_stats_per_weekday` exports graphs to the export folder. Don't forget to execute `dr.sync` first.

drstats.db (BIN)
Binary file not shown.

@@ -27,4 +27,5 @@ console_scripts =
    dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
    dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
    dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
    dr.rant_stats_all = drstats.statistics:rant_stats_all
    dr.dataset = drstats.dataset:dump
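The console scripts are thin wrappers around package functions, so the same exports can also be driven from Python. A minimal sketch: `rant_stats_all()` is defined without arguments in the statistics.py hunk further down, while `dump()` taking no arguments is an assumption based on its entry point.

```
# What the console scripts resolve to, per the console_scripts section above:
from drstats.dataset import dump
from drstats.statistics import rant_stats_all

rant_stats_all()  # same as running `dr.rant_stats_all`
dump()            # same as running `dr.dataset`
```
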
@@ -16,8 +16,32 @@ Requires-Dist: matplotlib>=3.9.2
## About

A simple project to determine the health of the devrant platform.
It will also generate a dataset to be used for machine learning.
Make Retoor9b great again!

## Credits
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/

## Using this project

### Prepare environment
Create a python3 environment:
```
python3 -m venv ./venv
```
Activate the python3 environment:
```
source ./venv/bin/activate
```

### Make
You don't need anything more than make: running `make` generates all statistics by executing the right apps for you.

### Applications
If you type `dr.` in a terminal and press Tab, you'll see all available apps auto-completed. These applications are also used by make.

1. `dr.sync` synchronizes the last two weeks of data from devrant (only two weeks because the API is rate limited).
2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
3. `dr.rant_stats_all` exports all graphs to the export folder. Don't forget to execute `dr.sync` first.
4. `dr.rant_stats_per_day` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
5. `dr.rant_stats_per_hour` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
6. `dr.rant_stats_per_weekday` exports graphs to the export folder. Don't forget to execute `dr.sync` first.

@@ -3,8 +3,11 @@ pyproject.toml
setup.cfg
src/drstats/__init__.py
src/drstats/__main__.py
src/drstats/dataset.py
src/drstats/db.py
src/drstats/devrant.py
src/drstats/dump_text.py
src/drstats/duration.py
src/drstats/statistics.py
src/drstats/sync.py
src/drstats.egg-info/PKG-INFO

@@ -1,4 +1,5 @@
[console_scripts]
dr.dataset = drstats.dataset:dump
dr.rant_stats_all = drstats.statistics:rant_stats_all
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -173,8 +173,21 @@ FROM comments
GROUP BY username, DATE(comments.created)
ORDER BY username ASC, date ASC;
"""
    )

    db.query("DROP VIEW IF EXISTS contributions")
    db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
    union
    select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
    group by username
    order by contributions desc, username asc
    """);
    db.query("DROP VIEW IF EXISTS contributions_extended")
    db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average FROM contributions")
    db.query("DROP VIEW IF EXISTS rants_of_user")
    db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants")
    db.query("DROP VIEW IF EXISTS posts_of_user")
    db.query("CREATE VIEW posts_of_user AS SELECT user_username as username, GROUP_CONCAT(body) as text FROM comments")

    return db

@@ -183,6 +196,20 @@ class Db:
    def __init__(self):
        self.db = None

    def __enter__(self):
        self.db = get_db()
        return self

    def query(self, str):
        with Duration("DB Query {}".format(str[:80])):
            return self.db.query(str)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.db.close()
        self.db = None

    async def __aenter__(self):
        self.db = get_db()
        return self

@@ -195,3 +222,38 @@ class Db:
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.db.close()
        self.db = None


def get_contributions():
    with Db() as db:
        contributions = db.query("SELECT ROW_NUMBER() OVER (ORDER BY upvote_ratio DESC) as popularity_postion, * FROM contributions_extended ORDER BY upvote_ratio DESC")
        return list(contributions)

def get_upvote_average():
    return avg(contribution['upvote_ratio'] for contribution in get_contributions())

def get_users():
    return list(set([user['username'] for user in get_contributions()]))

def get_user_count():
    return len(get_users())

def get_contribution_count():
    return sum(user['contributions'] for user in get_contributions())

def get_contribution_average_per_user():
    return round(get_contribution_count() / get_user_count(),2)

def get_all_rants_of_user(username):
    with Db() as db:
        try:
            return db.db['rants_of_user'].find_one(username=username)['text']
        except TypeError:
            return ""

def get_all_posts_of_user(username):
    with Db() as db:
        try:
            return db.db['posts_of_user'].find_one(username=username)['text']
        except TypeError:
            return ""

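The new views and helpers above can be exercised directly from Python. A minimal usage sketch, assuming the drstats package is installed (`pip install -e .`) and a synced drstats.db is present; the column names come from the contributions_extended view defined above:

```
from drstats.db import Db, get_contributions, get_users

# Query the new view through the Db context manager added in this commit.
with Db() as db:
    for row in db.query("SELECT * FROM contributions_extended ORDER BY upvote_ratio DESC LIMIT 5"):
        print(row["username"], row["contributions"], row["upvote_ratio"])

# Or use the module-level helpers.
print(len(get_users()), "users,", len(get_contributions()), "contribution rows")
```
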
@@ -33,7 +33,7 @@ class Devrant:
        url = self.API + "devrant/search"
        params = {"app": 3, "term": term}

        r = requests.get(url, params)
        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj

@@ -52,7 +52,7 @@ class Devrant:
        params = {
            "app": 3,
        }
        r = requests.get(url, params)
        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj

@@ -65,7 +65,7 @@ class Devrant:
        url = self.API + "devrant/rants"
        params = {"app": 3, "sort": sort, "limit": limit, "skip": skip}

        r = requests.get(url, params)
        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj

@@ -80,19 +80,4 @@ class Devrant:

        r = requests.get(url, params)
        obj = json.loads(r.text)
        return obj


if __name__ == "__main__":
    # Simple demo, runs through rants sorted by most recent.
    dr = Devrant()
    i = 0
    while True:
        result = dr.get_rant("recent", i)
        print("\n" * 50)
        name = result["user_username"]
        tags = ", ".join(result["tags"])
        print("-" + name + "-" * (50 - (len(name) + 1)))
        print(result["text"])
        print("-" + tags + "-" * (50 - (len(tags) + 1)))
        i += 1

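With the 5-second timeout in place, callers fail fast instead of hanging on a stalled connection. A minimal sketch of calling the wrapper, assuming the copied module is importable as drstats.devrant (it is listed in SOURCES.txt above) and that rant dicts carry the user_username and text fields used elsewhere in this diff:

```
import requests

from drstats.devrant import Devrant

dr = Devrant()
try:
    # get_rants(sort, limit, skip) hits devrant/rants and now uses timeout=5.
    rants = dr.get_rants("recent", 10, 0)["rants"]
    for rant in rants:
        print(rant["user_username"], rant["text"][:60])
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
    print("devrant API unreachable or rate limited")
```
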
@@ -1,4 +1,4 @@
from drstats.db import get_db, Db
from drstats.db import get_db, Db,get_users
from drstats import sync
import asyncio
from drstats.duration import Duration

@@ -282,25 +282,9 @@ def rant_stats_all():
    asyncio.run(comment_stats_per_hour())
    asyncio.run(score_most_ignored_last_7_days())
    asyncio.run(score_last_7_days())
    asyncio.run(user_score_per_day("retoor"))
    asyncio.run(user_score_per_day("Ranchonyx"))
    asyncio.run(user_score_per_day("atheist"))
    asyncio.run(user_score_per_day("Chewbanacas"))
    asyncio.run(user_score_per_day("ScriptCoded"))
    asyncio.run(user_score_per_day("bazmd"))
    asyncio.run(user_score_per_day("feuerherz"))
    asyncio.run(user_score_per_day("D-4got10-01"))
    asyncio.run(user_score_per_day("jestdotty"))
    asyncio.run(user_score_per_day("Demolishun"))
    asyncio.run(user_score_per_day("cafecortado"))
    asyncio.run(user_score_per_day("lungdart"))
    asyncio.run(user_score_per_day("kiki"))
    asyncio.run(user_score_per_day("netikras"))
    asyncio.run(user_score_per_day("lorentz"))
    asyncio.run(user_score_per_day("12bitfloat"))
    asyncio.run(user_score_per_day("root"))
    asyncio.run(user_score_per_day("antigermgerm"))
    asyncio.run(user_score_per_day("Liebranca"))
    for user in get_users():
        asyncio.run(user_score_per_day(user))

@@ -3,7 +3,7 @@ from drstats.db import get_db
import json
import asyncio
from pprint import pprint as pp

import requests
dr = Devrant()
db = get_db()

@@ -26,18 +26,21 @@ def timestamp_to_string(timestamp):
async def get_recent_rants(start_from=1, page_size=10):
    page = 0
    while True:
        rants = dr.get_rants("recent", page_size, start_from)["rants"]
        page += 1
        for rant in rants:
            if rant is None:
                break
            rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
            rant["created"] = timestamp_to_string(rant["created_time"])
            rant = plain_object(rant)

            yield rant
        start_from += page_size
        try:
            rants = dr.get_rants("recent", page_size, start_from)["rants"]
            page += 1
            for rant in rants:
                if rant is None:
                    break
                rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
                rant["created"] = timestamp_to_string(rant["created_time"])
                rant = plain_object(rant)

                yield rant
            start_from += page_size
        except requests.exceptions.ConnectionError:
            print("Rate limit of server exceeded.")
            return


async def sync_rants():
    count = 0

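get_recent_rants is an async generator, so syncing now stops cleanly when the server starts refusing connections. A hypothetical driver sketch, assuming this hunk is drstats/sync.py and the generator can be imported from there:

```
import asyncio

from drstats.sync import get_recent_rants

async def preview(limit=3):
    seen = 0
    # Iterate the paged generator; it returns early on a ConnectionError.
    async for rant in get_recent_rants(start_from=1, page_size=10):
        print(rant)
        seen += 1
        if seen >= limit:
            break

asyncio.run(preview())
```
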