Backup before formatting
This commit is contained in:
parent
9084ab596d
commit
6a1233fc5c
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
|
.history
|
||||||
dist
|
dist
|
||||||
venv
|
venv
|
||||||
export
|
export
|
||||||
|
18
Makefile
18
Makefile
@ -1,6 +1,16 @@
|
|||||||
all: build
|
all: build sync export_stats export_dataset
|
||||||
|
|
||||||
build:
|
build:
|
||||||
pip install build
|
time pip install build
|
||||||
python -m build .
|
time python -m build .
|
||||||
pip install -e .
|
time pip install -e .
|
||||||
|
|
||||||
|
# Synchronize with devrant.com by running the `dr.sync` console script.
# Declared phony so a stray file named `sync` can never suppress the recipe.
.PHONY: sync
sync:
	@echo "Synchronizing with devrant.com."
	time dr.sync
|
||||||
|
# Export all statistics graphs.
# NOTE(review): the console scripts visible in setup.cfg / entry_points.txt and
# the README only list `dr.rant_stats_all`; no `dr.stats_all` entry point is
# visible, so the registered name is used here. Confirm against setup.cfg.
.PHONY: export_stats
export_stats:
	@echo "Exporting statistics."
	time dr.rant_stats_all
|
||||||
|
# Write the dataset produced by `dr.dataset` to export/dataset.txt.
# The `export` directory is git-ignored, so it may not exist on a fresh
# checkout; create it first, otherwise the shell redirect fails before
# `dr.dataset` even starts.
.PHONY: export_dataset
export_dataset:
	@echo "Exporting dataset to be used for LLM embedding."
	@mkdir -p export
	time dr.dataset > export/dataset.txt
|
||||||
|
24
README.md
24
README.md
@ -3,8 +3,32 @@
|
|||||||
## About
|
## About
|
||||||
|
|
||||||
Simple project to determine health of the devrant platform.
|
Simple project to determine health of the devrant platform.
|
||||||
|
Also, it will generate a dataset to be used with machine learning.
|
||||||
|
Make Retoor9b great again!
|
||||||
|
|
||||||
## Credits
|
## Credits
|
||||||
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
|
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
|
||||||
|
|
||||||
|
|
||||||
|
## Using this project
|
||||||
|
|
||||||
|
### Prepare environment
|
||||||
|
Create python3 environment:
|
||||||
|
```
|
||||||
|
python3 -m venv ./venv
|
||||||
|
```
|
||||||
|
Activate python3 environment:
|
||||||
|
```
|
||||||
|
source ./venv/bin/activate
|
||||||
|
```
|
||||||
|
### Make
|
||||||
|
Running `make` is all you need: it builds and installs the package, synchronizes with devrant, and then runs the apps that export the statistics and the dataset.
|
||||||
|
### Applications
|
||||||
|
If you type `dr.` in a terminal and press Tab, you'll see all available apps auto-completed. These applications are also used by `make`:
|
||||||
|
1. `dr.sync` synchronizes all data from the last two weeks from devrant. Only two weeks, because it's rate limited.
|
||||||
|
2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
|
||||||
|
3. `dr.rant_stats_all` exports all graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
4. `dr.rant_stats_per_day` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
5. `dr.rant_stats_per_hour` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
6. `dr.rant_stats_per_weekday` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
BIN
drstats.db
BIN
drstats.db
Binary file not shown.
@ -28,3 +28,4 @@ console_scripts =
|
|||||||
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
|
dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
|
||||||
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
||||||
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
||||||
|
dr.dataset = drstats.dataset:dump
|
@ -16,8 +16,32 @@ Requires-Dist: matplotlib>=3.9.2
|
|||||||
## About
|
## About
|
||||||
|
|
||||||
Simple project to determine health of the devrant platform.
|
Simple project to determine health of the devrant platform.
|
||||||
|
Also, it will generate a dataset to be used with machine learning.
|
||||||
|
Make Retoor9b great again!
|
||||||
|
|
||||||
## Credits
|
## Credits
|
||||||
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
|
Thanks to Rohan Burke (coolq), the creator of the devrant API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
|
||||||
|
|
||||||
|
|
||||||
|
## Using this project
|
||||||
|
|
||||||
|
### Prepare environment
|
||||||
|
Create python3 environment:
|
||||||
|
```
|
||||||
|
python3 -m venv ./venv
|
||||||
|
```
|
||||||
|
Activate python3 environment:
|
||||||
|
```
|
||||||
|
source ./venv/bin/activate
|
||||||
|
```
|
||||||
|
### Make
|
||||||
|
Running `make` is all you need: it builds and installs the package, synchronizes with devrant, and then runs the apps that export the statistics and the dataset.
|
||||||
|
### Applications
|
||||||
|
If you type `dr.` in a terminal and press Tab, you'll see all available apps auto-completed. These applications are also used by `make`:
|
||||||
|
1. `dr.sync` synchronizes all data from the last two weeks from devrant. Only two weeks, because it's rate limited.
|
||||||
|
2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
|
||||||
|
3. `dr.rant_stats_all` exports all graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
4. `dr.rant_stats_per_day` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
5. `dr.rant_stats_per_hour` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
6. `dr.rant_stats_per_weekday` exports graphs to the export folder. Don't forget to execute `dr.sync` first.
|
||||||
|
@ -3,8 +3,11 @@ pyproject.toml
|
|||||||
setup.cfg
|
setup.cfg
|
||||||
src/drstats/__init__.py
|
src/drstats/__init__.py
|
||||||
src/drstats/__main__.py
|
src/drstats/__main__.py
|
||||||
|
src/drstats/dataset.py
|
||||||
src/drstats/db.py
|
src/drstats/db.py
|
||||||
src/drstats/devrant.py
|
src/drstats/devrant.py
|
||||||
|
src/drstats/dump_text.py
|
||||||
|
src/drstats/duration.py
|
||||||
src/drstats/statistics.py
|
src/drstats/statistics.py
|
||||||
src/drstats/sync.py
|
src/drstats/sync.py
|
||||||
src/drstats.egg-info/PKG-INFO
|
src/drstats.egg-info/PKG-INFO
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
[console_scripts]
|
[console_scripts]
|
||||||
|
dr.dataset = drstats.dataset:dump
|
||||||
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
dr.rant_stats_all = drstats.statistics:rant_stats_all
|
||||||
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
|
dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
|
||||||
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
|
||||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -174,6 +174,19 @@ GROUP BY username, DATE(comments.created)
|
|||||||
ORDER BY username ASC, date ASC;
|
ORDER BY username ASC, date ASC;
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
db.query("DROP VIEW IF EXISTS contributions")
|
||||||
|
db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
|
||||||
|
union
|
||||||
|
select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
|
||||||
|
group by username
|
||||||
|
order by contributions desc, username asc
|
||||||
|
""");
|
||||||
|
db.query("DROP VIEW IF EXISTS contributions_extended")
|
||||||
|
db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average FROM contributions")
|
||||||
|
db.query("DROP VIEW IF EXISTS rants_of_user")
|
||||||
|
db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants")
|
||||||
|
db.query("DROP VIEW IF EXISTS posts_of_user")
|
||||||
|
db.query("CREATE VIEW posts_of_user AS SELECT user_username as username, GROUP_CONCAT(body) as text FROM comments")
|
||||||
|
|
||||||
return db
|
return db
|
||||||
|
|
||||||
@ -183,6 +196,20 @@ class Db:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.db = None
|
self.db = None
|
||||||
|
|
||||||
|
def __enter__(self):
    """Open a connection via ``get_db()`` and enter the synchronous context.

    Returns:
        The ``Db`` instance itself, with ``self.db`` set to the new connection.
    """
    connection = get_db()
    self.db = connection
    return self
|
||||||
|
|
||||||
|
def query(self, str):
    """Run a statement on the wrapped connection inside a ``Duration`` block.

    Args:
        str: The statement to execute. NOTE(review): this name shadows the
            builtin ``str``; it is kept because it is part of the signature.

    Returns:
        Whatever ``self.db.query`` returns for the statement.
    """
    # Only the first 80 characters of the statement go into the label.
    timing_label = "DB Query {}".format(str[:80])
    with Duration(timing_label):
        return self.db.query(str)
|
||||||
|
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the wrapped connection and drop the reference to it.

    The exception arguments are ignored and nothing truthy is returned, so an
    exception raised inside the ``with`` body propagates to the caller.
    """
    connection = self.db
    connection.close()
    self.db = None
|
||||||
|
|
||||||
|
|
||||||
async def __aenter__(self):
|
async def __aenter__(self):
|
||||||
self.db = get_db()
|
self.db = get_db()
|
||||||
return self
|
return self
|
||||||
@ -195,3 +222,38 @@ class Db:
|
|||||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||||
self.db.close()
|
self.db.close()
|
||||||
self.db = None
|
self.db = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_contributions():
    """Fetch every row of the ``contributions_extended`` view, ranked by upvote ratio.

    Each row additionally carries a ``popularity_postion`` column produced by
    ``ROW_NUMBER()`` over ``upvote_ratio`` in descending order.

    Returns:
        list: The query result materialized into a list.
    """
    ranked_sql = "SELECT ROW_NUMBER() OVER (ORDER BY upvote_ratio DESC) as popularity_postion, * FROM contributions_extended ORDER BY upvote_ratio DESC"
    with Db() as connection:
        # Materialize inside the ``with`` so the rows are read while the
        # connection is still open.
        return list(connection.query(ranked_sql))
|
||||||
|
|
||||||
|
def get_upvote_average():
    """Return the arithmetic mean of ``upvote_ratio`` over all contribution rows.

    NOTE(review): the previous body called ``avg(...)``. Python has no builtin
    ``avg`` and no definition or import of one is visible in this file, so the
    call would raise ``NameError``. The mean is computed explicitly instead;
    confirm no project-level ``avg`` with different semantics was intended.

    Returns:
        float: The mean upvote ratio, or ``0.0`` when there is no row with a
        non-NULL ratio (avoids dividing by zero).
    """
    # A NULL ratio arrives as None and cannot take part in the sum.
    ratios = [
        row['upvote_ratio']
        for row in get_contributions()
        if row['upvote_ratio'] is not None
    ]
    if not ratios:
        return 0.0
    return sum(ratios) / len(ratios)
|
||||||
|
|
||||||
|
def get_users():
    """Return the distinct usernames found in the contribution rows.

    Returns:
        list: Unique ``username`` values; built from a set, so the order is
        not defined.
    """
    unique_names = {row['username'] for row in get_contributions()}
    return list(unique_names)
|
||||||
|
|
||||||
|
def get_user_count():
    """Return how many distinct usernames ``get_users()`` reports."""
    users = get_users()
    return len(users)
|
||||||
|
|
||||||
|
def get_contribution_count():
    """Return the sum of the ``contributions`` column over all contribution rows."""
    total = 0
    for row in get_contributions():
        total += row['contributions']
    return total
|
||||||
|
|
||||||
|
def get_contribution_average_per_user():
    """Return the average number of contributions per distinct user.

    Returns:
        float: ``get_contribution_count() / get_user_count()`` rounded to two
        decimals, or ``0.0`` when there are no users. Without that guard an
        empty result set would raise ``ZeroDivisionError``.
    """
    user_count = get_user_count()
    if not user_count:
        return 0.0
    return round(get_contribution_count() / user_count, 2)
|
||||||
|
|
||||||
|
def get_all_rants_of_user(username):
    """Return the ``text`` column of the ``rants_of_user`` row for ``username``.

    Args:
        username: Value matched against the view's ``username`` column.

    Returns:
        The row's ``text`` value, or ``""`` when the lookup raises
        ``TypeError`` (presumably because ``find_one`` returned ``None`` for
        an unknown user; verify against the database library).
    """
    with Db() as connection:
        try:
            match = connection.db['rants_of_user'].find_one(username=username)
            return match['text']
        except TypeError:
            return ""
|
||||||
|
def get_all_posts_of_user(username):
    """Return the ``text`` column of the ``posts_of_user`` row for ``username``.

    Args:
        username: Value matched against the view's ``username`` column.

    Returns:
        The row's ``text`` value, or ``""`` when the lookup raises
        ``TypeError`` (presumably because ``find_one`` returned ``None`` for
        an unknown user; verify against the database library).
    """
    with Db() as connection:
        try:
            match = connection.db['posts_of_user'].find_one(username=username)
            return match['text']
        except TypeError:
            return ""
|
@ -33,7 +33,7 @@ class Devrant:
|
|||||||
url = self.API + "devrant/search"
|
url = self.API + "devrant/search"
|
||||||
params = {"app": 3, "term": term}
|
params = {"app": 3, "term": term}
|
||||||
|
|
||||||
r = requests.get(url, params)
|
r = requests.get(url, params,timeout=5)
|
||||||
obj = json.loads(r.text)
|
obj = json.loads(r.text)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ class Devrant:
|
|||||||
params = {
|
params = {
|
||||||
"app": 3,
|
"app": 3,
|
||||||
}
|
}
|
||||||
r = requests.get(url, params)
|
r = requests.get(url, params,timeout=5)
|
||||||
obj = json.loads(r.text)
|
obj = json.loads(r.text)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ class Devrant:
|
|||||||
url = self.API + "devrant/rants"
|
url = self.API + "devrant/rants"
|
||||||
params = {"app": 3, "sort": sort, "limit": limit, "skip": skip}
|
params = {"app": 3, "sort": sort, "limit": limit, "skip": skip}
|
||||||
|
|
||||||
r = requests.get(url, params)
|
r = requests.get(url, params,timeout=5)
|
||||||
obj = json.loads(r.text)
|
obj = json.loads(r.text)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
@ -81,18 +81,3 @@ class Devrant:
|
|||||||
r = requests.get(url, params)
|
r = requests.get(url, params)
|
||||||
obj = json.loads(r.text)
|
obj = json.loads(r.text)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# Simple demo, runs through rants sorted by most recent.
|
|
||||||
dr = Devrant()
|
|
||||||
i = 0
|
|
||||||
while True:
|
|
||||||
result = dr.get_rant("recent", i)
|
|
||||||
print("\n" * 50)
|
|
||||||
name = result["user_username"]
|
|
||||||
tags = ", ".join(result["tags"])
|
|
||||||
print("-" + name + "-" * (50 - (len(name) + 1)))
|
|
||||||
print(result["text"])
|
|
||||||
print("-" + tags + "-" * (50 - (len(tags) + 1)))
|
|
||||||
i += 1
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from drstats.db import get_db, Db
|
from drstats.db import get_db, Db,get_users
|
||||||
from drstats import sync
|
from drstats import sync
|
||||||
import asyncio
|
import asyncio
|
||||||
from drstats.duration import Duration
|
from drstats.duration import Duration
|
||||||
@ -282,25 +282,9 @@ def rant_stats_all():
|
|||||||
asyncio.run(comment_stats_per_hour())
|
asyncio.run(comment_stats_per_hour())
|
||||||
asyncio.run(score_most_ignored_last_7_days())
|
asyncio.run(score_most_ignored_last_7_days())
|
||||||
asyncio.run(score_last_7_days())
|
asyncio.run(score_last_7_days())
|
||||||
asyncio.run(user_score_per_day("retoor"))
|
for user in get_users():
|
||||||
asyncio.run(user_score_per_day("Ranchonyx"))
|
asyncio.run(user_score_per_day(user))
|
||||||
asyncio.run(user_score_per_day("atheist"))
|
|
||||||
asyncio.run(user_score_per_day("Chewbanacas"))
|
|
||||||
asyncio.run(user_score_per_day("ScriptCoded"))
|
|
||||||
asyncio.run(user_score_per_day("bazmd"))
|
|
||||||
asyncio.run(user_score_per_day("feuerherz"))
|
|
||||||
asyncio.run(user_score_per_day("D-4got10-01"))
|
|
||||||
asyncio.run(user_score_per_day("jestdotty"))
|
|
||||||
asyncio.run(user_score_per_day("Demolishun"))
|
|
||||||
asyncio.run(user_score_per_day("cafecortado"))
|
|
||||||
asyncio.run(user_score_per_day("lungdart"))
|
|
||||||
asyncio.run(user_score_per_day("kiki"))
|
|
||||||
asyncio.run(user_score_per_day("netikras"))
|
|
||||||
asyncio.run(user_score_per_day("lorentz"))
|
|
||||||
asyncio.run(user_score_per_day("12bitfloat"))
|
|
||||||
asyncio.run(user_score_per_day("root"))
|
|
||||||
asyncio.run(user_score_per_day("antigermgerm"))
|
|
||||||
asyncio.run(user_score_per_day("Liebranca"))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ from drstats.db import get_db
|
|||||||
import json
|
import json
|
||||||
import asyncio
|
import asyncio
|
||||||
from pprint import pprint as pp
|
from pprint import pprint as pp
|
||||||
|
import requests
|
||||||
dr = Devrant()
|
dr = Devrant()
|
||||||
db = get_db()
|
db = get_db()
|
||||||
|
|
||||||
@ -26,18 +26,21 @@ def timestamp_to_string(timestamp):
|
|||||||
async def get_recent_rants(start_from=1, page_size=10):
|
async def get_recent_rants(start_from=1, page_size=10):
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
rants = dr.get_rants("recent", page_size, start_from)["rants"]
|
try:
|
||||||
page += 1
|
rants = dr.get_rants("recent", page_size, start_from)["rants"]
|
||||||
for rant in rants:
|
page += 1
|
||||||
if rant is None:
|
for rant in rants:
|
||||||
break
|
if rant is None:
|
||||||
rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
|
break
|
||||||
rant["created"] = timestamp_to_string(rant["created_time"])
|
rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
|
||||||
rant = plain_object(rant)
|
rant["created"] = timestamp_to_string(rant["created_time"])
|
||||||
|
rant = plain_object(rant)
|
||||||
yield rant
|
|
||||||
start_from += page_size
|
|
||||||
|
|
||||||
|
yield rant
|
||||||
|
start_from += page_size
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
print("Rate limit of server exceeded.")
|
||||||
|
return
|
||||||
|
|
||||||
async def sync_rants():
|
async def sync_rants():
|
||||||
count = 0
|
count = 0
|
||||||
|
Loading…
Reference in New Issue
Block a user