Backup before formatting

2024-11-23 19:56:52 +01:00 · 2024-11-23 19:56:52 +01:00 · 6a1233fc5c
commit 6a1233fc5c
parent 9084ab596d
16 changed files with 157 additions and 59 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 .history
 dist
 venv
 export
-src/drstats/__pycache__
+src/drstats/__pycache__
--- a/18
+++ b/18
@ -1,6 +1,16 @@
-all: build
+all: build sync export_stats export_dataset
 build:
-	pip install build 
+	time pip install build 
-	python -m build .
+	time python -m build .
-	pip install -e .
+	time pip install -e .
 sync:
 	@echo "Synchronizing with devrant.com."
 	time dr.sync
 export_stats:
 	@echo "Exporting statisticts."
 	time dr.stats_all
 export_dataset:
 	@echo "Exporting dataset to be used for LLM embedding."
 	time dr.dataset > export/dataset.txt
--- a/README.md
+++ b/README.md
@ -3,8 +3,32 @@
 ## About
 Simple project to determine health of the devrant platform.
 Also, it will generate a dataset to be used with machine learning.
 Make Retoor9b great again!
 ## Credits
 Thanks to Rohan Burke (coolq), the creator of the dr API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
 ## Using this project
 ### Prepare environment
 Create python3 environment:
 ```
 python3 -m venv ./venv
 ```
 Activate python3 environment:
 ```
 source ./venv/bin/activate
 ```
 ### Make
 You don't have to use more than make. If you just run `make` all statistics will be generated. It will execute the right apps for generating statistics.
 ### Applications
 If you type `dr.` in terminal and press tab you'll see all available apps auto completed. These applications are also used by make.
 ```
 1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
 2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
 3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
 4. `dr.rant_stats_per_day` exports graphs to export folder. Don't forget to execute `dr.sync` first.
 5. `dr.rant_stats_per_hour` exports graphs to export folder. Don't forget to execute `dr.sync` first.
 6. `dr.rant_stats_per_weekday` exports graphs to export folder. Don't forget to execute `dr.sync` first.
--- a/drstats.db
+++ b/drstats.db
--- a/setup.cfg
+++ b/setup.cfg
@ -27,4 +27,5 @@ console_scripts =
    dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
    dr.rant_stats_per_weekday = drstats.statistics:rant_stats_per_weekday
    dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
-    dr.rant_stats_all = drstats.statistics:rant_stats_all
+    dr.rant_stats_all = drstats.statistics:rant_stats_all
    dr.dataset = drstats.dataset:dump
--- a/src/drstats.egg-info/PKG-INFO
+++ b/src/drstats.egg-info/PKG-INFO
@ -16,8 +16,32 @@ Requires-Dist: matplotlib>=3.9.2
 ## About
 Simple project to determine health of the devrant platform.
 Also, it will generate a dataset to be used with machine learning.
 Make Retoor9b great again!
 ## Credits
 Thanks to Rohan Burke (coolq), the creator of the dr API wrapper this project uses. Since it isn't distributed as a package, I had to copy his source files into my source folder. His library: https://github.com/coolq1000/devrant-python-api/
 ## Using this project
 ### Prepare environment
 Create python3 environment:
 ```
 python3 -m venv ./venv
 ```
 Activate python3 environment:
 ```
 source ./venv/bin/activate
 ```
 ### Make
 You don't have to use more than make. If you just run `make` all statistics will be generated. It will execute the right apps for generating statistics.
 ### Applications
 If you type `dr.` in terminal and press tab you'll see all available apps auto completed. These applications are also used by make.
 ```
 1. `dr.sync` synchronizes all data from last two weeks from devrant. Only two weeks because it's rate limited.
 2. `dr.dataset` exports all data to be used for LLM embedding. Don't forget to execute `dr.sync` first.
 3. `dr.rant_stats_all` exports all graphs to export folder, don't forget to execute `dr.sync` first.
 4. `dr.rant_stats_per_day` exports graphs to export folder. Don't forget to execute `dr.sync` first.
 5. `dr.rant_stats_per_hour` exports graphs to export folder. Don't forget to execute `dr.sync` first.
 6. `dr.rant_stats_per_weekday` exports graphs to export folder. Don't forget to execute `dr.sync` first.
--- a/src/drstats.egg-info/SOURCES.txt
+++ b/src/drstats.egg-info/SOURCES.txt
@ -3,8 +3,11 @@ pyproject.toml
 setup.cfg
 src/drstats/__init__.py
 src/drstats/__main__.py
 src/drstats/dataset.py
 src/drstats/db.py
 src/drstats/devrant.py
 src/drstats/dump_text.py
 src/drstats/duration.py
 src/drstats/statistics.py
 src/drstats/sync.py
 src/drstats.egg-info/PKG-INFO
--- a/src/drstats.egg-info/entry_points.txt
+++ b/src/drstats.egg-info/entry_points.txt
@ -1,4 +1,5 @@
 [console_scripts]
 dr.dataset = drstats.dataset:dump
 dr.rant_stats_all = drstats.statistics:rant_stats_all
 dr.rant_stats_per_day = drstats.statistics:rant_stats_per_day
 dr.rant_stats_per_hour = drstats.statistics:rant_stats_per_hour
--- a/src/drstats/pycache/db.cpython-312.pyc
+++ b/src/drstats/pycache/db.cpython-312.pyc
--- a/src/drstats/pycache/devrant.cpython-312.pyc
+++ b/src/drstats/pycache/devrant.cpython-312.pyc
--- a/src/drstats/pycache/statistics.cpython-312.pyc
+++ b/src/drstats/pycache/statistics.cpython-312.pyc
--- a/src/drstats/pycache/sync.cpython-312.pyc
+++ b/src/drstats/pycache/sync.cpython-312.pyc
--- a/src/drstats/db.py
+++ b/src/drstats/db.py
@ -173,8 +173,21 @@ FROM comments
 GROUP BY username, DATE(comments.created)
 ORDER BY username ASC, date ASC;
    """
-    )
+    )   
-
+    db.query("DROP  VIEW IF EXISTS contributions")
    db.query("""CREATE VIEW contributions AS select distinct user_username as username, count(0) as contributions,sum(score) as upvotes,avg(length(text)) as post_length_average, sum(length(text)) as content_length from rants
            union
            select distinct user_username as username, count(0) as contributions,sum(score) as upvotes, sum(length(body)) / count(0) as post_length_average, sum(length(body)) as content_length from comments
            group by username
            order by contributions desc, username asc
        """);
    db.query("DROP VIEW IF EXISTS contributions_extended")
    db.query("CREATE VIEW contributions_extended as SELECT username, contributions,ROUND(CAST(contributions AS REAL) / CAST((select contributions from contributions) AS REAL),2) as ownership, upvotes, ROUND(CAST(upvotes AS REAL) / CAST((SELECT SUM(upvotes) from contributions) AS REAL),2) upvotes_ownership, ROUND(CAST(upvotes AS REAL) / CAST(contributions AS REAL),2) upvote_ratio,content_length as post_length_total, ROUND(CAST(content_length AS REAL) / CAST((SELECT SUM(content_length) from contributions) AS REAL)) as ownership_content,post_length_average    FROM contributions")
    db.query("DROP VIEW IF EXISTS rants_of_user")
    db.query("CREATE VIEW rants_of_user as SELECT user_username as username, GROUP_CONCAT(text) as text FROM rants")
    db.query("DROP VIEW IF EXISTS posts_of_user")
    db.query("CREATE VIEW posts_of_user AS SELECT user_username as username, GROUP_CONCAT(body) as text FROM comments")
    return db
@ -183,6 +196,20 @@ class Db:
    def __init__(self):
        self.db = None
    def __enter__(self):
        """Obtain a handle from get_db(), store it, and return this wrapper."""
        handle = get_db()
        self.db = handle
        return self
    def query(self, str):
        """Forward a statement to the stored handle's query() inside a Duration block.

        The Duration is labelled "DB Query " followed by the first 80
        characters of the statement (presumably for timing output; confirm
        against the Duration helper).

        Returns whatever the underlying handle's query() returns.
        """
        # NOTE(review): the parameter name shadows the builtin `str`; it is
        # left unchanged because it is part of the method's signature.
        head = str[:80]
        with Duration("DB Query {}".format(head)):
            return self.db.query(str)
    def __exit__(self, exc_type, exc_val, exc_tb):
        self.db.close()
        self.db = None
    async def __aenter__(self):
        """Async counterpart of __enter__: store a get_db() handle and return self."""
        handle = get_db()
        self.db = handle
        return self
@ -195,3 +222,38 @@ class Db:
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.db.close()
        self.db = None
 def get_contributions():
    """Return all rows of the contributions_extended view as a list.

    Rows are ordered by upvote_ratio, highest first, and each row carries
    its 1-based rank in that ordering.
    """
    # The rank column is spelled `popularity_postion`; the spelling is kept
    # as-is because it is the column name consumers would read.
    ranked_sql = "SELECT ROW_NUMBER() OVER (ORDER BY upvote_ratio DESC) as popularity_postion, * FROM contributions_extended ORDER BY upvote_ratio DESC"
    with Db() as db:
        return list(db.query(ranked_sql))
 def get_upvote_average():
    """Return the arithmetic mean of upvote_ratio over get_contributions().

    Returns 0.0 when get_contributions() yields no rows.
    """
    # Python has no builtin avg() and none is visible in this module, so the
    # mean is computed explicitly instead of relying on that name.
    ratios = [contribution['upvote_ratio'] for contribution in get_contributions()]
    if not ratios:
        return 0.0
    return sum(ratios) / len(ratios)
 def get_users():
    """Return the distinct usernames found in get_contributions() as a list.

    The list is built from a set, so its order is not defined.
    """
    unique_names = {row['username'] for row in get_contributions()}
    return list(unique_names)
 def get_user_count():
    """Return the number of usernames produced by get_users()."""
    users = get_users()
    return len(users)
 def get_contribution_count():
    """Return the sum of the `contributions` field over get_contributions()."""
    total = 0
    for row in get_contributions():
        total += row['contributions']
    return total
 def get_contribution_average_per_user():
    """Return contributions per user, rounded to two decimals.

    The value is get_contribution_count() divided by get_user_count().
    Returns 0.0 when there are no users, instead of raising
    ZeroDivisionError.
    """
    user_count = get_user_count()
    if not user_count:
        # No users (for example, nothing has been synchronized yet).
        return 0.0
    return round(get_contribution_count() / user_count, 2)
 def get_all_rants_of_user(username):
    """Return the `text` field of the rants_of_user row for `username`.

    Returns an empty string when the lookup raises TypeError, presumably
    because find_one() returned None for an unknown username.
    """
    with Db() as db:
        try:
            row = db.db['rants_of_user'].find_one(username=username)
            return row['text']
        except TypeError:
            return ""
 def get_all_posts_of_user(username):
    """Return the `text` field of the posts_of_user row for `username`.

    Returns an empty string when the lookup raises TypeError, presumably
    because find_one() returned None for an unknown username.
    """
    with Db() as db:
        try:
            row = db.db['posts_of_user'].find_one(username=username)
            return row['text']
        except TypeError:
            return ""
--- a/src/drstats/devrant.py
+++ b/src/drstats/devrant.py
@ -33,7 +33,7 @@ class Devrant:
        url = self.API + "devrant/search"
        params = {"app": 3, "term": term}
-        r = requests.get(url, params)
+        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj
@ -52,7 +52,7 @@ class Devrant:
        params = {
            "app": 3,
        }
-        r = requests.get(url, params)
+        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj
@ -65,7 +65,7 @@ class Devrant:
        url = self.API + "devrant/rants"
        params = {"app": 3, "sort": sort, "limit": limit, "skip": skip}
-        r = requests.get(url, params)
+        r = requests.get(url, params,timeout=5)
        obj = json.loads(r.text)
        return obj
@ -80,19 +80,4 @@ class Devrant:
        r = requests.get(url, params)
        obj = json.loads(r.text)
-        return obj
+        return obj
 if __name__ == "__main__":
    # Demo: fetch rants one at a time with sort "recent" and print each one
    # between two 50-column separator lines. The loop has no exit condition.
    dr = Devrant()
    position = 0
    while True:
        result = dr.get_rant("recent", position)
        # Push the previous rant out of view.
        print("\n" * 50)
        name = result["user_username"]
        tags = ", ".join(result["tags"])
        # ljust pads with "-" up to 50 columns and leaves longer text as-is.
        print(("-" + name).ljust(50, "-"))
        print(result["text"])
        print(("-" + tags).ljust(50, "-"))
        position += 1
--- a/src/drstats/statistics.py
+++ b/src/drstats/statistics.py
@ -1,4 +1,4 @@
-from drstats.db import get_db, Db
+from drstats.db import get_db, Db,get_users
 from drstats import sync
 import asyncio
 from drstats.duration import Duration 
@ -282,25 +282,9 @@ def rant_stats_all():
        asyncio.run(comment_stats_per_hour())
        asyncio.run(score_most_ignored_last_7_days())
        asyncio.run(score_last_7_days())
-        asyncio.run(user_score_per_day("retoor"))
+        for user in get_users():
-        asyncio.run(user_score_per_day("Ranchonyx"))
+            asyncio.run(user_score_per_day(user))
-        asyncio.run(user_score_per_day("atheist"))
+            
        asyncio.run(user_score_per_day("Chewbanacas"))
        asyncio.run(user_score_per_day("ScriptCoded"))
        asyncio.run(user_score_per_day("bazmd"))
        asyncio.run(user_score_per_day("feuerherz"))
        asyncio.run(user_score_per_day("D-4got10-01"))
        asyncio.run(user_score_per_day("jestdotty"))
        asyncio.run(user_score_per_day("Demolishun"))
        asyncio.run(user_score_per_day("cafecortado"))
        asyncio.run(user_score_per_day("lungdart"))
        asyncio.run(user_score_per_day("kiki"))
        asyncio.run(user_score_per_day("netikras"))
        asyncio.run(user_score_per_day("lorentz"))
        asyncio.run(user_score_per_day("12bitfloat"))
        asyncio.run(user_score_per_day("root"))
        asyncio.run(user_score_per_day("antigermgerm"))
        asyncio.run(user_score_per_day("Liebranca"))
--- a/src/drstats/sync.py
+++ b/src/drstats/sync.py
@ -3,7 +3,7 @@ from drstats.db import get_db
 import json
 import asyncio
 from pprint import pprint as pp
-
+import requests
 dr = Devrant()
 db = get_db()
@ -26,18 +26,21 @@ def timestamp_to_string(timestamp):
 async def get_recent_rants(start_from=1, page_size=10):
    page = 0
    while True:
-        rants = dr.get_rants("recent", page_size, start_from)["rants"]
+        try: 
-        page += 1
+            rants = dr.get_rants("recent", page_size, start_from)["rants"]
-        for rant in rants:
+            page += 1
-            if rant is None:
+            for rant in rants:
-                break
+                if rant is None:
-            rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
+                    break
-            rant["created"] = timestamp_to_string(rant["created_time"])
+                rant["tags"] = json.dumps("tags" in rant and rant["tags"] or "")
-            rant = plain_object(rant)
+                rant["created"] = timestamp_to_string(rant["created_time"])
-
+                rant = plain_object(rant)
            yield rant
        start_from += page_size
                yield rant
            start_from += page_size
        except requests.exceptions.ConnectionError:
            print("Rate limit of server exceeded.")
            return
 async def sync_rants():
    count = 0