#!/usr/bin/env python3
"""
Keyboard Analytics - A tool for analyzing keyboard usage patterns
This script analyzes keyboard events stored in a SQLite database and generates
visualizations and reports based on the data. It can track key presses across
different time periods and create meaningful insights about typing patterns.
"""
import json
import logging
import pathlib
import sqlite3
import time
from typing import Any, Dict, List, Set, Tuple
from xmlrpc.client import ServerProxy

import matplotlib.pyplot as plt
import requests
2025-11-08 15:08:49 +01:00
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
2024-12-22 08:06:49 +01:00
)
2025-11-08 15:08:49 +01:00
logger = logging.getLogger(__name__)
# Initialize API connection
API_ENDPOINT = "https://api.molodetz.nl/rpc"
api = ServerProxy(API_ENDPOINT)
# Database connection
DB_PATH = 'tikker.db'
connection = sqlite3.connect(DB_PATH)
# Track processed items to avoid duplicate work
processed_files: Set[str] = set()
processed_weeks: Set[str] = set()
# SQL helper for weekday names
WEEKDAY_SQL = """
CASE
WHEN strftime('%w', timestamp) = '0' THEN 'Sunday'
WHEN strftime('%w', timestamp) = '1' THEN 'Monday'
WHEN strftime('%w', timestamp) = '2' THEN 'Tuesday'
WHEN strftime('%w', timestamp) = '3' THEN 'Wednesday'
WHEN strftime('%w', timestamp) = '4' THEN 'Thursday'
WHEN strftime('%w', timestamp) = '5' THEN 'Friday'
WHEN strftime('%w', timestamp) = '6' THEN 'Saturday'
END
"""
def query(sql: str) -> List[Tuple]:
    """
    Execute an SQL query against the shared module-level connection.

    Args:
        sql: SQL query to execute.

    Returns:
        List of result tuples from fetchall().
    """
    start = time.time()
    cursor = connection.cursor()
    logger.debug("Executing SQL: %s", sql)
    try:
        # try/finally guarantees the cursor is released even when the
        # statement raises (the original leaked it on error).
        result = cursor.execute(sql).fetchall()
    finally:
        cursor.close()
    duration = time.time() - start
    logger.info("Query completed in %.4f seconds", duration)
    return result
def setup_plot_defaults() -> None:
    """Reset matplotlib to the shared chart look: dark theme on a 10x7 canvas."""
    canvas_size = (10, 7)
    plt.style.use("dark_background")
    plt.figure(figsize=canvas_size)
def save_figure(filename: str) -> None:
    """
    Save the current matplotlib figure to a file and close it.

    Args:
        filename: Destination path for the rendered image.
    """
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()
    # Fix: the original logged a literal placeholder instead of the actual
    # filename, leaving the `filename` parameter unused in the message.
    logger.info("Figure saved: %s", filename)
def render_per_hour(week: str) -> None:
    """
    Generate a line chart of key presses per hour for a specific week.

    Args:
        week: Week number ('%U' format) to analyze.
    """
    # Namespace the dedup key per analysis type. The original stored the bare
    # week number in the shared processed_weeks set, which made
    # render_per_weekday() skip every week this function had already handled.
    dedup_key = f"hour:{week}"
    if dedup_key in processed_weeks:
        logger.info(f"Week {week} already processed for hourly analysis, skipping")
        return
    processed_weeks.add(dedup_key)
    # NOTE(review): week is interpolated into the SQL; it comes from the DB
    # itself via get_weeks(), not user input, so injection risk is low.
    sql_presses = """
        SELECT count(0) as total, strftime('%H', timestamp) as hour,
               strftime('%U', timestamp) as week
        FROM kevent
        WHERE event = 'PRESSED' AND week = '{week}'
        GROUP BY week, hour
        ORDER BY hour
    """
    rows_presses = query(sql_presses.format(week=week))
    if not rows_presses:
        logger.warning(f"No data found for week {week}")
        return
    totals = [row[0] for row in rows_presses]
    hours = [row[1] for row in rows_presses]
    setup_plot_defaults()
    plt.plot(hours, totals, marker='o', label='Presses per hour', color='red')
    plt.xlabel('Hour')
    plt.ylabel('Event count')
    plt.title(f'Key presses per hour - Week {week}')
    plt.legend()
    # Strip stray quotes outside the f-string: a backslash inside an f-string
    # expression is a syntax error before Python 3.12.
    safe_week = week.strip("'")
    save_figure(f"graph_week_{safe_week}_per_hour.png")
def render_per_day() -> None:
    """Plot the total number of key presses recorded on each calendar day."""
    daily_sql = """
        SELECT strftime('%Y-%m-%d', timestamp) as month_day,
               count(0) as total
        FROM kevent
        WHERE event = 'PRESSED'
        GROUP BY month_day
        ORDER BY month_day
    """
    daily_rows = query(daily_sql)
    if not daily_rows:
        logger.warning("No data found for daily analysis")
        return
    dates = [day for day, _ in daily_rows]
    press_counts = [count for _, count in daily_rows]
    setup_plot_defaults()
    plt.plot(dates, press_counts, marker='o', label='Presses per day', color='red')
    plt.xlabel('Date')
    plt.ylabel('Event count')
    plt.xticks(rotation=45)
    plt.title('Keyboard events by day')
    plt.legend()
    save_figure("graph_per_day.png")
def render_per_week() -> None:
    """Plot the total number of key presses recorded in each ISO-style week."""
    weekly_sql = """
        SELECT strftime('%Y-%U', timestamp) as week,
               count(0) as total
        FROM kevent
        WHERE event = 'PRESSED'
        GROUP BY week
        ORDER BY week
    """
    weekly_rows = query(weekly_sql)
    if not weekly_rows:
        logger.warning("No data found for weekly analysis")
        return
    week_labels = [label for label, _ in weekly_rows]
    press_counts = [count for _, count in weekly_rows]
    setup_plot_defaults()
    plt.plot(week_labels, press_counts, marker='o', label='Presses per week', color='red')
    plt.xlabel('Week')
    plt.ylabel('Presses count')
    plt.xticks(rotation=45)
    plt.title('Presses per week')
    plt.legend()
    save_figure("graph_per_week.png")
def render_per_weekday(week: str) -> None:
    """
    Generate a line chart of key presses per weekday for a specific week.

    Args:
        week: Week number ('%U' format) to analyze.
    """
    # Namespace the dedup key per analysis type. The original stored the bare
    # week number in the shared processed_weeks set, so this function was
    # always skipped for any week render_per_hour() had already marked.
    dedup_key = f"weekday:{week}"
    if dedup_key in processed_weeks:
        logger.info(f"Week {week} already processed for weekday analysis, skipping")
        return
    processed_weeks.add(dedup_key)
    sql_presses = f"""
        SELECT count(0) as total, {WEEKDAY_SQL} as weekday,
               strftime('%w', timestamp) as day, strftime('%U', timestamp) as week
        FROM kevent
        WHERE event = 'PRESSED' AND week = '{week}'
        GROUP BY week, day
        ORDER BY day
    """
    rows_presses = query(sql_presses)
    if not rows_presses:
        logger.warning(f"No data found for week {week} weekday analysis")
        return
    totals = [row[0] for row in rows_presses]
    days = [row[2] for row in rows_presses]
    weekday_names = [row[1] for row in rows_presses]
    setup_plot_defaults()
    plt.plot(days, totals, marker='o', label='Press count', color='red')
    plt.xlabel('Weekday')
    plt.ylabel('Event count')
    plt.title(f'Presses per weekday - Week {week}')
    plt.xticks(range(len(weekday_names)), weekday_names, rotation=45)
    plt.legend()
    # Strip stray double quotes outside the f-string: a backslash inside an
    # f-string expression is a syntax error before Python 3.12.
    safe_week = week.strip('"')
    save_figure(f"graph_week_{safe_week}_per_weekday.png")
def get_weeks() -> List[str]:
    """
    Return every distinct week number ('%U') present in the kevent table.

    Returns:
        List of week numbers as strings.
    """
    rows = query(
        "SELECT DISTINCT strftime('%U', timestamp) as week FROM kevent GROUP BY week"
    )
    return [week for (week,) in rows]
def get_score_per_week() -> List[Tuple]:
    """
    Get event counts grouped by week.

    Returns:
        List of (week, event_type, count) tuples.
    """
    weekly_score_sql = """
        SELECT strftime('%U', timestamp) as week, event, COUNT(0) as total
        FROM kevent
        GROUP BY event, week
    """
    return query(weekly_score_sql)
def get_score_per_day() -> List[Tuple]:
    """
    Get PRESSED-event counts grouped by week and weekday name.

    Returns:
        List of (week, weekday, event_type, count) tuples.
    """
    daily_score_sql = f"""
        SELECT strftime('%U', timestamp) as week,
               {WEEKDAY_SQL} as wday,
               event, COUNT(0) as total
        FROM kevent
        WHERE event in ('PRESSED')
        GROUP BY week, event, wday
        ORDER BY week, event, wday
    """
    return query(daily_score_sql)
def get_totals() -> List[Tuple]:
    """
    Get the total count of each event type.

    Returns:
        List of (count, event_type) tuples.
    """
    totals_sql = "SELECT count(0) as total, event FROM kevent GROUP BY event"
    return query(totals_sql)
def generate_keylog() -> Dict[str, str]:
    """
    Build a log of key presses grouped into date-hour buckets.

    Returns:
        Dictionary mapping 'YYYY-MM-DD.HH' to the concatenated keys
        pressed in that hour.
    """
    keylog_sql = """
        SELECT strftime('%Y-%m-%d.%H', timestamp) as date_hour,
               GROUP_CONCAT(char,'')
        FROM kevent
        WHERE event = 'PRESSED'
        GROUP BY date_hour
    """
    return {date_hour: keys for date_hour, keys in query(keylog_sql)}
def write_keylog_files(keylog: Dict[str, str]) -> None:
    """
    Write keylog data to per-hour files plus one combined keylog.txt.

    Args:
        keylog: Dictionary mapping 'YYYY-MM-DD.HH' to concatenated key presses.
    """
    logs_dir = pathlib.Path("logs_plain")
    logs_dir.mkdir(exist_ok=True)
    with open("keylog.txt", "w") as combined:
        for bucket, keys in keylog.items():
            date, hour = bucket.split(".")
            entry = f"**{date} {hour}:00**: ```{keys}```\n\n"
            hour_file = logs_dir / f"{bucket}.txt"
            # Per-hour files are written once and never overwritten; the
            # combined file is rebuilt from scratch on every run.
            if not hour_file.exists():
                with open(hour_file, "w") as single:
                    single.write(entry)
            combined.write(entry)
def ipa(prompt: str) -> str:
    """
    Send a prompt to the AI completion endpoint and return the raw response text.

    Args:
        prompt: Prompt text to submit to the model.

    Returns:
        Raw response body returned by the API.
    """
    import requests  # local import: keeps requests optional for non-AI use
    # SECURITY: credentials are hard-coded in the URL; they leak into logs and
    # tracebacks — move them to an environment variable or config file.
    result = requests.post(
        "https://retoor:retoorded@ipa.molodetz.nl/ai/prompt",
        json={"prompt": prompt, "model": "google/gemma-3-12b-it", "json": False},
        timeout=120,  # the original had no timeout, so a stalled server hung the run forever
    ).text
    print(result)
    return result
def generate_summaries(dry_run: bool=False) -> None:
"""Generate summaries for keylog files using AI API"""
logs_dir = pathlib.Path("logs_plain")
summary_dir = pathlib.Path("logs_summaries")
oneliner_dir = pathlib.Path("logs_lines")
summary_dir.mkdir(exist_ok=True)
oneliner_dir.mkdir(exist_ok=True)
# Process summaries
for file in logs_dir.glob("*.txt"):
# Skip if already processed
if str(file) in processed_files:
logger.info(f"File {file} already processed for summary, skipping")
continue
processed_files.add(str(file))
dest_file = summary_dir / file.name
if dest_file.exists():
logger.info(f"Summary already exists for {file.name}, skipping")
continue
try:
logger.info(f"Generating summary for {file.name}")
if dry_run:
continue
param = file.read_text().replace("@", "").replace("`", "")
prompt = "The following data is key presses made by user. Describe what user could be working on using bulletpoints: " + param
response = ipa(prompt)
with dest_file.open("w+") as f:
f.write(response)
logger.info(f"Summary generated for {file.name}")
except Exception as e:
logger.error(f"Error generating summary for {file.name}: {e}")
# Process one-liners
for file in summary_dir.glob("*.txt"):
# Skip if already processed
if str(file) in processed_files:
logger.info(f"File {file} already processed for one-liner, skipping")
continue
processed_files.add(str(file))
dest_file = oneliner_dir / file.name
if dest_file.exists():
logger.info(f"One-liner already exists for {file.name}, skipping")
continue
try:
logger.info(f"Generating one-liner for {file.name}")
if dry_run:
continue
source = file.read_text().replace("@", "").replace("`", "")
prompt = "The following data is a hour of work summarized from the user. Describe what user was doing in a oneliner: " + source
response = ipa(prompt)
with dest_file.open("w+") as f:
f.write(response)
logger.info(f"One-liner generated for {file.name}")
except Exception as e:
logger.error(f"Error generating one-liner for {file.name}: {e}")
def main() -> None:
    """Main function to execute all analytics tasks."""
    # Note: the original called generate_summaries(False) here, BEFORE its
    # docstring (making the docstring dead code) and in addition to the call
    # further down — the expensive AI pass ran twice per invocation.
    time_start = time.time()
    logger.info("Starting keyboard analytics process")
    # Load persisted dedup state so reruns skip already-generated artifacts.
    state_file = pathlib.Path("analytics_state.json")
    if state_file.exists():
        try:
            state = json.loads(state_file.read_text())
            processed_files.update(state.get("processed_files", []))
            processed_weeks.update(state.get("processed_weeks", []))
            logger.info(f"Loaded state: {len(processed_files)} files and {len(processed_weeks)} weeks processed previously")
        except Exception as e:
            logger.error(f"Error loading state: {e}")
    # Generate visualizations
    render_per_day()
    render_per_week()
    for week in get_weeks():
        render_per_hour(week)
        render_per_weekday(week)
    # Print statistics
    logger.info("Score per week:")
    for record in get_score_per_week():
        logger.info(f"{record[0]}\t{record[1]}\t{record[2]}")
    logger.info("Score per day:")
    for record in get_score_per_day():
        logger.info(f"{record[0]}\t{record[1]}\t{record[2]}\t{record[3]}")
    logger.info("Total events:")
    totals = 0
    for record in get_totals():
        logger.info(f"{record[1]}: {record[0]}")
        totals += record[0]
    logger.info(f"Total: {totals}")
    # Generate and write keylog
    keylog = generate_keylog()
    write_keylog_files(keylog)
    # Generate summaries (exactly once per run)
    generate_summaries()
    # Save state
    try:
        state = {
            "processed_files": list(processed_files),
            "processed_weeks": list(processed_weeks),
            "last_run": time.time()
        }
        state_file.write_text(json.dumps(state))
        logger.info("State saved successfully")
    except Exception as e:
        logger.error(f"Error saving state: {e}")
    duration = time.time() - time_start
    logger.info(f"Process completed in {duration:.2f} seconds")


if __name__ == "__main__":
    main()