retoor 2025-11-08 15:08:49 +01:00
parent 9950340145
commit a8f1d81976
14 changed files with 1920 additions and 232 deletions

0
.clang-format Normal file → Executable file

0
.gitignore vendored Normal file → Executable file

0
LICENSE Normal file → Executable file

6
Makefile Normal file → Executable file

@@ -1,4 +1,4 @@
-all: tikker run
+all: plot process
 
 tikker: tikker.c sormc.h
 	gcc tikker.c -Ofast -Wall -Werror -Wextra -o tikker -lsqlite3
@@ -9,8 +9,8 @@ run:
 PYTHON="./.venv/bin/python"
 
 ensure_env:
-	-@python3 -m venv .venv
-	$(PYTHON) -m pip install dataset matplotlib
+	-@python3.12 -m venv .venv
+	$(PYTHON) -m pip install dataset matplotlib openai requests
 
 merge:
 	$(PYTHON) merge.py

0
README.md Normal file → Executable file

0
merge.py Normal file → Executable file

567
plot.py Normal file → Executable file

@@ -1,279 +1,470 @@
#!/usr/bin/env python3
"""
Keyboard Analytics - A tool for analyzing keyboard usage patterns
This script analyzes keyboard events stored in a SQLite database and generates
visualizations and reports based on the data. It can track key presses across
different time periods and create meaningful insights about typing patterns.
"""
import sqlite3
import time
import matplotlib.pyplot as plt
import pathlib
import json
import logging
import requests
from typing import List, Dict, Tuple, Any, Set
import matplotlib.pyplot as plt
from xmlrpc.client import ServerProxy
api = ServerProxy("https://api.molodetz.nl/rpc")
connection = sqlite3.connect('tikker.db')
weekday_sql = (
"CASE "
"WHEN strftime('%w', timestamp) = '0' THEN 'Sunday' "
"WHEN strftime('%w', timestamp) = '1' THEN 'Monday' "
"WHEN strftime('%w', timestamp) = '2' THEN 'Tuesday' "
"WHEN strftime('%w', timestamp) = '3' THEN 'Wednesday' "
"WHEN strftime('%w', timestamp) = '4' THEN 'Thursday' "
"WHEN strftime('%w', timestamp) = '5' THEN 'Friday' "
"WHEN strftime('%w', timestamp) = '6' THEN 'Saturday' "
"END"
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def query(sql):
# Initialize API connection
API_ENDPOINT = "https://api.molodetz.nl/rpc"
api = ServerProxy(API_ENDPOINT)
# Database connection
DB_PATH = 'tikker.db'
connection = sqlite3.connect(DB_PATH)
# Track processed items to avoid duplicate work
processed_files: Set[str] = set()
processed_weeks: Set[str] = set()
# SQL helper for weekday names
WEEKDAY_SQL = """
CASE
WHEN strftime('%w', timestamp) = '0' THEN 'Sunday'
WHEN strftime('%w', timestamp) = '1' THEN 'Monday'
WHEN strftime('%w', timestamp) = '2' THEN 'Tuesday'
WHEN strftime('%w', timestamp) = '3' THEN 'Wednesday'
WHEN strftime('%w', timestamp) = '4' THEN 'Thursday'
WHEN strftime('%w', timestamp) = '5' THEN 'Friday'
WHEN strftime('%w', timestamp) = '6' THEN 'Saturday'
END
"""
def query(sql: str) -> List[Tuple]:
"""
Execute an SQL query and return the results.
Args:
sql: SQL query to execute
Returns:
List of result tuples
"""
start = time.time()
cursor = connection.cursor()
print(sql)
logger.debug(f"Executing SQL: {sql}")
result = cursor.execute(sql).fetchall()
cursor.close()
duration = time.time() - start
print("Duration: {}\n".format(duration))
logger.info(f"Query completed in {duration:.4f} seconds")
return result
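# Example usage (illustrative values only):
#   query("SELECT count(0), event FROM kevent GROUP BY event")
#   -> [(12345, 'PRESSED'), (12344, 'RELEASED')]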
def render_per_hour(week):
week = f"{week}"
def setup_plot_defaults() -> None:
"""Configure default matplotlib settings for all plots"""
plt.style.use('dark_background')
plt.figure(figsize=(10, 7))
sql_presses = (
"SELECT count(0) as total, strftime('%H', timestamp) as hour, "
"strftime('%U', timestamp) as week "
"FROM kevent WHERE event = 'PRESSED' AND week = '{week}' "
"GROUP BY week, hour ORDER BY hour"
)
def save_figure(filename: str) -> None:
"""Save the current matplotlib figure to a file"""
plt.tight_layout()
plt.savefig(filename)
plt.close()
logger.info(f"Figure saved: {filename}")
#sql_repeated = (
# "SELECT count(0) as total, strftime('%H', timestamp) as hour, "
# "strftime('%U', timestamp) as week "
# "FROM kevent WHERE event = 'REPEATED' AND week = {week} "
# "GROUP BY week, hour ORDER BY hour"
#)
def render_per_hour(week: str) -> None:
"""
Generate visualization of key presses per hour for a specific week
#sql_released = (
# "SELECT count(0) as total, strftime('%H', timestamp) as hour, "
# "strftime('%U', timestamp) as week "
# "FROM kevent WHERE event = 'RELEASED' AND week = {week} "
# "GROUP BY week, hour ORDER BY hour"
#)
Args:
week: Week number to analyze
"""
# Skip if already processed
if week in processed_weeks:
logger.info(f"Week {week} already processed for hourly analysis, skipping")
return
processed_weeks.add(week)
sql_presses = """
SELECT count(0) as total, strftime('%H', timestamp) as hour,
strftime('%U', timestamp) as week
FROM kevent
WHERE event = 'PRESSED' AND week = '{week}'
GROUP BY week, hour
ORDER BY hour
"""
rows_presses = query(sql_presses.format(week=week))
#rows_repeated = query(sql_repeated.format(week=week))
#rows_released = query(sql_released.format(week=week))
if not rows_presses:
logger.warning(f"No data found for week {week}")
return
totals = [row[0] for row in rows_presses]
hours = [row[1] for row in rows_presses]
#totals_repeated = [row[0] for row in rows_repeated]
#hours_repeated = [row[1] for row in rows_repeated]
#totals_released = [row[0] for row in rows_released]
#hours_released = [row[1] for row in rows_released]
plt.figure(figsize=(8, 6))
#plt.plot(hours_repeated, totals_repeated, marker='o', label='Repeats per hour', color='green')
#plt.plot(hours_released, totals_released, marker='o', label='Releases per hour', color='orange')
plt.plot(hours, totals, marker='o', label=f'Presses per hour week {week}', color='red')
setup_plot_defaults()
plt.plot(hours, totals, marker='o', label=f'Presses per hour', color='red')
plt.xlabel('Hour')
plt.ylabel('Event count')
plt.title(f'Key presses per hour. Week {week}')
plt.style.use('dark_background')
plt.title(f'Key presses per hour - Week {week}')
plt.legend()
plt.savefig(f"graph_week_{week.strip('\'')}_per_hour.png")
save_figure(f"graph_week_{week.strip('\'')}_per_hour.png")
def render_per_day():
sql_pressed_per_day = (
"SELECT strftime('%Y-%m-%d', timestamp) as month_day,count(0) as total FROM kevent WHERE event = 'PRESSED' GROUP BY month_day ORDER BY month_day"
)
plt.figure(figsize=(8,6))
def render_per_day() -> None:
"""Generate visualization of key presses per day"""
sql_pressed_per_day = """
SELECT strftime('%Y-%m-%d', timestamp) as month_day,
count(0) as total
FROM kevent
WHERE event = 'PRESSED'
GROUP BY month_day
ORDER BY month_day
"""
rows_pressed_per_day = query(sql_pressed_per_day)
totals = [row[0] for row in rows_pressed_per_day]
dates = [row[1] for row in rows_pressed_per_day]
if not rows_pressed_per_day:
logger.warning("No data found for daily analysis")
return
plt.plot(totals, dates, marker='o', label='Presses per day', color='red')
dates = [row[0] for row in rows_pressed_per_day]
totals = [row[1] for row in rows_pressed_per_day]
setup_plot_defaults()
plt.plot(dates, totals, marker='o', label='Presses per day', color='red')
plt.xlabel('Date')
plt.ylabel('Event count')
plt.xticks(rotation=45)
plt.style.use('dark_background')
plt.title('Keyboard events')
plt.tight_layout()
plt.title('Keyboard events by day')
plt.legend()
plt.savefig(f"graph_per_day.png")
def render_per_week():
sql_pressed_per_day = (
f"SELECT strftime('%Y-%U', timestamp) as week,count(0) as total FROM kevent WHERE event = 'PRESSED' GROUP BY week ORDER BY week"
)
plt.figure(figsize=(8,6))
save_figure("graph_per_day.png")
rows_pressed_per_day = query(sql_pressed_per_day)
def render_per_week() -> None:
"""Generate visualization of key presses per week"""
sql_pressed_per_week = """
SELECT strftime('%Y-%U', timestamp) as week,
count(0) as total
FROM kevent
WHERE event = 'PRESSED'
GROUP BY week
ORDER BY week
"""
totals = [row[0] for row in rows_pressed_per_day]
dates = [row[1] for row in rows_pressed_per_day]
rows_pressed_per_week = query(sql_pressed_per_week)
plt.plot(totals, dates, marker='o', label='Presses per day', color='red')
if not rows_pressed_per_week:
logger.warning("No data found for weekly analysis")
return
weeks = [row[0] for row in rows_pressed_per_week]
totals = [row[1] for row in rows_pressed_per_week]
setup_plot_defaults()
plt.plot(weeks, totals, marker='o', label='Presses per week', color='red')
plt.xlabel('Week')
plt.ylabel('Presses count')
plt.xticks(rotation=45)
plt.title(f'Presses per week')
plt.tight_layout()
plt.style.use('dark_background')
plt.title('Presses per week')
plt.legend()
plt.savefig(f"graph_per_week.png")
save_figure("graph_per_week.png")
def render_per_weekday(week: str) -> None:
"""
Generate visualization of key presses per weekday for a specific week
Args:
week: Week number to analyze
"""
# Skip if already processed
if week in processed_weeks:
logger.info(f"Week {week} already processed for weekday analysis, skipping")
return
processed_weeks.add(week)
def render_per_weekday(week):
sql_presses = f"""
SELECT count(0) as total, {WEEKDAY_SQL} as weekday,
strftime('%w', timestamp) as day, strftime('%U', timestamp) as week
FROM kevent
WHERE event = 'PRESSED' AND week = '{week}'
GROUP BY week, day
ORDER BY day
"""
sql_presses = (
f"SELECT count(0) as total, {weekday_sql} as weekday, "
"strftime('%w', timestamp) as day, strftime('%U', timestamp) as week "
"FROM kevent WHERE event = 'PRESSED' AND week = '{week}' "
"GROUP BY week, day ORDER BY day"
)
rows_presses = query(sql_presses)
sql_repeated = (
f"SELECT count(0) as total, {weekday_sql} as weekday, "
"strftime('%w', timestamp) as day, strftime('%U', timestamp) as week "
"FROM kevent WHERE event = 'REPEATED' AND week = '{week}' "
"GROUP BY week, day ORDER BY day"
)
sql_released = (
f"SELECT count(0) as total, {weekday_sql} as weekday, "
"strftime('%w', timestamp) as day, strftime('%U', timestamp) as week "
"FROM kevent WHERE event = 'RELEASED' AND week = '{week}' "
"GROUP BY week, day ORDER BY day"
)
rows_presses = query(sql_presses.format(week=week))
#rows_repeated = query(sql_repeated.format(week=week))
#rows_released = query(sql_released.format(week=week))
if not rows_presses:
logger.warning(f"No data found for week {week} weekday analysis")
return
totals = [row[0] for row in rows_presses]
days = [row[2] for row in rows_presses]
weekday_names = [row[1] for row in rows_presses]
#totals_repeated = [row[0] for row in rows_repeated]
#days_repeated = [row[2] for row in rows_repeated]
setup_plot_defaults()
plt.plot(days, totals, marker='o', label='Press count', color='red')
#totals_released = [row[0] for row in rows_released]
#days_released = [row[2] for row in rows_released]
plt.figure(figsize=(8, 6))
#plt.plot(days_repeated, totals_repeated, marker='o', label='Repeats per weekday', color='green')
#plt.plot(days_released, totals_released, marker='o', label='Releases per weekday', color='orange')
plt.plot(days, totals, marker='o', label=f'Press count', color='red')
plt.xlabel('Weekday (0 = Sunday, 6 = Saturday)')
plt.xlabel('Weekday')
plt.ylabel('Event count')
plt.title(f'Presses per weekday. Week {week}')
plt.style.use('dark_background')
plt.title(f'Presses per weekday - Week {week}')
plt.xticks(range(len(weekday_names)), weekday_names, rotation=45)
plt.legend()
plt.savefig(f"graph_week_{week.strip('\"')}_per_weekday.png")
save_figure(f"graph_week_{week.strip('\"')}_per_weekday.png")
def get_weeks():
sql = "SELECT strftime('%U', timestamp) as week FROM kevent GROUP BY week"
def get_weeks() -> List[str]:
"""
Get list of all weeks in the database
Returns:
List of week numbers
"""
sql = "SELECT DISTINCT strftime('%U', timestamp) as week FROM kevent GROUP BY week"
weeks = query(sql)
return [record[0] for record in weeks]
def get_score_per_week():
sql = (
"SELECT strftime('%U', timestamp) as week, event, COUNT(0) as total "
"FROM kevent GROUP BY event, week"
)
def get_score_per_week() -> List[Tuple]:
"""
Get event counts grouped by week
Returns:
List of (week, event_type, count) tuples
"""
sql = """
SELECT strftime('%U', timestamp) as week, event, COUNT(0) as total
FROM kevent
GROUP BY event, week
"""
return query(sql)
def get_score_per_day():
def get_score_per_day() -> List[Tuple]:
"""
Get event counts grouped by day of week
sql ="SELECT count(0) as total, CASE WHEN strftime('%w', timestamp) = 0 THEN 'Sunday' WHEN strftime('%w', timestamp) = 1 THEN 'Monday' WHEN strftime('%w', timestamp) = 2 THEN 'Tuesday' WHEN strftime('%w', timestamp) = 3 THEN 'Wednesday' WHEN strftime('%w', timestamp) = 4 THEN 'Thursday' WHEN strftime('%w', timestamp) = 5 THEN 'Friday' WHEN strftime('%w', timestamp) = 6 THEN 'Saturday' END as weekday, strftime('%w', timestamp) as day, strftime('%U', timestamp) as week FROM kevent WHERE event = 'REPEATED' GROUP BY week, day ORDER BY day"
sql = (
f"SELECT strftime('%U',timestamp) as week, {weekday_sql} as wday, event, COUNT(0) as total "
f"FROM kevent WHERE event in ('PRESSED') GROUP BY week, event, wday ORDER BY week, event, wday"
)
Returns:
List of (week, weekday, event_type, count) tuples
"""
sql = f"""
SELECT strftime('%U', timestamp) as week,
{WEEKDAY_SQL} as wday,
event, COUNT(0) as total
FROM kevent
WHERE event in ('PRESSED')
GROUP BY week, event, wday
ORDER BY week, event, wday
"""
return query(sql)
def get_totals():
sql = "SELECT count(0) as total, event from kevent group by event"
def get_totals() -> List[Tuple]:
"""
Get total count of each event type
Returns:
List of (count, event_type) tuples
"""
sql = "SELECT count(0) as total, event FROM kevent GROUP BY event"
return query(sql)
# Main execution
if __name__ == "__main__":
time_start = time.time()
render_per_day()
render_per_week()
for week in get_weeks():
render_per_hour(week)
render_per_weekday(week)
print("Score per week:")
for record in get_score_per_week():
print(f"{record[0]} \t{record[1]} \t{record[2]}")
print("Score per day:")
for record in get_score_per_day():
print(f"{record[0]} \t{record[1]} \t{record[2]}")
print("Total events:")
totals = 0
for record in get_totals():
print(f"{record[1]}: {record[0]}")
totals += record[0]
print(totals)
def generate_keylog() -> Dict[str, str]:
"""
Generate a log of key presses grouped by date and hour
Returns:
Dictionary of date-hour to concatenated key presses
"""
result = {}
rows = query("SElECT strftime('%Y-%m-%d.%H', timestamp) as date_hour, GROUP_CONCAT(char,'') FROM kevent WHERE event = 'PRESSED' group by date_hour")
rows = query("""
SELECT strftime('%Y-%m-%d.%H', timestamp) as date_hour,
GROUP_CONCAT(char,'')
FROM kevent
WHERE event = 'PRESSED'
GROUP BY date_hour
""")
for row in rows:
result[row[0]] = row[1]
return result
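# Shape of the returned mapping (values are illustrative):
#   {"2025-11-08.14": "all key chars typed in that hour, concatenated", ...}
# The "<date>.<hour>" key is split on "." again in write_keylog_files() below.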
def write_keylog_files(keylog: Dict[str, str]) -> None:
"""
Write keylog data to files
Args:
keylog: Dictionary of date-hour to concatenated key presses
"""
logs_dir = pathlib.Path("logs_plain")
logs_dir.mkdir(exist_ok=True)
with open("keylog.txt", "w") as f:
for day in result.keys():
for day in keylog.keys():
date, hour = day.split(".")
label = f"{date} {hour}:00"
if not pathlib.Path("logs_plain/"+day+".txt").exists():
with open("logs_plain/"+day+".txt","w") as g:
log_file = logs_dir / f"{day}.txt"
if not log_file.exists():
with open(log_file, "w") as g:
g.write(f"**{label}**: ```{keylog[day]}```\n\n")
g.write(f"**{label}**: ```{result[day]}```\n\n")
f.write(f"**{label}**: ```{result[day]}```\n\n")
f.write(f"**{label}**: ```{keylog[day]}```\n\n")
print("Duration: {}".format(time.time() - time_start))
exit()
import json
for file in pathlib.Path(".").glob("logs_plain/*.txt"):
print("Working on: {}".format(file))
dest_file = file.parent.parent.joinpath("logs_summaries").joinpath(file.name)
print("Dest file: ", dest_file)
if dest_file.exists():
def ipa(prompt):
import requests
result = requests.post("https://retoor:retoorded@ipa.molodetz.nl/ai/prompt",json={"prompt": prompt, "model": "google/gemma-3-12b-it","json":False}).text
print(result)
return result
def generate_summaries(dry_run: bool=False) -> None:
"""Generate summaries for keylog files using AI API"""
logs_dir = pathlib.Path("logs_plain")
summary_dir = pathlib.Path("logs_summaries")
oneliner_dir = pathlib.Path("logs_lines")
summary_dir.mkdir(exist_ok=True)
oneliner_dir.mkdir(exist_ok=True)
# Process summaries
for file in logs_dir.glob("*.txt"):
# Skip if already processed
if str(file) in processed_files:
logger.info(f"File {file} already processed for summary, skipping")
continue
processed_files.add(str(file))
dest_file = summary_dir / file.name
if dest_file.exists():
logger.info(f"Summary already exists for {file.name}, skipping")
continue
try:
logger.info(f"Generating summary for {file.name}")
if dry_run:
continue
with dest_file.open("w+") as f:
print("Requesting...")
param = file.read_text().replace("@", "").replace("`", "")
response = api.gpt4o_mini("The following data is key presses made by user. Describe what user could be working on using bulletpoints: "+param)
print("Done")
f.write(response)
print(response)
for file in pathlib.Path(".").glob("logs_summaries/*.txt"):
dest_file = file.parent.parent.joinpath("logs_lines").joinpath(file.name)
if dest_file.exists():
print("One liner already exists for" + file.name)
continue
with dest_file.open("w+") as f:
source = file.read_text().replace("@","").replace("`","")
response = api.gpt4o_mini("The following data is a hour of work summarized from the user. Describe what user was doing in a onliner.: "+source)
f.write(response)
print("Made one liner for" + file.name)
prompt = "The following data is key presses made by user. Describe what user could be working on using bulletpoints: " + param
response = ipa(prompt)
print("Duration: {}".format(time.time() - time_start))
with dest_file.open("w+") as f:
f.write(response)
logger.info(f"Summary generated for {file.name}")
except Exception as e:
logger.error(f"Error generating summary for {file.name}: {e}")
# Process one-liners
for file in summary_dir.glob("*.txt"):
# Skip if already processed
if str(file) in processed_files:
logger.info(f"File {file} already processed for one-liner, skipping")
continue
processed_files.add(str(file))
dest_file = oneliner_dir / file.name
if dest_file.exists():
logger.info(f"One-liner already exists for {file.name}, skipping")
continue
try:
logger.info(f"Generating one-liner for {file.name}")
if dry_run:
continue
source = file.read_text().replace("@", "").replace("`", "")
prompt = "The following data is a hour of work summarized from the user. Describe what user was doing in a oneliner: " + source
response = ipa(prompt)
with dest_file.open("w+") as f:
f.write(response)
logger.info(f"One-liner generated for {file.name}")
except Exception as e:
logger.error(f"Error generating one-liner for {file.name}: {e}")
def main() -> None:
# Generate summaries
generate_summaries(False)
"""Main function to execute all analytics tasks"""
time_start = time.time()
logger.info("Starting keyboard analytics process")
# Load state if exists
state_file = pathlib.Path("analytics_state.json")
if state_file.exists():
try:
state = json.loads(state_file.read_text())
processed_files.update(state.get("processed_files", []))
processed_weeks.update(state.get("processed_weeks", []))
logger.info(f"Loaded state: {len(processed_files)} files and {len(processed_weeks)} weeks processed previously")
except Exception as e:
logger.error(f"Error loading state: {e}")
# Generate visualizations
render_per_day()
render_per_week()
weeks = get_weeks()
for week in weeks:
render_per_hour(week)
render_per_weekday(week)
# Print statistics
logger.info("Score per week:")
for record in get_score_per_week():
logger.info(f"{record[0]}\t{record[1]}\t{record[2]}")
logger.info("Score per day:")
for record in get_score_per_day():
logger.info(f"{record[0]}\t{record[1]}\t{record[2]}\t{record[3]}")
logger.info("Total events:")
totals = 0
for record in get_totals():
logger.info(f"{record[1]}: {record[0]}")
totals += record[0]
logger.info(f"Total: {totals}")
# Generate and write keylog
keylog = generate_keylog()
write_keylog_files(keylog)
# Generate summaries
generate_summaries()
# Save state
try:
state = {
"processed_files": list(processed_files),
"processed_weeks": list(processed_weeks),
"last_run": time.time()
}
state_file.write_text(json.dumps(state))
logger.info("State saved successfully")
except Exception as e:
logger.error(f"Error saving state: {e}")
duration = time.time() - time_start
logger.info(f"Process completed in {duration:.2f} seconds")
if __name__ == "__main__":
main()

3
requirements.txt Normal file → Executable file

@@ -1,2 +1,3 @@
 matplotlib
 openai
+requests

0
review.md Normal file → Executable file

0
sormc.h Normal file → Executable file

0
tags.py Normal file → Executable file

0
tikker.c Normal file → Executable file

0
tikker.c.md Normal file → Executable file

1496
tikker_viz.py Normal file

File diff suppressed because it is too large.