"""Schema-less async SQLite helper (``AsyncDataSet``) together with its unit tests."""

import re
import json
from uuid import uuid4
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, List, Optional, AsyncGenerator, Union, Tuple, Set
from pathlib import Path
import aiosqlite
import unittest
from types import SimpleNamespace
import asyncio


class AsyncDataSet:
    """Thin async wrapper around a SQLite file that grows its schema on demand.

    Tables and columns are created lazily the first time they are written to.
    Every table created by this class carries the bookkeeping columns listed
    in ``_DEFAULT_COLUMNS``. Each operation opens its own short-lived
    connection to ``file``.

    NOTE: table names, ``order_by`` and aggregate function/column names are
    interpolated into SQL verbatim; they must come from trusted code, never
    from end-user input.
    """

    _KV_TABLE = "__kv_store"
    _DEFAULT_COLUMNS = {
        "uid": "TEXT PRIMARY KEY",
        "created_at": "TEXT",
        "updated_at": "TEXT",
        "deleted_at": "TEXT",
    }

    # Patterns used to recover the offending identifier from SQLite errors.
    _RE_NO_COLUMN = re.compile(r"(?:no such column:|has no column named) (\w+)")
    _RE_NO_TABLE = re.compile(r"no such table: (\w+)")

    def __init__(self, file: str):
        """Remember the database path; no connection is opened here."""
        self._file = file
        self._table_columns_cache: Dict[str, Set[str]] = {}

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _utc_iso() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return (
            datetime.now(timezone.utc)
            .replace(microsecond=0)
            .isoformat()
            .replace("+00:00", "Z")
        )

    @staticmethod
    def _py_to_sqlite_type(value: Any) -> str:
        """Map a Python value to the SQLite column type used to store it."""
        if value is None:
            return "TEXT"
        # bool is a subclass of int, so this covers both.
        if isinstance(value, int):
            return "INTEGER"
        if isinstance(value, float):
            return "REAL"
        if isinstance(value, (bytes, bytearray, memoryview)):
            return "BLOB"
        return "TEXT"

    async def _get_table_columns(self, table: str) -> Set[str]:
        """Return the column names of ``table`` (empty set if unknown).

        Results are cached per table. An empty result is *not* cached, so a
        table that is created later is picked up on the next call.
        """
        cached = self._table_columns_cache.get(table)
        if cached is not None:
            return cached

        columns: Set[str] = set()
        try:
            async with aiosqlite.connect(self._file) as db:
                async with db.execute(f"PRAGMA table_info({table})") as cursor:
                    async for row in cursor:
                        columns.add(row[1])  # column name is at index 1
        except aiosqlite.Error:
            # Best effort: callers treat "no columns" as "unknown schema".
            return columns

        if columns:
            self._table_columns_cache[table] = columns
        return columns

    async def _invalidate_column_cache(self, table: str) -> None:
        """Forget the cached column set of ``table``."""
        self._table_columns_cache.pop(table, None)

    async def _ensure_column(self, table: str, name: str, value: Any) -> None:
        """Add column ``name`` to ``table`` if it does not exist yet.

        The column type is derived from ``value``. A "duplicate column name"
        error is ignored; any other ``OperationalError`` is re-raised.
        """
        col_type = self._py_to_sqlite_type(value)
        try:
            async with aiosqlite.connect(self._file) as db:
                await db.execute(f"ALTER TABLE {table} ADD COLUMN `{name}` {col_type}")
                await db.commit()
                await self._invalidate_column_cache(table)
        except aiosqlite.OperationalError as e:
            if "duplicate column name" not in str(e).lower():
                raise

    async def _ensure_table(self, table: str, col_sources: Dict[str, Any]) -> None:
        """Create ``table`` if missing, with default columns plus ``col_sources`` keys."""
        cols = self._DEFAULT_COLUMNS.copy()
        for key, val in col_sources.items():
            if key not in cols:
                cols[key] = self._py_to_sqlite_type(val)

        columns_sql = ", ".join(f"`{k}` {t}" for k, t in cols.items())
        async with aiosqlite.connect(self._file) as db:
            await db.execute(f"CREATE TABLE IF NOT EXISTS {table} ({columns_sql})")
            await db.commit()
            await self._invalidate_column_cache(table)

    async def _table_exists(self, table: str) -> bool:
        """Return True if a table named ``table`` exists in the database."""
        async with aiosqlite.connect(self._file) as db:
            async with db.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table,)
            ) as cursor:
                return await cursor.fetchone() is not None

    @classmethod
    def _missing_column_from_error(
        cls, err: aiosqlite.OperationalError
    ) -> Optional[str]:
        """Extract the column name from a "no such column" style error, if any."""
        m = cls._RE_NO_COLUMN.search(str(err))
        return m.group(1) if m else None

    @classmethod
    def _missing_table_from_error(
        cls, err: aiosqlite.OperationalError
    ) -> Optional[str]:
        """Extract the table name from a "no such table" error, if any."""
        m = cls._RE_NO_TABLE.search(str(err))
        return m.group(1) if m else None

    async def _safe_execute(
        self,
        table: str,
        sql: str,
        params: Iterable[Any],
        col_sources: Dict[str, Any],
        max_retries: int = 10,
    ) -> aiosqlite.Cursor:
        """Execute a write statement, creating missing tables/columns on the fly.

        On a "missing column" / "missing table" error the schema is extended
        and the statement retried, up to ``max_retries`` attempts.

        Raises:
            aiosqlite.OperationalError: for errors that are not schema-related.
            RuntimeError: when ``max_retries`` attempts were not enough.
        """
        for _ in range(max_retries):
            try:
                async with aiosqlite.connect(self._file) as db:
                    cursor = await db.execute(sql, params)
                    await db.commit()
                    return cursor
            except aiosqlite.OperationalError as err:
                col = self._missing_column_from_error(err)
                if col:
                    # Unknown columns (not in col_sources) fall back to TEXT.
                    await self._ensure_column(table, col, col_sources.get(col))
                    continue

                tbl = self._missing_table_from_error(err)
                if tbl:
                    await self._ensure_table(tbl, col_sources)
                    continue

                raise
        raise RuntimeError(f"Max retries ({max_retries}) exceeded")

    async def _filter_existing_columns(self, table: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Drop keys of ``data`` that are not columns of ``table``.

        ``data`` is returned unchanged when the table does not exist or its
        columns cannot be determined.
        """
        if not await self._table_exists(table):
            return data

        existing_columns = await self._get_table_columns(table)
        if not existing_columns:
            return data

        return {k: v for k, v in data.items() if k in existing_columns}

    async def _safe_query(
        self,
        table: str,
        sql: str,
        params: Iterable[Any],
        col_sources: Dict[str, Any],
    ) -> AsyncGenerator[Dict[str, Any], None]:
        """Yield result rows as dicts; missing tables/columns yield nothing.

        Other ``OperationalError``s propagate to the caller.
        """
        if not await self._table_exists(table):
            return

        try:
            async with aiosqlite.connect(self._file) as db:
                db.row_factory = aiosqlite.Row
                async with db.execute(sql, params) as cursor:
                    async for row in cursor:
                        yield dict(row)
        except aiosqlite.OperationalError as err:
            # A query against a missing table or column is treated as "no rows".
            if self._missing_table_from_error(err):
                return
            if "no such column" in str(err).lower():
                return
            raise

    async def _first_row(
        self,
        table: str,
        sql: str,
        params: Iterable[Any],
        col_sources: Dict[str, Any],
    ) -> Optional[Dict[str, Any]]:
        """Return the first row of a query, or None, closing the generator promptly."""
        rows = self._safe_query(table, sql, params, col_sources)
        try:
            async for row in rows:
                return row
            return None
        finally:
            # Close explicitly so the underlying connection is released now
            # rather than whenever the generator is finalized.
            await rows.aclose()

    @staticmethod
    def _build_where(where: Optional[Dict[str, Any]]) -> tuple[str, List[Any]]:
        """Build a `` WHERE a = ? AND b = ?`` clause and its parameter list.

        A ``None`` value is rendered as ``IS NULL`` because ``col = NULL``
        never matches in SQL.
        """
        if not where:
            return "", []

        clauses: List[str] = []
        vals: List[Any] = []
        for column, value in where.items():
            if value is None:
                clauses.append(f"`{column}` IS NULL")
            else:
                clauses.append(f"`{column}` = ?")
                vals.append(value)
        return " WHERE " + " AND ".join(clauses), vals

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------
    async def insert(self, table: str, args: Dict[str, Any], return_id: bool = False) -> Union[str, int]:
        """Insert a record and return its identifier.

        Returns the stored ``uid`` by default. With ``return_id=True`` (and no
        explicit ``id`` in ``args``) the rowid of the new row is returned.
        """
        now = self._utc_iso()
        record = {
            "uid": str(uuid4()),
            "created_at": now,
            "updated_at": now,
            "deleted_at": None,
            **args,
        }

        await self._ensure_table(table, record)

        want_rowid = return_id and "id" not in args
        if want_rowid:
            # SQLite cannot add a PRIMARY KEY column through ALTER TABLE, so a
            # plain INTEGER column is the most that can be added here. The
            # returned value comes from lastrowid, so this step is best effort.
            try:
                await self._ensure_column(table, "id", 0)
            except aiosqlite.Error:
                pass

        cols = "`" + "`, `".join(record) + "`"
        qs = ", ".join(["?"] * len(record))
        sql = f"INSERT INTO {table} ({cols}) VALUES ({qs})"
        cursor = await self._safe_execute(table, sql, list(record.values()), record)

        if want_rowid:
            return cursor.lastrowid
        # Use the stored value: ``args`` may have supplied its own uid.
        return record["uid"]

    async def update(
        self,
        table: str,
        args: Dict[str, Any],
        where: Optional[Dict[str, Any]] = None,
    ) -> int:
        """Update rows matching ``where`` and return the number of rows affected.

        Returns 0 when ``args`` is empty or the table does not exist.
        ``updated_at`` is always refreshed. The caller's ``args`` is not modified.
        """
        if not args:
            return 0
        if not await self._table_exists(table):
            return 0

        values = {**args, "updated_at": self._utc_iso()}
        all_cols = {**values, **(where or {})}

        # Only touch the schema for columns that are not known to exist; a
        # stale cache is harmless because _safe_execute repairs the schema.
        existing = await self._get_table_columns(table)
        for col, val in all_cols.items():
            if col not in existing:
                await self._ensure_column(table, col, val)

        set_clause = ", ".join(f"`{k}` = ?" for k in values)
        where_clause, where_params = self._build_where(where)
        sql = f"UPDATE {table} SET {set_clause}{where_clause}"
        params = list(values.values()) + where_params
        cur = await self._safe_execute(table, sql, params, all_cols)
        return cur.rowcount

    async def delete(self, table: str, where: Optional[Dict[str, Any]] = None) -> int:
        """Delete rows matching ``where`` (all rows if omitted); return the count."""
        if not await self._table_exists(table):
            return 0

        where_clause, where_params = self._build_where(where)
        sql = f"DELETE FROM {table}{where_clause}"
        cur = await self._safe_execute(table, sql, where_params, where or {})
        return cur.rowcount

    async def upsert(
        self,
        table: str,
        args: Dict[str, Any],
        where: Optional[Dict[str, Any]] = None,
    ) -> str | None:
        """Update rows matching ``where``; insert ``where`` + ``args`` if none match.

        Returns the uid of a matching/inserted row.

        Raises:
            ValueError: if ``args`` is empty.
        """
        if not args:
            raise ValueError("Nothing to update. Empty dict given.")

        # Work on a copy so the caller's dict is left untouched.
        values = {**args, "updated_at": self._utc_iso()}
        affected = await self.update(table, values, where)
        if affected:
            rec = await self.get(table, where)
            return rec.get("uid") if rec else None

        merged = {**(where or {}), **values}
        return await self.insert(table, merged)

    async def get(
        self, table: str, where: Optional[Dict[str, Any]] = None
    ) -> Optional[Dict[str, Any]]:
        """Return the first row matching ``where``, or None."""
        where_clause, where_params = self._build_where(where)
        sql = f"SELECT * FROM {table}{where_clause} LIMIT 1"
        return await self._first_row(table, sql, where_params, where or {})

    async def find(
        self,
        table: str,
        where: Optional[Dict[str, Any]] = None,
        *,
        limit: int = 0,
        offset: int = 0,
        order_by: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Return all rows matching ``where``.

        ``limit``/``offset`` of 0 mean "not set". ``order_by`` is inserted
        into the SQL as given (e.g. ``"age ASC"``).
        """
        where_clause, where_params = self._build_where(where)
        order_clause = f" ORDER BY {order_by}" if order_by else ""

        limit, offset = int(limit), int(offset)
        extra = ""
        if limit or offset:
            # SQLite only accepts OFFSET after LIMIT; -1 means "no limit".
            extra = f" LIMIT {limit if limit else -1}"
            if offset:
                extra += f" OFFSET {offset}"

        sql = f"SELECT * FROM {table}{where_clause}{order_clause}{extra}"
        return [
            row async for row in self._safe_query(table, sql, where_params, where or {})
        ]

    async def count(self, table: str, where: Optional[Dict[str, Any]] = None) -> int:
        """Return the number of rows matching ``where`` (0 for a missing table)."""
        where_clause, where_params = self._build_where(where)
        sql = f"SELECT COUNT(*) FROM {table}{where_clause}"
        row = await self._first_row(table, sql, where_params, where or {})
        if not row:
            return 0
        return next(iter(row.values()), 0)

    async def exists(self, table: str, where: Dict[str, Any]) -> bool:
        """Return True if at least one row matches ``where``."""
        return (await self.count(table, where)) > 0

    # ------------------------------------------------------------------
    # Key/value store
    # ------------------------------------------------------------------
    async def kv_set(
        self,
        key: str,
        value: Any,
        *,
        table: str | None = None,
    ) -> None:
        """Store ``value`` JSON-encoded under ``key`` (non-JSON types become ``str``)."""
        tbl = table or self._KV_TABLE
        json_val = json.dumps(value, default=str)
        await self.upsert(tbl, {"value": json_val}, {"key": key})

    async def kv_get(
        self,
        key: str,
        *,
        default: Any = None,
        table: str | None = None,
    ) -> Any:
        """Return the decoded value stored under ``key``, or ``default``."""
        tbl = table or self._KV_TABLE
        row = await self.get(tbl, {"key": key})
        if not row:
            return default
        try:
            return json.loads(row["value"])
        except (KeyError, TypeError, ValueError):
            # Missing column, NULL value or malformed JSON.
            return default

    # ------------------------------------------------------------------
    # Raw SQL
    # ------------------------------------------------------------------
    async def execute_raw(self, sql: str, params: Optional[Tuple] = None) -> Any:
        """Execute raw SQL, commit, and return the cursor."""
        async with aiosqlite.connect(self._file) as db:
            cursor = await db.execute(sql, params or ())
            await db.commit()
            return cursor

    async def query_raw(self, sql: str, params: Optional[Tuple] = None) -> List[Dict[str, Any]]:
        """Run a raw query and return rows as dicts ([] on ``OperationalError``)."""
        try:
            async with aiosqlite.connect(self._file) as db:
                db.row_factory = aiosqlite.Row
                async with db.execute(sql, params or ()) as cursor:
                    return [dict(row) async for row in cursor]
        except aiosqlite.OperationalError:
            return []

    async def query_one(self, sql: str, params: Optional[Tuple] = None) -> Optional[Dict[str, Any]]:
        """Run a raw query and return its first row (None if empty or on ``OperationalError``).

        Only the first row is fetched; the SQL text itself is left untouched,
        so statements that already end in ``LIMIT`` or ``;`` work too.
        """
        try:
            async with aiosqlite.connect(self._file) as db:
                db.row_factory = aiosqlite.Row
                async with db.execute(sql, params or ()) as cursor:
                    row = await cursor.fetchone()
                    return dict(row) if row is not None else None
        except aiosqlite.OperationalError:
            return None

    # ------------------------------------------------------------------
    # Schema / misc
    # ------------------------------------------------------------------
    async def create_table(self, table: str, schema: Dict[str, str], constraints: Optional[List[str]] = None):
        """Create ``table`` with a custom schema; default columns are always included.

        ``schema`` maps column name to SQL type/definition and may override a
        default column. ``constraints`` are appended verbatim as table
        constraints (e.g. ``"UNIQUE(email)"``).
        """
        full_schema = self._DEFAULT_COLUMNS.copy()
        full_schema.update(schema)

        columns = [f"`{col}` {dtype}" for col, dtype in full_schema.items()]
        if constraints:
            columns.extend(constraints)

        columns_sql = ", ".join(columns)
        async with aiosqlite.connect(self._file) as db:
            await db.execute(f"CREATE TABLE IF NOT EXISTS {table} ({columns_sql})")
            await db.commit()
            await self._invalidate_column_cache(table)

    async def insert_unique(self, table: str, args: Dict[str, Any], unique_fields: List[str]) -> Union[str, None]:
        """Insert a record; return its uid, or None on a UNIQUE violation.

        ``unique_fields`` is accepted for interface compatibility and is not
        consulted: uniqueness is enforced by the table's own constraints.
        """
        try:
            return await self.insert(table, args)
        except aiosqlite.IntegrityError as e:
            if "UNIQUE" in str(e):
                return None
            raise

    def transaction(self) -> "TransactionContext":
        """Return an async context manager wrapping one transaction.

        Usage: ``async with ds.transaction() as conn: ...``. The returned
        object is also awaitable, so ``async with await ds.transaction()``
        keeps working.
        """
        return TransactionContext(self._file)

    async def aggregate(
        self,
        table: str,
        function: str,
        column: str = "*",
        where: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Apply an aggregate such as SUM, AVG, MAX or MIN; None for a missing table."""
        if not await self._table_exists(table):
            return None

        where_clause, where_params = self._build_where(where)
        sql = f"SELECT {function}({column}) as result FROM {table}{where_clause}"
        result = await self.query_one(sql, tuple(where_params))
        return result["result"] if result else None


class TransactionContext:
    """Async context manager that commits on success and rolls back on error.

    ``__aenter__`` yields the open ``aiosqlite`` connection with
    ``row_factory`` set to ``aiosqlite.Row``.
    """

    def __init__(self, db_file: str):
        self.db_file = db_file
        self.conn = None

    def __await__(self):
        # Awaiting the context returns the context itself, so code written as
        # ``async with await ds.transaction()`` continues to work.
        async def _identity():
            return self

        return _identity().__await__()

    async def __aenter__(self):
        self.conn = await aiosqlite.connect(self.db_file)
        try:
            self.conn.row_factory = aiosqlite.Row
            await self.conn.execute("BEGIN")
        except BaseException:
            # Do not leak the connection if the transaction cannot start.
            await self.conn.close()
            self.conn = None
            raise
        return self.conn

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                await self.conn.commit()
            else:
                await self.conn.rollback()
        finally:
            # Always release the connection, even if commit/rollback fails.
            await self.conn.close()
            self.conn = None


class TestAsyncDataSet(unittest.IsolatedAsyncioTestCase):
    """Behavioral tests for AsyncDataSet against a throwaway database file."""

    async def asyncSetUp(self):
        self.db_path = Path("temp_test.db")
        if self.db_path.exists():
            self.db_path.unlink()
        self.connector = AsyncDataSet(str(self.db_path))

    async def asyncTearDown(self):
        if self.db_path.exists():
            self.db_path.unlink()

    async def test_insert_and_get(self):
        await self.connector.insert("people", {"name": "John Doe", "age": 30})
        rec = await self.connector.get("people", {"name": "John Doe"})
        self.assertIsNotNone(rec)
        self.assertEqual(rec["name"], "John Doe")

    async def test_insert_returns_supplied_uid(self):
        uid = await self.connector.insert("people", {"uid": "fixed", "name": "Ann"})
        self.assertEqual(uid, "fixed")

    async def test_get_nonexistent(self):
        result = await self.connector.get("people", {"name": "Jane Doe"})
        self.assertIsNone(result)

    async def test_where_none_matches_null(self):
        await self.connector.insert("people", {"name": "John"})
        rec = await self.connector.get("people", {"deleted_at": None})
        self.assertIsNotNone(rec)

    async def test_update(self):
        await self.connector.insert("people", {"name": "John Doe", "age": 30})
        await self.connector.update("people", {"age": 31}, {"name": "John Doe"})
        rec = await self.connector.get("people", {"name": "John Doe"})
        self.assertEqual(rec["age"], 31)

    async def test_update_does_not_mutate_args(self):
        await self.connector.insert("people", {"name": "John Doe", "age": 30})
        args = {"age": 31}
        await self.connector.update("people", args, {"name": "John Doe"})
        self.assertEqual(args, {"age": 31})

    async def test_order_by(self):
        await self.connector.insert("people", {"name": "Alice", "age": 25})
        await self.connector.insert("people", {"name": "Bob", "age": 30})
        await self.connector.insert("people", {"name": "Charlie", "age": 20})

        results = await self.connector.find("people", order_by="age ASC")
        self.assertEqual(results[0]["name"], "Charlie")
        self.assertEqual(results[-1]["name"], "Bob")

    async def test_find_offset_without_limit(self):
        await self.connector.insert("people", {"name": "Alice", "age": 25})
        await self.connector.insert("people", {"name": "Bob", "age": 30})
        await self.connector.insert("people", {"name": "Charlie", "age": 20})

        results = await self.connector.find("people", order_by="age ASC", offset=1)
        self.assertEqual([r["name"] for r in results], ["Alice", "Bob"])

    async def test_raw_query(self):
        await self.connector.insert("people", {"name": "John", "age": 30})
        await self.connector.insert("people", {"name": "Jane", "age": 25})

        results = await self.connector.query_raw(
            "SELECT * FROM people WHERE age > ?", (26,)
        )
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]["name"], "John")

    async def test_aggregate(self):
        await self.connector.insert("people", {"name": "John", "age": 30})
        await self.connector.insert("people", {"name": "Jane", "age": 25})
        await self.connector.insert("people", {"name": "Bob", "age": 35})

        avg_age = await self.connector.aggregate("people", "AVG", "age")
        self.assertEqual(avg_age, 30)

        max_age = await self.connector.aggregate("people", "MAX", "age")
        self.assertEqual(max_age, 35)

    async def test_insert_with_auto_id(self):
        id1 = await self.connector.insert("posts", {"title": "First"}, return_id=True)
        id2 = await self.connector.insert("posts", {"title": "Second"}, return_id=True)
        self.assertEqual(id2, id1 + 1)

    async def test_transaction(self):
        # The raw INSERT below needs the table to exist beforehand.
        await self.connector.create_table(
            "people", {"name": "TEXT", "age": "INTEGER"}
        )

        async with self.connector.transaction() as conn:
            await conn.execute(
                "INSERT INTO people (uid, name, age, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
                ("test-uid", "John", 30, "2024-01-01", "2024-01-01"),
            )

        rec = await self.connector.get("people", {"name": "John"})
        self.assertIsNotNone(rec)

        # Legacy call style: awaiting transaction() first must still work.
        async with await self.connector.transaction() as conn:
            await conn.execute(
                "INSERT INTO people (uid, name, age, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
                ("test-uid-2", "Jane", 25, "2024-01-01", "2024-01-01"),
            )

        rec = await self.connector.get("people", {"name": "Jane"})
        self.assertIsNotNone(rec)

    async def test_create_custom_table(self):
        schema = {
            "id": "INTEGER PRIMARY KEY AUTOINCREMENT",
            "username": "TEXT NOT NULL",
            "email": "TEXT NOT NULL",
            "score": "INTEGER DEFAULT 0",
        }
        constraints = ["UNIQUE(username)", "UNIQUE(email)"]

        await self.connector.create_table("users", schema, constraints)

        result = await self.connector.insert_unique(
            "users",
            {"username": "john", "email": "john@example.com"},
            ["username", "email"],
        )
        self.assertIsNotNone(result)

        # Same username again must be rejected by the UNIQUE constraint.
        result = await self.connector.insert_unique(
            "users",
            {"username": "john", "email": "different@example.com"},
            ["username", "email"],
        )
        self.assertIsNone(result)

    async def test_missing_table_operations(self):
        self.assertEqual(await self.connector.count("nonexistent"), 0)
        self.assertEqual(await self.connector.find("nonexistent"), [])
        self.assertIsNone(await self.connector.get("nonexistent"))
        self.assertFalse(await self.connector.exists("nonexistent", {"id": 1}))
        self.assertEqual(await self.connector.delete("nonexistent"), 0)
        self.assertEqual(await self.connector.update("nonexistent", {"name": "test"}), 0)

    async def test_auto_column_creation(self):
        await self.connector.insert("dynamic", {"col1": "value1", "col2": 42, "col3": 3.14})

        # A later insert may introduce columns the table does not have yet.
        await self.connector.insert("dynamic", {"col1": "value2", "col4": True, "col5": None})

        records = await self.connector.find("dynamic")
        self.assertEqual(len(records), 2)


if __name__ == "__main__":
    unittest.main()