diff --git a/dataset/__init__.py b/dataset/__init__.py index 99007a5..83b0b0c 100644 --- a/dataset/__init__.py +++ b/dataset/__init__.py @@ -9,8 +9,7 @@ from dataset.persistence.table import Table def connect(url): """ Opens a new connection to a database. *url* can be any valid `SQLAlchemy engine URL`_. Returns - an instance of :py:class:`dataset.Database. - + an instance of :py:class:`Database `. :: db = dataset.connect('sqlite:///factbook.db') diff --git a/dataset/persistence/database.py b/dataset/persistence/database.py index 84de871..8dbc242 100644 --- a/dataset/persistence/database.py +++ b/dataset/persistence/database.py @@ -40,7 +40,10 @@ class Database(object): an `id` column, which is set to be an auto-incrementing integer as the primary key of the table. - Returns a :py:class:`dataset.Table` instance.""" + Returns a :py:class:`Table ` instance. + :: + table = db.create_table('population') + """ with self.lock: log.debug("Creating table: %s on %r" % (table_name, self.engine)) table = SQLATable(table_name, self.metadata) @@ -53,10 +56,12 @@ class Database(object): def load_table(self, table_name): """ Loads a table. This will fail if the tables does not already exist in the database. If the table exists, its columns will be - reflected and are available on the :py:class:`dataset.Table` + reflected and are available on the :py:class:`Table ` object. - Returns a :py:class:`dataset.Table` instance.""" + Returns a :py:class:`Table ` instance. + :: + table = db.load_table('population')""" with self.lock: log.debug("Loading table: %s on %r" % (table_name, self)) table = SQLATable(table_name, self.metadata, autoload=True) @@ -64,9 +69,15 @@ class Database(object): return Table(self, table) def get_table(self, table_name): - """ Loads a table or creates it if it doesn't exist yet. - Returns a :py:class:`dataset.Table` instance. 
Alternatively to *get_table* - you can also get tables using the dict syntax.""" + """ Smart wrapper around *load_table* and *create_table*. Either loads a table + or creates it if it doesn't exist yet. + + Returns a :py:class:`Table ` instance. + :: + table = db.get_table('population') + # you can also use the short-hand syntax: + table = db['population'] + """ with self.lock: if table_name in self._tables: return Table(self, self._tables[table_name]) @@ -83,9 +94,7 @@ class Database(object): execution of arbitrary read/write queries. A query can either be a plain text string, or a SQLAlchemy expression. The returned iterator will yield each result sequentially. - - .. code-block:: python - + :: result = db.query('SELECT * FROM population WHERE population > 10000000') for row in result: print row diff --git a/dataset/persistence/table.py b/dataset/persistence/table.py index a83c56c..14263a9 100644 --- a/dataset/persistence/table.py +++ b/dataset/persistence/table.py @@ -18,24 +18,24 @@ class Table(object): self.table = table def drop(self): - """ Drop the table from the database, deleting both the schema + """ Drop the table from the database, deleting both the schema and all the contents within it. - + Note: the object will be in an unusable state after using this command and should not be used again. If you want to re-create the table, make sure to get a fresh instance from the - :py:class:`dataset.Database`. """ + :py:class:`Database `. """ with self.database.lock: self.database.tables.pop(self.table.name, None) self.table.drop(engine) def insert(self, row, ensure=True, types={}): - """ Add a row (type: dict) by inserting it into the database. + """ Add a row (type: dict) by inserting it into the table. If ``ensure`` is set, any of the keys of the row are not - table columns, they will be created automatically. - + table columns, they will be created automatically. 
+ During column creation, ``types`` will be checked for a key - matching the name of a column to be created, and the given + matching the name of a column to be created, and the given SQLAlchemy column type will be used. Otherwise, the type is guessed from the row's value, defaulting to a simple unicode field. """ @@ -43,27 +43,23 @@ class Table(object): self._ensure_columns(row, types=types) self.database.engine.execute(self.table.insert(row)) - def update(self, row, unique, ensure=True, types={}): - """ Update a row in the database. The update is managed via - the set of column names stated in ``unique``: they will be + def update(self, row, keys, ensure=True, types={}): + """ Update a row in the table. The update is managed via + the set of column names stated in ``keys``: they will be used as filters for the data to be updated, using the values - in ``row``. Example: - - .. code-block:: python - + in ``row``. + :: + # update all entries with id matching 10, setting their title columns data = dict(id=10, title='I am a banana!') table.update(data, ['id']) - This will update all entries matching the given ``id``, setting - their ``title`` column. - - If keys in ``row`` update columns not present in the table, - they will be created based on the settings of ``ensure`` and - ``types``, matching the behaviour of ``insert``. + If keys in ``row`` update columns not present in the table, + they will be created based on the settings of ``ensure`` and + ``types``, matching the behaviour of :py:meth:`insert() `. """ - if not len(unique): + if not len(keys): return False - clause = [(u, row.get(u)) for u in unique] + clause = [(u, row.get(u)) for u in keys] if ensure: self._ensure_columns(row, types=types) try: @@ -74,15 +70,25 @@ class Table(object): except KeyError, ke: return False - def upsert(self, row, unique, ensure=True, types={}): + def upsert(self, row, keys, ensure=True, types={}): + """An UPSERT is a smart combination of insert and update. 
If rows with matching ``keys`` exist + they will be updated, otherwise a new row is inserted in the table. + :: + data = dict(id=10, title='I am a banana!') + table.upsert(data, ['id']) + """ if ensure: - self.create_index(unique) + self.create_index(keys) - if not self.update(row, unique, ensure=ensure, types=types): + if not self.update(row, keys, ensure=ensure, types=types): self.insert(row, ensure=ensure, types=types) - def delete(self, **kw): - q = self._args_to_clause(kw) + def delete(self, **filter): + """Delete rows matching the ``filter`` arguments. + :: + table.delete(year=2010) + """ + q = self._args_to_clause(filter) stmt = self.table.delete(q) self.database.engine.execute(stmt) @@ -92,8 +98,8 @@ class Table(object): _type = types[column] else: _type = guess_type(row[column]) - log.debug("Creating column: %s (%s) on %r" % (column, - _type, self.table.name)) + log.debug("Creating column: %s (%s) on %r" % (column, + _type, self.table.name)) self.create_column(column, _type) def _args_to_clause(self, args): @@ -108,7 +114,7 @@ class Table(object): if name not in self.table.columns.keys(): col = Column(name, type) col.create(self.table, - connection=self.database.engine) + connection=self.database.engine) def create_index(self, columns, name=None): with self.database.lock: @@ -126,29 +132,44 @@ class Table(object): self.indexes[name] = idx return idx - def find_one(self, **kw): - res = list(self.find(_limit=1, **kw)) + def find_one(self, **filter): + """Works just like :py:meth:`find() ` but returns only the first result. + :: + row = table.find_one(country='United States') + """ + res = list(self.find(_limit=1, **filter)) if not len(res): return None return res[0] def find(self, _limit=None, _step=5000, _offset=0, - order_by='id', **kw): + order_by='id', **filter): + """Performs a simple search on the table. 
+ :: + results = table.find(country='France') + # combining multiple conditions (AND) + results = table.find(country='France', year=1980) + # just return the first 10 rows + results = table.find(country='France', _limit=10) + # sort results by a column + results = table.find(country='France', order_by='year') + + For more complex queries, please use :py:meth:`db.query() ` instead.""" order_by = [self.table.c[order_by].asc()] - args = self._args_to_clause(kw) + args = self._args_to_clause(filter) for i in count(): qoffset = _offset + (_step * i) qlimit = _step if _limit is not None: - qlimit = min(_limit-(_step*i), _step) + qlimit = min(_limit - (_step * i), _step) if qlimit <= 0: break q = self.table.select(whereclause=args, limit=qlimit, - offset=qoffset, order_by=order_by) + offset=qoffset, order_by=order_by) rows = list(self.database.query(q)) if not len(rows): - return + return for row in rows: yield row @@ -156,20 +177,33 @@ class Table(object): d = self.database.query(self.table.count()).next() return d.values().pop() - def distinct(self, *columns, **kw): + def distinct(self, *columns, **filter): + """Returns all rows of a table, but removes rows with duplicate values in ``columns``. + Internally this creates a `DISTINCT statement `_. 
+ :: + # returns only one row per year, ignoring the rest + table.distinct('year') + # works with multiple columns, too + table.distinct('year', 'country') + # you can also combine this with a filter + table.distinct('year', country='China') + """ qargs = [] try: columns = [self.table.c[c] for c in columns] - for col, val in kw.items(): - qargs.append(self.table.c[col]==val) + for col, val in filter.items(): + qargs.append(self.table.c[col] == val) except KeyError: return [] q = expression.select(columns, distinct=True, - whereclause=and_(*qargs), - order_by=[c.asc() for c in columns]) + whereclause=and_(*qargs), + order_by=[c.asc() for c in columns]) return self.database.query(q) def all(self): + """Returns all rows of the table as simple dictionaries. This is simply a shortcut + to *find()* called with no arguments. + :: + rows = table.all()""" return self.find() - diff --git a/docs/_themes/kr/layout.html b/docs/_themes/kr/layout.html index 391e037..0907a52 100755 --- a/docs/_themes/kr/layout.html +++ b/docs/_themes/kr/layout.html @@ -1,7 +1,7 @@ {%- extends "basic/layout.html" %} {%- block extrahead %} {{ super() }} - + {% if theme_touch_icon %} {% endif %} diff --git a/docs/_themes/kr/static/flasky.css_t b/docs/_themes/kr/static/flasky.css_t index 422731e..c5bf911 100755 --- a/docs/_themes/kr/static/flasky.css_t +++ b/docs/_themes/kr/static/flasky.css_t @@ -14,9 +14,10 @@ /* -- page layout ----------------------------------------------------------- */ body { - font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro'; + font-family: "Georgia", "Open Sans", OpenSansRegular, sans-serif; font-size: 17px; - background-color: whitesmoke; + background-color: white; + font-weight: 400; color: #000; margin: 0; padding: 0; @@ -45,7 +46,7 @@ hr { } div.body { - background-color: whitesmoke; + background-color: white; color: #3E4349; padding: 0 30px 0 30px; } @@ -98,7 +99,7 @@ div.sphinxsidebarwrapper p.logo { div.sphinxsidebar h3, 
div.sphinxsidebar h4 { - font-family: 'Garamond', 'Georgia', serif; + font-family: 'Antic Slab' ,'Garamond', 'Georgia', serif; color: #444; font-size: 24px; font-weight: normal; @@ -127,7 +128,7 @@ div.sphinxsidebar p { } div.sphinxsidebar ul { - margin: 10px 0; + margin: 10px 0 30px; padding: 0; color: #000; } @@ -156,10 +157,11 @@ div.body h3, div.body h4, div.body h5, div.body h6 { - font-family: 'Garamond', 'Georgia', serif; + font-family: 'Antic Slab', "Open Sans", OpenSansRegular, sans-serif; font-weight: normal; margin: 30px 0px 10px 0px; padding: 0; + text-shadow: 1px 1px 3px #ddd; } div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } @@ -244,9 +246,14 @@ p.admonition-title:after { content: ":"; } -pre, tt { +pre { font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; - font-size: 0.9em; + font-size: 0.8em; +} + +tt { + font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.95em; } img.screenshot { @@ -359,6 +366,7 @@ tt { tt.xref, a tt { background-color: #FBFBFB; + color: #2277bb; border-bottom: 1px solid white; } diff --git a/docs/index.rst b/docs/index.rst index 0340555..75b4f4c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,14 +6,31 @@ dataset: databases for humans ============================= +dataset is an ETL +Getting the databases out of your data's way:: -Getting the databases out of your data's way. + import dataset + + db = dataset.connect('sqlite:///weather.db') + db['temperature'].find() + +Features include: + +* **Automatic schema**. If a table or column is written that does not + exist in the database, it will be created automatically. +* **Upserts**. Records are either created or updated, depending on + whether an existing version can be found. +* **Query helpers** for simple queries such as all rows in a table or + all distinct values across a set of columns. .. 
toctree:: :maxdepth: 2 -* `Learn how to use dataset in five minutes `_ -* `Browse the complete API docs `_ +Next steps: + +`Learn how to use dataset in five minutes `_ + +`Browse the complete API docs `_