documentation!

This commit is contained in:
Gregor Aisch 2013-04-02 11:10:29 +02:00
parent 4bfd6e5d0c
commit 58c1773777
6 changed files with 133 additions and 66 deletions

View File

@ -9,8 +9,7 @@ from dataset.persistence.table import Table
def connect(url):
""" Opens a new connection to a database. *url* can be any valid `SQLAlchemy engine URL`_. Returns
an instance of :py:class:`dataset.Database.
an instance of :py:class:`Database <dataset.Database>`.
::
db = dataset.connect('sqlite:///factbook.db')

View File

@ -40,7 +40,10 @@ class Database(object):
an `id` column, which is set to be an auto-incrementing integer
as the primary key of the table.
Returns a :py:class:`dataset.Table` instance."""
Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.create_table('population')
"""
with self.lock:
log.debug("Creating table: %s on %r" % (table_name, self.engine))
table = SQLATable(table_name, self.metadata)
@ -53,10 +56,12 @@ class Database(object):
def load_table(self, table_name):
""" Loads a table. This will fail if the table does not already
exist in the database. If the table exists, its columns will be
reflected and are available on the :py:class:`dataset.Table`
reflected and are available on the :py:class:`Table <dataset.Table>`
object.
Returns a :py:class:`dataset.Table` instance."""
Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.load_table('population')"""
with self.lock:
log.debug("Loading table: %s on %r" % (table_name, self))
table = SQLATable(table_name, self.metadata, autoload=True)
@ -64,9 +69,15 @@ class Database(object):
return Table(self, table)
def get_table(self, table_name):
""" Loads a table or creates it if it doesn't exist yet.
Returns a :py:class:`dataset.Table` instance. Alternatively to *get_table*
you can also get tables using the dict syntax."""
""" Smart wrapper around *load_table* and *create_table*. Either loads a table
or creates it if it doesn't exist yet.
Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.get_table('population')
# you can also use the short-hand syntax:
table = db['population']
"""
with self.lock:
if table_name in self._tables:
return Table(self, self._tables[table_name])
@ -83,9 +94,7 @@ class Database(object):
execution of arbitrary read/write queries. A query can either be
a plain text string, or a SQLAlchemy expression. The returned
iterator will yield each result sequentially.
.. code-block:: python
::
result = db.query('SELECT * FROM population WHERE population > 10000000')
for row in result:
print row

View File

@ -24,13 +24,13 @@ class Table(object):
Note: the object will be in an unusable state after using this
command and should not be used again. If you want to re-create
the table, make sure to get a fresh instance from the
:py:class:`dataset.Database`. """
:py:class:`Database <dataset.Database>`. """
with self.database.lock:
self.database.tables.pop(self.table.name, None)
self.table.drop(engine)
def insert(self, row, ensure=True, types={}):
""" Add a row (type: dict) by inserting it into the database.
""" Add a row (type: dict) by inserting it into the table.
If ``ensure`` is set and any of the keys of the row are not
table columns, they will be created automatically.
@ -43,27 +43,23 @@ class Table(object):
self._ensure_columns(row, types=types)
self.database.engine.execute(self.table.insert(row))
def update(self, row, unique, ensure=True, types={}):
""" Update a row in the database. The update is managed via
the set of column names stated in ``unique``: they will be
def update(self, row, keys, ensure=True, types={}):
""" Update a row in the table. The update is managed via
the set of column names stated in ``keys``: they will be
used as filters for the data to be updated, using the values
in ``row``. Example:
.. code-block:: python
in ``row``.
::
# update all entries with id matching 10, setting their title columns
data = dict(id=10, title='I am a banana!')
table.update(data, ['id'])
This will update all entries matching the given ``id``, setting
their ``title`` column.
If keys in ``row`` update columns not present in the table,
they will be created based on the settings of ``ensure`` and
``types``, matching the behaviour of ``insert``.
``types``, matching the behaviour of :py:meth:`insert() <dataset.Table.insert>`.
"""
if not len(unique):
if not len(keys):
return False
clause = [(u, row.get(u)) for u in unique]
clause = [(u, row.get(u)) for u in keys]
if ensure:
self._ensure_columns(row, types=types)
try:
@ -74,15 +70,25 @@ class Table(object):
except KeyError, ke:
return False
def upsert(self, row, unique, ensure=True, types={}):
def upsert(self, row, keys, ensure=True, types={}):
"""An UPSERT is a smart combination of insert and update. If rows with matching ``keys`` exist
they will be updated, otherwise a new row is inserted in the table.
::
data = dict(id=10, title='I am a banana!')
table.upsert(data, ['id'])
"""
if ensure:
self.create_index(unique)
self.create_index(keys)
if not self.update(row, unique, ensure=ensure, types=types):
if not self.update(row, keys, ensure=ensure, types=types):
self.insert(row, ensure=ensure, types=types)
def delete(self, **kw):
q = self._args_to_clause(kw)
def delete(self, **filter):
"""Delete rows matching the ``filter`` arguments.
::
table.delete(year=2010)
"""
q = self._args_to_clause(filter)
stmt = self.table.delete(q)
self.database.engine.execute(stmt)
@ -93,7 +99,7 @@ class Table(object):
else:
_type = guess_type(row[column])
log.debug("Creating column: %s (%s) on %r" % (column,
_type, self.table.name))
_type, self.table.name))
self.create_column(column, _type)
def _args_to_clause(self, args):
@ -108,7 +114,7 @@ class Table(object):
if name not in self.table.columns.keys():
col = Column(name, type)
col.create(self.table,
connection=self.database.engine)
connection=self.database.engine)
def create_index(self, columns, name=None):
with self.database.lock:
@ -126,26 +132,41 @@ class Table(object):
self.indexes[name] = idx
return idx
def find_one(self, **kw):
res = list(self.find(_limit=1, **kw))
def find_one(self, **filter):
"""Works just like :py:meth:`find() <dataset.Table.find>` but returns only the first result.
::
row = table.find_one(country='United States')
"""
res = list(self.find(_limit=1, **filter))
if not len(res):
return None
return res[0]
def find(self, _limit=None, _step=5000, _offset=0,
order_by='id', **kw):
order_by='id', **filter):
"""Performs a simple search on the table.
::
results = table.find(country='France')
# combining multiple conditions (AND)
results = table.find(country='France', year=1980)
# just return the first 10 rows
results = table.find(country='France', _limit=10)
# sort results by a column
results = table.find(country='France', order_by='year')
For more complex queries, please use :py:meth:`db.query() <dataset.Database.query>` instead."""
order_by = [self.table.c[order_by].asc()]
args = self._args_to_clause(kw)
args = self._args_to_clause(filter)
for i in count():
qoffset = _offset + (_step * i)
qlimit = _step
if _limit is not None:
qlimit = min(_limit-(_step*i), _step)
qlimit = min(_limit - (_step * i), _step)
if qlimit <= 0:
break
q = self.table.select(whereclause=args, limit=qlimit,
offset=qoffset, order_by=order_by)
offset=qoffset, order_by=order_by)
rows = list(self.database.query(q))
if not len(rows):
return
@ -156,20 +177,33 @@ class Table(object):
d = self.database.query(self.table.count()).next()
return d.values().pop()
def distinct(self, *columns, **kw):
def distinct(self, *columns, **filter):
"""Returns all rows of a table, but removes rows with duplicate values in ``columns``.
Internally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_.
::
# returns only one row per year, ignoring the rest
table.distinct('year')
# works with multiple columns, too
table.distinct('year', 'country')
# you can also combine this with a filter
table.distinct('year', country='China')
"""
qargs = []
try:
columns = [self.table.c[c] for c in columns]
for col, val in kw.items():
qargs.append(self.table.c[col]==val)
for col, val in filter.items():
qargs.append(self.table.c[col] == val)
except KeyError:
return []
q = expression.select(columns, distinct=True,
whereclause=and_(*qargs),
order_by=[c.asc() for c in columns])
whereclause=and_(*qargs),
order_by=[c.asc() for c in columns])
return self.database.query(q)
def all(self):
"""Returns all rows of the table as simple dictionaries. This is simply a shortcut
to *find()* called with no arguments.
::
rows = table.all()"""
return self.find()

View File

@ -1,7 +1,7 @@
{%- extends "basic/layout.html" %}
{%- block extrahead %}
{{ super() }}
<link href='http://fonts.googleapis.com/css?family=Lato:300' rel='stylesheet' type='text/css'>
<link href='http://fonts.googleapis.com/css?family=Open+Sans:400|Antic+Slab' rel='stylesheet' type='text/css'>
{% if theme_touch_icon %}
<link rel="apple-touch-icon" href="{{ pathto('_static/' ~ theme_touch_icon, 1) }}" />
{% endif %}

View File

@ -14,9 +14,10 @@
/* -- page layout ----------------------------------------------------------- */
body {
font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro';
font-family: "Georgia", "Open Sans", OpenSansRegular, sans-serif;
font-size: 17px;
background-color: whitesmoke;
background-color: white;
font-weight: 400;
color: #000;
margin: 0;
padding: 0;
@ -45,7 +46,7 @@ hr {
}
div.body {
background-color: whitesmoke;
background-color: white;
color: #3E4349;
padding: 0 30px 0 30px;
}
@ -98,7 +99,7 @@ div.sphinxsidebarwrapper p.logo {
div.sphinxsidebar h3,
div.sphinxsidebar h4 {
font-family: 'Garamond', 'Georgia', serif;
font-family: 'Antic Slab' ,'Garamond', 'Georgia', serif;
color: #444;
font-size: 24px;
font-weight: normal;
@ -127,7 +128,7 @@ div.sphinxsidebar p {
}
div.sphinxsidebar ul {
margin: 10px 0;
margin: 10px 0 30px;
padding: 0;
color: #000;
}
@ -156,10 +157,11 @@ div.body h3,
div.body h4,
div.body h5,
div.body h6 {
font-family: 'Garamond', 'Georgia', serif;
font-family: 'Antic Slab', "Open Sans", OpenSansRegular, sans-serif;
font-weight: normal;
margin: 30px 0px 10px 0px;
padding: 0;
text-shadow: 1px 1px 3px #ddd;
}
div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; }
@ -244,9 +246,14 @@ p.admonition-title:after {
content: ":";
}
pre, tt {
pre {
font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
font-size: 0.9em;
font-size: 0.8em;
}
tt {
font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
}
img.screenshot {
@ -359,6 +366,7 @@ tt {
tt.xref, a tt {
background-color: #FBFBFB;
color: #2277bb;
border-bottom: 1px solid white;
}

View File

@ -6,14 +6,31 @@
dataset: databases for humans
=============================
dataset is a ETL
Getting the databases out of your data's way::
Getting the databases out of your data's way.
import dataset
db = dataset.connect('sqlite:///weather.db')
db['temperature'].find()
Features include:
* **Automatic schema**. If a table or column is written that does not
exist in the database, it will be created automatically.
* **Upserts**. Records are either created or updated, depending on
whether an existing version can be found.
* **Query helpers** for simple queries such as all rows in a table or
all distinct values across a set of columns.
.. toctree::
:maxdepth: 2
* `Learn how to use dataset in five minutes <quickstart>`_
* `Browse the complete API docs <api>`_
Next steps:
`Learn how to use dataset in five minutes <quickstart>`_
`Browse the complete API docs <api>`_