documentation!

This commit is contained in:
Gregor Aisch 2013-04-02 11:10:29 +02:00
parent 4bfd6e5d0c
commit 58c1773777
6 changed files with 133 additions and 66 deletions

View File

@ -9,8 +9,7 @@ from dataset.persistence.table import Table
def connect(url): def connect(url):
""" Opens a new connection to a database. *url* can be any valid `SQLAlchemy engine URL`_. Returns """ Opens a new connection to a database. *url* can be any valid `SQLAlchemy engine URL`_. Returns
an instance of :py:class:`dataset.Database. an instance of :py:class:`Database <dataset.Database>`.
:: ::
db = dataset.connect('sqlite:///factbook.db') db = dataset.connect('sqlite:///factbook.db')

View File

@ -40,7 +40,10 @@ class Database(object):
an `id` column, which is set to be an auto-incrementing integer an `id` column, which is set to be an auto-incrementing integer
as the primary key of the table. as the primary key of the table.
Returns a :py:class:`dataset.Table` instance.""" Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.create_table('population')
"""
with self.lock: with self.lock:
log.debug("Creating table: %s on %r" % (table_name, self.engine)) log.debug("Creating table: %s on %r" % (table_name, self.engine))
table = SQLATable(table_name, self.metadata) table = SQLATable(table_name, self.metadata)
@ -53,10 +56,12 @@ class Database(object):
def load_table(self, table_name): def load_table(self, table_name):
""" Loads a table. This will fail if the table does not already """ Loads a table. This will fail if the table does not already
exist in the database. If the table exists, its columns will be exist in the database. If the table exists, its columns will be
reflected and are available on the :py:class:`dataset.Table` reflected and are available on the :py:class:`Table <dataset.Table>`
object. object.
Returns a :py:class:`dataset.Table` instance.""" Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.load_table('population')"""
with self.lock: with self.lock:
log.debug("Loading table: %s on %r" % (table_name, self)) log.debug("Loading table: %s on %r" % (table_name, self))
table = SQLATable(table_name, self.metadata, autoload=True) table = SQLATable(table_name, self.metadata, autoload=True)
@ -64,9 +69,15 @@ class Database(object):
return Table(self, table) return Table(self, table)
def get_table(self, table_name): def get_table(self, table_name):
""" Loads a table or creates it if it doesn't exist yet. """ Smart wrapper around *load_table* and *create_table*. Either loads a table
Returns a :py:class:`dataset.Table` instance. Alternatively to *get_table* or creates it if it doesn't exist yet.
you can also get tables using the dict syntax."""
Returns a :py:class:`Table <dataset.Table>` instance.
::
table = db.get_table('population')
# you can also use the short-hand syntax:
table = db['population']
"""
with self.lock: with self.lock:
if table_name in self._tables: if table_name in self._tables:
return Table(self, self._tables[table_name]) return Table(self, self._tables[table_name])
@ -83,9 +94,7 @@ class Database(object):
execution of arbitrary read/write queries. A query can either be execution of arbitrary read/write queries. A query can either be
a plain text string, or a SQLAlchemy expression. The returned a plain text string, or a SQLAlchemy expression. The returned
iterator will yield each result sequentially. iterator will yield each result sequentially.
::
.. code-block:: python
result = db.query('SELECT * FROM population WHERE population > 10000000') result = db.query('SELECT * FROM population WHERE population > 10000000')
for row in result: for row in result:
print row print row

View File

@ -24,13 +24,13 @@ class Table(object):
Note: the object will be in an unusable state after using this Note: the object will be in an unusable state after using this
command and should not be used again. If you want to re-create command and should not be used again. If you want to re-create
the table, make sure to get a fresh instance from the the table, make sure to get a fresh instance from the
:py:class:`dataset.Database`. """ :py:class:`Database <dataset.Database>`. """
with self.database.lock: with self.database.lock:
self.database.tables.pop(self.table.name, None) self.database.tables.pop(self.table.name, None)
self.table.drop(engine) self.table.drop(engine)
def insert(self, row, ensure=True, types={}): def insert(self, row, ensure=True, types={}):
""" Add a row (type: dict) by inserting it into the database. """ Add a row (type: dict) by inserting it into the table.
If ``ensure`` is set and any of the keys of the row are not If ``ensure`` is set and any of the keys of the row are not
table columns, they will be created automatically. table columns, they will be created automatically.
@ -43,27 +43,23 @@ class Table(object):
self._ensure_columns(row, types=types) self._ensure_columns(row, types=types)
self.database.engine.execute(self.table.insert(row)) self.database.engine.execute(self.table.insert(row))
def update(self, row, unique, ensure=True, types={}): def update(self, row, keys, ensure=True, types={}):
""" Update a row in the database. The update is managed via """ Update a row in the table. The update is managed via
the set of column names stated in ``unique``: they will be the set of column names stated in ``keys``: they will be
used as filters for the data to be updated, using the values used as filters for the data to be updated, using the values
in ``row``. Example: in ``row``.
::
.. code-block:: python # update all entries with id matching 10, setting their title columns
data = dict(id=10, title='I am a banana!') data = dict(id=10, title='I am a banana!')
table.update(data, ['id']) table.update(data, ['id'])
This will update all entries matching the given ``id``, setting
their ``title`` column.
If keys in ``row`` update columns not present in the table, If keys in ``row`` update columns not present in the table,
they will be created based on the settings of ``ensure`` and they will be created based on the settings of ``ensure`` and
``types``, matching the behaviour of ``insert``. ``types``, matching the behaviour of :py:meth:`insert() <dataset.Table.insert>`.
""" """
if not len(unique): if not len(keys):
return False return False
clause = [(u, row.get(u)) for u in unique] clause = [(u, row.get(u)) for u in keys]
if ensure: if ensure:
self._ensure_columns(row, types=types) self._ensure_columns(row, types=types)
try: try:
@ -74,15 +70,25 @@ class Table(object):
except KeyError, ke: except KeyError, ke:
return False return False
def upsert(self, row, unique, ensure=True, types={}): def upsert(self, row, keys, ensure=True, types={}):
"""An UPSERT is a smart combination of insert and update. If rows with matching ``keys`` exist
they will be updated, otherwise a new row is inserted in the table.
::
data = dict(id=10, title='I am a banana!')
table.upsert(data, ['id'])
"""
if ensure: if ensure:
self.create_index(unique) self.create_index(keys)
if not self.update(row, unique, ensure=ensure, types=types): if not self.update(row, keys, ensure=ensure, types=types):
self.insert(row, ensure=ensure, types=types) self.insert(row, ensure=ensure, types=types)
def delete(self, **kw): def delete(self, **filter):
q = self._args_to_clause(kw) """Delete rows matching the ``filter`` arguments.
::
table.delete(year=2010)
"""
q = self._args_to_clause(filter)
stmt = self.table.delete(q) stmt = self.table.delete(q)
self.database.engine.execute(stmt) self.database.engine.execute(stmt)
@ -126,22 +132,37 @@ class Table(object):
self.indexes[name] = idx self.indexes[name] = idx
return idx return idx
def find_one(self, **kw): def find_one(self, **filter):
res = list(self.find(_limit=1, **kw)) """Works just like :py:meth:`find() <dataset.Table.find>` but returns only the first result.
::
row = table.find_one(country='United States')
"""
res = list(self.find(_limit=1, **filter))
if not len(res): if not len(res):
return None return None
return res[0] return res[0]
def find(self, _limit=None, _step=5000, _offset=0, def find(self, _limit=None, _step=5000, _offset=0,
order_by='id', **kw): order_by='id', **filter):
"""Performs a simple search on the table.
::
results = table.find(country='France')
# combining multiple conditions (AND)
results = table.find(country='France', year=1980)
# just return the first 10 rows
results = table.find(country='France', _limit=10)
# sort results by a column
results = table.find(country='France', order_by='year')
For more complex queries, please use :py:meth:`db.query() <dataset.Database.query>` instead."""
order_by = [self.table.c[order_by].asc()] order_by = [self.table.c[order_by].asc()]
args = self._args_to_clause(kw) args = self._args_to_clause(filter)
for i in count(): for i in count():
qoffset = _offset + (_step * i) qoffset = _offset + (_step * i)
qlimit = _step qlimit = _step
if _limit is not None: if _limit is not None:
qlimit = min(_limit-(_step*i), _step) qlimit = min(_limit - (_step * i), _step)
if qlimit <= 0: if qlimit <= 0:
break break
q = self.table.select(whereclause=args, limit=qlimit, q = self.table.select(whereclause=args, limit=qlimit,
@ -156,12 +177,22 @@ class Table(object):
d = self.database.query(self.table.count()).next() d = self.database.query(self.table.count()).next()
return d.values().pop() return d.values().pop()
def distinct(self, *columns, **kw): def distinct(self, *columns, **filter):
"""Returns all rows of a table, but removes rows with duplicate values in ``columns``.
Internally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_.
::
# returns only one row per year, ignoring the rest
table.distinct('year')
# works with multiple columns, too
table.distinct('year', 'country')
# you can also combine this with a filter
table.distinct('year', country='China')
"""
qargs = [] qargs = []
try: try:
columns = [self.table.c[c] for c in columns] columns = [self.table.c[c] for c in columns]
for col, val in kw.items(): for col, val in filter.items():
qargs.append(self.table.c[col]==val) qargs.append(self.table.c[col] == val)
except KeyError: except KeyError:
return [] return []
@ -171,5 +202,8 @@ class Table(object):
return self.database.query(q) return self.database.query(q)
def all(self): def all(self):
"""Returns all rows of the table as simple dictionaries. This is simply a shortcut
to *find()* called with no arguments.
::
rows = table.all()"""
return self.find() return self.find()

View File

@ -1,7 +1,7 @@
{%- extends "basic/layout.html" %} {%- extends "basic/layout.html" %}
{%- block extrahead %} {%- block extrahead %}
{{ super() }} {{ super() }}
<link href='http://fonts.googleapis.com/css?family=Lato:300' rel='stylesheet' type='text/css'> <link href='http://fonts.googleapis.com/css?family=Open+Sans:400|Antic+Slab' rel='stylesheet' type='text/css'>
{% if theme_touch_icon %} {% if theme_touch_icon %}
<link rel="apple-touch-icon" href="{{ pathto('_static/' ~ theme_touch_icon, 1) }}" /> <link rel="apple-touch-icon" href="{{ pathto('_static/' ~ theme_touch_icon, 1) }}" />
{% endif %} {% endif %}

View File

@ -14,9 +14,10 @@
/* -- page layout ----------------------------------------------------------- */ /* -- page layout ----------------------------------------------------------- */
body { body {
font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro'; font-family: "Georgia", "Open Sans", OpenSansRegular, sans-serif;
font-size: 17px; font-size: 17px;
background-color: whitesmoke; background-color: white;
font-weight: 400;
color: #000; color: #000;
margin: 0; margin: 0;
padding: 0; padding: 0;
@ -45,7 +46,7 @@ hr {
} }
div.body { div.body {
background-color: whitesmoke; background-color: white;
color: #3E4349; color: #3E4349;
padding: 0 30px 0 30px; padding: 0 30px 0 30px;
} }
@ -98,7 +99,7 @@ div.sphinxsidebarwrapper p.logo {
div.sphinxsidebar h3, div.sphinxsidebar h3,
div.sphinxsidebar h4 { div.sphinxsidebar h4 {
font-family: 'Garamond', 'Georgia', serif; font-family: 'Antic Slab' ,'Garamond', 'Georgia', serif;
color: #444; color: #444;
font-size: 24px; font-size: 24px;
font-weight: normal; font-weight: normal;
@ -127,7 +128,7 @@ div.sphinxsidebar p {
} }
div.sphinxsidebar ul { div.sphinxsidebar ul {
margin: 10px 0; margin: 10px 0 30px;
padding: 0; padding: 0;
color: #000; color: #000;
} }
@ -156,10 +157,11 @@ div.body h3,
div.body h4, div.body h4,
div.body h5, div.body h5,
div.body h6 { div.body h6 {
font-family: 'Garamond', 'Georgia', serif; font-family: 'Antic Slab', "Open Sans", OpenSansRegular, sans-serif;
font-weight: normal; font-weight: normal;
margin: 30px 0px 10px 0px; margin: 30px 0px 10px 0px;
padding: 0; padding: 0;
text-shadow: 1px 1px 3px #ddd;
} }
div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; }
@ -244,9 +246,14 @@ p.admonition-title:after {
content: ":"; content: ":";
} }
pre, tt { pre {
font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
font-size: 0.9em; font-size: 0.8em;
}
tt {
font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
font-size: 0.95em;
} }
img.screenshot { img.screenshot {
@ -359,6 +366,7 @@ tt {
tt.xref, a tt { tt.xref, a tt {
background-color: #FBFBFB; background-color: #FBFBFB;
color: #2277bb;
border-bottom: 1px solid white; border-bottom: 1px solid white;
} }

View File

@ -6,14 +6,31 @@
dataset: databases for humans dataset: databases for humans
============================= =============================
dataset is a ETL
Getting the databases out of your data's way::
Getting the databases out of your data's way. import dataset
db = dataset.connect('sqlite:///weather.db')
db['temperature'].find()
Features include:
* **Automatic schema**. If a table or column is written that does not
exist in the database, it will be created automatically.
* **Upserts**. Records are either created or updated, depending on
whether an existing version can be found.
* **Query helpers** for simple queries such as all rows in a table or
all distinct values across a set of columns.
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
* `Learn how to use dataset in five minutes <quickstart>`_ Next steps:
* `Browse the complete API docs <api>`_
`Learn how to use dataset in five minutes <quickstart>`_
`Browse the complete API docs <api>`_