From c284d05bd6f057f3c61b1de0cb0b706657bfec38 Mon Sep 17 00:00:00 2001 From: Gregor Aisch Date: Wed, 3 Apr 2013 01:48:26 +0200 Subject: [PATCH] docs! --- dataset/persistence/table.py | 19 +++++++++- docs/index.rst | 6 +-- docs/quickstart.rst | 71 +++++++++++++++++++++++++++--------- 3 files changed, 74 insertions(+), 22 deletions(-) diff --git a/dataset/persistence/table.py b/dataset/persistence/table.py index 3f8f958..adaf4f2 100644 --- a/dataset/persistence/table.py +++ b/dataset/persistence/table.py @@ -19,7 +19,12 @@ class Table(object): @property def columns(self): - """ Get a listing of all columns that exist in the table. """ + """ + Get a listing of all columns that exist in the table. + + >>> print 'age' in table.columns + True + """ return set(self.table.columns.keys()) def drop(self): @@ -129,6 +134,12 @@ class Table(object): return and_(*clauses) def create_column(self, name, type): + """ + Explicitly create a new column ``name`` of a specified type. ``type`` must be a `SQLAlchemy column type `_. + :: + + table.create_column('person', sqlalchemy.String) + """ with self.database.lock: if name not in self.table.columns.keys(): col = Column(name, type) @@ -136,6 +147,12 @@ class Table(object): connection=self.database.engine) def create_index(self, columns, name=None): + """ + Create an index to speed up queries on a table. If no ``name`` is given, a name is generated automatically. + :: + + table.create_index(['name', 'country']) + """ with self.database.lock: if not name: sig = abs(hash('||'.join(columns))) diff --git a/docs/index.rst b/docs/index.rst index 4acaddc..3c3bfb8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,9 +12,9 @@ dataset: databases for lazy people Although managing data in relational database has plenty of benefits, we find them rarely being used in the typical day-to-day work with small to medium scale datasets. But why is that? Why do we see an awful lot of data stored in static files in CSV or JSON format? 
-Because **programmers are lazy**, and thus they tend to prefer the easiest solution they find. And managing data in a databases simply wasn't the simplest solution to store a bunch of structured data. This is where ``dataset`` steps in! +Because **programmers are lazy**, and thus they tend to prefer the easiest solution they find. And managing data in a database simply wasn't the simplest solution to store a bunch of structured data. This is where **dataset** steps in! -Dataset is here to **take the pain out of databases**. It makes reading and writing data in databases as simple as reading and writing JSON files. +In short, **dataset** makes reading and writing data in databases as simple as reading and writing JSON files. :: @@ -37,7 +37,7 @@ Features whether an existing version can be found. * **Query helpers** for simple queries such as :py:meth:`all ` rows in a table or all :py:meth:`distinct ` values across a set of columns. -* **Compatibility**: Being built on top of `SQLAlchemy `_, ``dataset`` works with all major databases, such as SQLite, PostgreSQL and MySQL. +* **Compatibility**: Being built on top of `SQLAlchemy `_, ``dataset`` works with all major databases, such as SQLite, PostgreSQL and MySQL. Contents -------- diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 1afd111..47810e1 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -3,7 +3,7 @@ Quickstart ========== -Hi, welcome to the five-minute quick-start tutorial. +Hi, welcome to the twelve-minute quick-start tutorial. Connecting to a database ------------------------ @@ -49,37 +49,72 @@ Updating existing entries is easy, too:: table.update(dict(name='John Doe', age=47), ['name']) +Inspecting databases and tables +------------------------------- + +When dealing with unknown databases we might want to check their structure first. 
To begin with, let's find out what tables are stored in the database: + + >>> print db.tables + set([u'user', u'action']) + +Now, let's list all columns available in the table ``user``: + + >>> print db['user'].columns + set([u'id', u'name', u'email', u'pwd', u'country']) + +Using ``len()`` we can get the total number of rows in a table: + + >>> print len(db['user']) + 187 + Reading data from tables ------------------------ -Checking:: +Now let's get some real data out of the table:: - table = db['population'] - - # Let's grab a list of all items/rows/entries in the table: - table.all() - - table.distinct() + users = db['user'].all() Searching for specific entries:: - # Returns the first item where the column country equals 'China' - table.find_one(country='China') + # All users from China + users = table.find(country='China') + + # Get a specific user + john = table.find_one(email='john.doe@example.org') + +Using :py:meth:`distinct() ` we can grab a set of rows with unique values in one or more columns:: + + # Get one user per country + db['user'].distinct('country') - # Returns all items - table.find(country='China') Running custom SQL queries -------------------------- -Of course the main reason you're using a database is that you want to use the full power of SQL queries. Here's how you run them using dataset:: +Of course the main reason you're using a database is that you want to use the full power of SQL queries. Here's how you run them with ``dataset``:: - result = db.query('SELECT user, COUNT(*) c FROM photos GROUP BY user ORDER BY c DESC') + result = db.query('SELECT country, COUNT(*) c FROM user GROUP BY country') for row in result: - print row['user'], row['c'] - -Freezing your data ------------------- + print row['country'], row['c'] +Exporting your data +------------------- +While playing around with your database in Python is a nice thing, sometimes we want to use our data –or parts of it– elsewhere, say in an interactive web application. 
Therefore, ``dataset`` supports serializing rows of data into static files such as JSON using the :py:meth:`freeze() ` function:: + + # export all users into a single JSON file + result = db['users'].all() + dataset.freeze(result, 'users.json') + +You can create one file per row by setting ``mode`` to "item":: + + # export one JSON file per user + dataset.freeze(result, 'users/{{ id }}.json', mode='item') + + +Since this is a common operation we made it available via the command line utility ``datafreeze``. Read more about the `freezefile markup `_. + +.. code-block:: bash + + $ datafreeze freezefile.yaml