Remove datafreeze component, fixes #217

Friedrich Lindenberg 2017-09-09 18:24:34 +02:00
parent cd091eadca
commit a049691749
24 changed files with 136 additions and 895 deletions

View File

@@ -1,22 +0,0 @@
common:
database: "postgresql://user:password@localhost/operational_database"
prefix: my_project/dumps/
format: json
exports:
- query: "SELECT id, title, date FROM events"
filename: "index.json"
- query: "SELECT id, title, date, country FROM events"
filename: "countries/{{country}}.csv"
format: csv
- query: "SELECT * FROM events"
filename: "events/{{id}}.json"
mode: item
- query: "SELECT * FROM events"
filename: "all.json"
format: tabson

View File

@@ -3,7 +3,6 @@ import warnings
from dataset.persistence.database import Database
from dataset.persistence.table import Table
from dataset.persistence.util import row_type
-from dataset.freeze.app import freeze
# shut up useless SA warning:
warnings.filterwarnings(

View File

@@ -14,10 +14,10 @@ from sqlalchemy.engine.reflection import Inspector
from alembic.migration import MigrationContext
from alembic.operations import Operations
-from dataset.persistence.table import Table
+from dataset.table import Table
-from dataset.persistence.util import ResultIter, row_type, safe_url, QUERY_STEP
+from dataset.util import ResultIter, row_type, safe_url, QUERY_STEP
-from dataset.persistence.util import normalize_table_name
+from dataset.util import normalize_table_name
-from dataset.persistence.types import Types
+from dataset.types import Types
log = logging.getLogger(__name__)
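Note for downstream code: this change set flattens the old ``dataset.persistence`` package into top-level modules. A minimal before/after sketch for code that imported these internals directly (paths taken from the hunks above):

# before this commit (dataset 0.8.x)
# from dataset.persistence.table import Table
# from dataset.persistence.util import row_type
# after this commit (dataset 1.0.0)
from dataset.table import Table
from dataset.util import row_type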

View File

@@ -1,167 +0,0 @@
import logging
import argparse
from sqlalchemy.exc import ProgrammingError, OperationalError
from dataset.util import FreezeException
from dataset.persistence.table import Table
from dataset.persistence.database import Database
from dataset.freeze.config import Configuration, Export
from dataset.freeze.format import get_serializer
log = logging.getLogger(__name__)
def create_parser():
parser = argparse.ArgumentParser(
prog='datafreeze',
description='Generate static JSON and CSV extracts from a SQL database.',
epilog='For further information, please check the documentation.')
parser.add_argument('config', metavar='CONFIG', type=str,
help='freeze file configuration')
parser.add_argument('--db', default=None,
help='Override the freezefile database URI')
return parser
def freeze(result, format='csv', filename='freeze.csv', fileobj=None,
prefix='.', mode='list', **kw):
"""
Perform a data export of a given result set. This is a very
flexible exporter, allowing for various output formats, metadata
assignment, and file name templating to dump each record (or a set
of records) into individual files.
::
result = db['person'].all()
dataset.freeze(result, format='json', filename='all-persons.json')
Instead of passing in the file name, you can also pass a file object::
result = db['person'].all()
fh = open('/dev/null', 'wb')
dataset.freeze(result, format='json', fileobj=fh)
Be aware that this will disable file name templating and store all
results to the same file.
If ``result`` is a table (rather than a result set), all records in
the table are exported (as if ``result.all()`` had been called).
freeze supports two values for ``mode``:
*list* (default)
The entire result set is dumped into a single file.
*item*
One file is created for each row in the result set.
You should set a ``filename`` for the exported file(s). If ``mode``
is set to *item* the function would generate one file per row. In
that case you can use values as placeholders in filenames::
dataset.freeze(res, mode='item', format='json',
filename='item-{{id}}.json')
The following output ``format`` s are supported:
*csv*
Comma-separated values, first line contains column names.
*json*
A JSON file containing a list of dictionaries for each row
in the table. If a ``callback`` is given, JSON with padding
(JSONP) will be generated.
*tabson*
Tabson is a smart combination of the space-efficiency of CSV
and the parsability and structure of JSON.
You can pass additional named parameters specific to the used format.
As an example, you can tune the JSON output with the following:
dataset.freeze(res, format='json', indent=4, wrap=False,
filename='output.json')
*json* and *tabson*
*callback*:
if provided, generate a JSONP string using the given callback
function, i.e. something like `callback && callback({...})`
*indent*:
if *indent* is a non-negative integer (it is ``2`` by default
when you call `dataset.freeze`, and ``None`` via the
``datafreeze`` command), then JSON array elements and object
members will be pretty-printed with that indent level.
An indent level of 0 will only insert newlines.
``None`` is the most compact representation.
*meta*:
if *meta* is not ``None`` (default: ``{}``), it will be included
in the JSON output (for *json*, only if *wrap* is ``True``).
*wrap* (only for *json*):
if *wrap* is ``True`` (default), the JSON output is an object
of the form ``{"count": 2, "results": [...]}``.
if ``meta`` is not ``None``, a third property ``meta`` is added
to the wrapping object, with this value.
"""
kw.update({
'format': format,
'filename': filename,
'fileobj': fileobj,
'prefix': prefix,
'mode': mode
})
# Special cases when freezing comes from dataset.freeze
if format in ['json', 'tabson'] and 'indent' not in kw:
kw['indent'] = 2
records = result.all() if isinstance(result, Table) else result
return freeze_export(Export({}, kw), result=records)
def freeze_export(export, result=None):
try:
if result is None:
database = Database(export.get('database'))
query = database.query(export.get('query'))
else:
query = result
serializer_cls = get_serializer(export)
serializer = serializer_cls(export, query)
serializer.serialize()
except (OperationalError, ProgrammingError) as e:
raise FreezeException("Invalid query: %s" % e)
def freeze_with_config(config, db=None):
for export in config.exports:
if db is not None:
export.data['database'] = db
if export.skip:
log.info("Skipping: %s", export.name)
continue
log.info("Running: %s", export.name)
freeze_export(export)
def main(): # pragma: no cover
# Set up default logger.
logging.basicConfig(level=logging.INFO)
try:
parser = create_parser()
args = parser.parse_args()
freeze_with_config(Configuration(args.config), args.db)
except FreezeException as fe:
log.error(fe)
if __name__ == '__main__': # pragma: no cover
logging.basicConfig(level=logging.DEBUG)
main()
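For reference, a minimal sketch of how the removed module was driven programmatically rather than via the CLI. These imports only exist before this commit (dataset 0.8.x and earlier), and the freeze file is assumed to look like the example at the top of this diff:

# pre-removal usage sketch; dataset.freeze.* is deleted by this commit
from dataset.freeze.config import Configuration
from dataset.freeze.app import freeze_with_config

config = Configuration('Freezefile.yaml')           # YAML or JSON freeze file
freeze_with_config(config, db='sqlite:///app.db')   # optional --db style override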

View File

@@ -1,88 +0,0 @@
import json
import yaml
from six import text_type, PY3
from dataset.util import FreezeException
TRUISH = ['true', 'yes', '1', 'on']
DECODER = {
'json': json,
'yaml': yaml
}
def merge_overlay(data, overlay):
out = overlay.copy()
for k, v in data.items():
if isinstance(v, dict) and isinstance(out.get(k), dict):
v = merge_overlay(v, out.get(k))
out[k] = v
return out
class Configuration(object):
def __init__(self, file_name):
self.file_name = file_name
extension = file_name.rsplit('.', 1)[-1]
loader = DECODER.get(extension, json)
try:
if loader == json and PY3: # pragma: no cover
fh = open(file_name, encoding='utf8')
else:
fh = open(file_name, 'rb')
try:
self.data = loader.load(fh)
except ValueError as ve:
raise FreezeException("Invalid freeze file: %s" % ve)
fh.close()
except IOError as ioe:
raise FreezeException(text_type(ioe))
@property
def exports(self):
if not isinstance(self.data, dict):
raise FreezeException("The root element of the freeze file needs to be a hash")
if not isinstance(self.data.get('exports'), list):
raise FreezeException("The freeze file needs to have a list of exports")
common = self.data.get('common', {})
for export in self.data.get('exports'):
yield Export(common, export)
class Export(object):
def __init__(self, common, data):
self.data = merge_overlay(data, common)
def get(self, name, default=None):
return self.data.get(name, default)
def get_normalized(self, name, default=None):
value = self.get(name, default=default)
if value not in [None, default]:
value = text_type(value).lower().strip()
return value
def get_bool(self, name, default=False):
value = self.get_normalized(name)
if value is None:
return default
return value in TRUISH
def get_int(self, name, default=None):
value = self.get_normalized(name)
if value is None:
return default
return int(value)
@property
def skip(self):
return self.get_bool('skip')
@property
def name(self):
return self.get('name', self.get('query'))
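``merge_overlay`` is what gives every export the ``common`` block as defaults, recursing into nested dictionaries. A small behaviour sketch with illustrative values:

common = {'format': 'json', 'prefix': 'dumps/', 'nested': {'property': 'inner'}}
export = {'format': 'csv', 'nested': {'property': 'override'}}
merged = merge_overlay(export, common)
# merged == {'format': 'csv', 'prefix': 'dumps/', 'nested': {'property': 'override'}}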

View File

@@ -1,14 +0,0 @@
from dataset.freeze.format.fjson import JSONSerializer
from dataset.freeze.format.fcsv import CSVSerializer
from dataset.freeze.format.ftabson import TabsonSerializer
SERIALIZERS = {
'json': JSONSerializer,
'csv': CSVSerializer,
'tabson': TabsonSerializer
}
def get_serializer(config):
serializer = config.get_normalized('format', 'json')
return SERIALIZERS.get(serializer)

View File

@@ -1,93 +0,0 @@
import os
import re
import sys
import locale
from six import binary_type, text_type
from normality import slugify
from dataset.util import FreezeException
TMPL_KEY = re.compile("{{([^}]*)}}")
OPERATIONS = {
'identity': lambda x: x,
'lower': lambda x: text_type(x).lower(),
'slug': slugify
}
class Serializer(object):
def __init__(self, export, query):
self._encoding = locale.getpreferredencoding()
self.export = export
self.query = query
self._paths = []
self._get_basepath()
if export.get('filename') == '-':
export.data['fileobj'] = sys.stdout
self.fileobj = export.get('fileobj')
def _get_basepath(self):
prefix = self.export.get('prefix', '')
if isinstance(prefix, binary_type):
prefix = text_type(prefix, encoding=self._encoding)
prefix = os.path.abspath(prefix)
prefix = os.path.realpath(prefix)
self._prefix = prefix
filename = self.export.get('filename')
if isinstance(filename, binary_type):
filename = text_type(filename, encoding=self._encoding)
if filename is None:
raise FreezeException("No 'filename' is specified")
self._basepath = os.path.join(prefix, filename)
def _tmpl(self, data):
def repl(m):
op, key = 'identity', m.group(1)
if ':' in key:
op, key = key.split(':', 1)
return str(OPERATIONS.get(op)(data.get(key, '')))
path = TMPL_KEY.sub(repl, self._basepath)
return os.path.realpath(path)
def file_name(self, row):
# signal that there is a fileobj available:
if self.fileobj is not None:
return None
path = self._tmpl(row)
if path not in self._paths:
if not path.startswith(self._prefix):
raise FreezeException("Possible path escape detected.")
dn = os.path.dirname(path)
if not os.path.isdir(dn):
os.makedirs(dn)
self._paths.append(path)
return path
@property
def mode(self):
mode = self.export.get_normalized('mode', 'list')
if mode not in ['list', 'item']:
raise FreezeException("Invalid mode: %s" % mode)
return mode
@property
def wrap(self):
return self.export.get_bool('wrap', default=self.mode == 'list')
def serialize(self):
self.init()
transforms = self.export.get('transform', {})
for row in self.query:
for field, operation in transforms.items():
row[field] = OPERATIONS.get(operation)(row.get(field))
self.write(self.file_name(row), row)
self.close()
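The ``{{field}}`` and ``{{operation:field}}`` placeholders in ``filename`` are expanded per row by ``_tmpl`` above. A standalone sketch of the same substitution, with illustrative values:

import re
from normality import slugify

TMPL_KEY = re.compile("{{([^}]*)}}")
OPERATIONS = {'identity': lambda x: x, 'lower': lambda x: str(x).lower(), 'slug': slugify}

def expand(template, row):
    """Expand {{field}} / {{op:field}} placeholders with values from a row."""
    def repl(match):
        op, key = 'identity', match.group(1)
        if ':' in key:
            op, key = key.split(':', 1)
        return str(OPERATIONS.get(op)(row.get(key, '')))
    return TMPL_KEY.sub(repl, template)

# expand('countries/{{slug:country}}.csv', {'country': 'New Zealand'})
# -> 'countries/new-zealand.csv'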

View File

@@ -1,50 +0,0 @@
from __future__ import unicode_literals
import csv
from datetime import datetime, date
from six import PY3, text_type
from dataset.freeze.format.common import Serializer
def value_to_str(value):
if isinstance(value, (date, datetime)):
return text_type(value.isoformat())
if not PY3 and hasattr(value, 'encode'):
return value.encode('utf-8')
if value is None:
return ''
return value
class CSVSerializer(Serializer):
def init(self):
self.handles = {}
def write(self, path, result):
keys = list(result.keys())
if path not in self.handles:
# handle fileobj that has been passed in:
if path is not None:
if PY3: # pragma: no cover
fh = open(path, 'wt', encoding='utf8', newline='')
else:
fh = open(path, 'wb')
else:
fh = self.fileobj
writer = csv.writer(fh)
if PY3: # pragma: no cover
writer.writerow(keys)
else:
writer.writerow([value_to_str(k) for k in keys])
self.handles[path] = (writer, fh)
writer, fh = self.handles[path]
values = [value_to_str(result.get(k)) for k in keys]
writer.writerow(values)
def close(self):
for writer, fh in self.handles.values():
if fh != self.fileobj:
fh.close()

View File

@@ -1,63 +0,0 @@
import json
from datetime import datetime, date
from collections import defaultdict, OrderedDict
from decimal import Decimal
from six import PY3
from dataset.freeze.format.common import Serializer
class JSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, (datetime, date)):
return obj.isoformat()
if isinstance(obj, Decimal):
return str(obj)
class JSONSerializer(Serializer):
def init(self):
self.buckets = defaultdict(list)
def write(self, path, result):
self.buckets[path].append(result)
def wrap(self, result):
if self.mode == 'item':
result = result[0]
if self.export.get_bool('wrap', True):
result = OrderedDict([
('count', len(result)),
('results', result),
])
meta = self.export.get('meta', {})
if meta is not None:
result['meta'] = meta
return result
def close(self):
for path, result in self.buckets.items():
result = self.wrap(result)
if self.fileobj is None:
if PY3: # pragma: no cover
fh = open(path, 'w', encoding='utf8')
else:
fh = open(path, 'wb')
else:
fh = self.fileobj
data = json.dumps(result,
cls=JSONEncoder,
indent=self.export.get_int('indent'))
callback = self.export.get('callback')
if callback:
data = "%s && %s(%s);" % (callback, callback, data)
fh.write(data)
if self.fileobj is None:
fh.close()
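In list mode with ``wrap`` enabled (the default), the serializer above writes a wrapping object rather than a bare array. A shape sketch with made-up values:

wrapped = {
    'count': 2,
    'results': [
        {'id': 1, 'title': 'Launch', 'date': '2017-09-09'},
        {'id': 2, 'title': 'Follow-up', 'date': '2017-09-10'},
    ],
    'meta': {},   # whatever the export's `meta` option contains
}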

View File

@@ -1,23 +0,0 @@
from dataset.freeze.format.fjson import JSONSerializer
class TabsonSerializer(JSONSerializer):
def wrap(self, result):
fields = []
data = []
if len(result):
keys = list(result[0].keys())
fields = [{'id': k} for k in keys]
for row in result:
d = [row.get(k) for k in keys]
data.append(d)
result = {
'count': len(data),
'fields': fields,
'data': data
}
meta = self.export.get('meta', {})
if meta is not None:
result['meta'] = meta
return result
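``tabson`` stores the field names once and each row as a positional list. For the same two rows as in the JSON sketch above, the wrapped output would look roughly like this (values illustrative):

tabson = {
    'count': 2,
    'fields': [{'id': 'id'}, {'id': 'title'}, {'id': 'date'}],
    'data': [
        [1, 'Launch', '2017-09-09'],
        [2, 'Follow-up', '2017-09-10'],
    ],
    'meta': {},
}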

View File

@@ -1,102 +0,0 @@
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
try:
from collections import OrderedDict
except ImportError: # pragma: no cover
from ordereddict import OrderedDict
from six import string_types
from collections import Sequence
from hashlib import sha1
QUERY_STEP = 1000
row_type = OrderedDict
def convert_row(row_type, row):
if row is None:
return None
return row_type(row.items())
def iter_result_proxy(rp, step=None):
"""Iterate over the ResultProxy."""
while True:
if step is None:
chunk = rp.fetchall()
else:
chunk = rp.fetchmany(step)
if not chunk:
break
for row in chunk:
yield row
class ResultIter(object):
""" SQLAlchemy ResultProxies are not iterable to get a
list of dictionaries. This is to wrap them. """
def __init__(self, result_proxy, row_type=row_type, step=None):
self.row_type = row_type
self.result_proxy = result_proxy
self.keys = list(result_proxy.keys())
self._iter = iter_result_proxy(result_proxy, step=step)
def __next__(self):
return convert_row(self.row_type, next(self._iter))
next = __next__
def __iter__(self):
return self
def close(self):
self.result_proxy.close()
def normalize_column_name(name):
"""Check if a string is a reasonable thing to use as a column name."""
if not isinstance(name, string_types):
raise ValueError('%r is not a valid column name.' % name)
name = name.strip()
if not len(name) or '.' in name or '-' in name:
raise ValueError('%r is not a valid column name.' % name)
return name
def normalize_table_name(name):
"""Check if the table name is obviously invalid."""
if not isinstance(name, string_types):
raise ValueError("Invalid table name: %r" % name)
name = name.strip()
if not len(name):
raise ValueError("Invalid table name: %r" % name)
return name
def safe_url(url):
"""Remove password from printed connection URLs."""
parsed = urlparse(url)
if parsed.password is not None:
pwd = ':%s@' % parsed.password
url = url.replace(pwd, ':*****@')
return url
def index_name(table, columns):
"""Generate an artificial index name."""
sig = '||'.join(columns)
key = sha1(sig.encode('utf-8')).hexdigest()[:16]
return 'ix_%s_%s' % (table, key)
def ensure_tuple(obj):
"""Try and make the given argument into a tuple."""
if obj is None:
return tuple()
if isinstance(obj, Sequence) and not isinstance(obj, string_types):
return tuple(obj)
return obj,

View File

@@ -9,11 +9,10 @@ from sqlalchemy import func, select, false
from sqlalchemy.schema import Table as SQLATable
from sqlalchemy.exc import NoSuchTableError
-from dataset.persistence.types import Types
+from dataset.types import Types
-from dataset.persistence.util import normalize_column_name, index_name
+from dataset.util import normalize_column_name, index_name, ensure_tuple
-from dataset.persistence.util import ensure_tuple, ResultIter, QUERY_STEP
+from dataset.util import DatasetException, ResultIter, QUERY_STEP
-from dataset.persistence.util import normalize_table_name
+from dataset.util import normalize_table_name
-from dataset.util import DatasetException
log = logging.getLogger(__name__)

View File

@@ -1,12 +1,105 @@
-# coding: utf-8
-import re
-SLUG_REMOVE = re.compile(r'[,\s\.\(\)/\\;:]*')
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
try:
from collections import OrderedDict
except ImportError: # pragma: no cover
from ordereddict import OrderedDict
from six import string_types
from collections import Sequence
from hashlib import sha1
QUERY_STEP = 1000
row_type = OrderedDict
class DatasetException(Exception):
pass
-class FreezeException(DatasetException):
-pass
def convert_row(row_type, row):
if row is None:
return None
return row_type(row.items())
def iter_result_proxy(rp, step=None):
"""Iterate over the ResultProxy."""
while True:
if step is None:
chunk = rp.fetchall()
else:
chunk = rp.fetchmany(step)
if not chunk:
break
for row in chunk:
yield row
class ResultIter(object):
""" SQLAlchemy ResultProxies are not iterable to get a
list of dictionaries. This is to wrap them. """
def __init__(self, result_proxy, row_type=row_type, step=None):
self.row_type = row_type
self.result_proxy = result_proxy
self.keys = list(result_proxy.keys())
self._iter = iter_result_proxy(result_proxy, step=step)
def __next__(self):
return convert_row(self.row_type, next(self._iter))
next = __next__
def __iter__(self):
return self
def close(self):
self.result_proxy.close()
def normalize_column_name(name):
"""Check if a string is a reasonable thing to use as a column name."""
if not isinstance(name, string_types):
raise ValueError('%r is not a valid column name.' % name)
name = name.strip()
if not len(name) or '.' in name or '-' in name:
raise ValueError('%r is not a valid column name.' % name)
return name
def normalize_table_name(name):
"""Check if the table name is obviously invalid."""
if not isinstance(name, string_types):
raise ValueError("Invalid table name: %r" % name)
name = name.strip()
if not len(name):
raise ValueError("Invalid table name: %r" % name)
return name
def safe_url(url):
"""Remove password from printed connection URLs."""
parsed = urlparse(url)
if parsed.password is not None:
pwd = ':%s@' % parsed.password
url = url.replace(pwd, ':*****@')
return url
def index_name(table, columns):
"""Generate an artificial index name."""
sig = '||'.join(columns)
key = sha1(sig.encode('utf-8')).hexdigest()[:16]
return 'ix_%s_%s' % (table, key)
def ensure_tuple(obj):
"""Try and make the given argument into a tuple."""
if obj is None:
return tuple()
if isinstance(obj, Sequence) and not isinstance(obj, string_types):
return tuple(obj)
return obj,
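A quick sketch of the small helpers now consolidated in ``dataset.util`` (illustrative values):

from dataset.util import safe_url, index_name, ensure_tuple

safe_url('postgresql://user:secret@localhost/db')
# -> 'postgresql://user:*****@localhost/db'
index_name('events', ['country', 'date'])
# -> 'ix_events_' followed by a 16-character sha1 prefix
ensure_tuple('country')   # -> ('country',)
ensure_tuple(['a', 'b'])  # -> ('a', 'b')
ensure_tuple(None)        # -> ()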

View File

@@ -48,9 +48,9 @@ copyright = u'2013-2015, Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer'
# built documents.
#
# The short X.Y version.
-version = '0.6'
+version = '1.0'
# The full version, including alpha/beta/rc tags.
-release = '0.6.0'
+release = '1.0.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.

View File

@@ -1,98 +0,0 @@
Freezefiles and the ``datafreeze`` command
==========================================
``datafreeze`` creates static extracts of SQL databases for use in interactive
web applications. SQL databases are a great way to manage relational data, but
exposing them on the web to drive data apps can be cumbersome. Often, the
capacities of a proper database are not actually required, a few static JSON
files and a bit of JavaScript can have the same effect. Still, exporting JSON
by hand (or with a custom script) can also become a messy process.
With ``datafreeze``, exports are scripted in a Makefile-like description, making them simple to repeat and replicate.
Basic Usage
-----------
Calling ``datafreeze`` is simple: the application is invoked with a
freeze file as its argument:

.. code-block:: bash

   datafreeze Freezefile.yaml

Freeze files can be written either in JSON or in YAML. The database URI
indicated in the Freezefile can also be overridden via the command line:

.. code-block:: bash

   datafreeze --db sqlite:///foo.db Freezefile.yaml
Example Freezefile.yaml
-----------------------
A freeze file is composed of a set of scripted queries and
specifications on how their output is to be handled. An example could look
like this:
.. code-block:: yaml
common:
database: "postgresql://user:password@localhost/operational_database"
prefix: my_project/dumps/
format: json
exports:
- query: "SELECT id, title, date FROM events"
filename: "index.json"
- query: "SELECT id, title, date, country FROM events"
filename: "countries/{{country}}.csv"
format: csv
- query: "SELECT * FROM events"
filename: "events/{{id}}.json"
mode: item
- query: "SELECT * FROM events"
filename: "all.json"
format: tabson
An identical JSON configuration can be found in this repository.
Options in detail
-----------------
The freeze file has two main sections, ``common`` and ``exports``. Both
accept many of the same arguments, with ``exports`` specifying a list of
exports while ``common`` defines some shared properties, such as the
database connection string.
The following options are recognized:
* ``database`` is a database URI, including the database type, username
and password, hostname and database name. Valid database types include
``sqlite``, ``mysql`` and ``postgresql`` (requires psycopg2).
* ``prefix`` specifies a common root directory for all extracted files.
* ``format`` identifies the format to be generated, ``csv``, ``json`` and
``tabson`` are supported. ``tabson`` is a condensed JSON
representation in which rows are not represented by objects but by
lists of values.
* ``query`` needs to be a valid SQL statement. All selected fields will
become keys or columns in the output, so it may make sense to define
proper aliases if any overlap is to be expected.
* ``mode`` specifies whether the query output is to be combined into a
single file (``list``) or whether a file should be generated for each
result row (``item``).
* ``filename`` is the output file name, appended to ``prefix``. All
occurrences of ``{{field}}`` are expanded to a field's value to allow the
generation of file names e.g. by primary key. In list mode, templating
can be used to group records into several buckets, e.g. by country or
category.
* ``wrap`` can be used to specify whether the output should be wrapped
in a ``results`` hash in JSON output. This defaults to ``true`` for
``list``-mode output and ``false`` for ``item``-mode.

View File

@@ -10,22 +10,19 @@ dataset: databases for lazy people
:hidden:

Although managing data in relational databases has plenty of benefits, they're
rarely used in day-to-day work with small to medium scale datasets. But why is
that? Why do we see an awful lot of data stored in static files in CSV or JSON
format, even though they are hard to query and update incrementally?

The answer is that **programmers are lazy**, and thus they tend to prefer the
easiest solution they find. And in **Python**, a database isn't the simplest
solution for storing a bunch of structured data. This is what **dataset** is
going to change!

-**dataset** provides two key functions that make using SQL databases in
-Python a breeze:
-* A simple abstraction layer removes most direct SQL statements without
-the necessity for a full ORM model - essentially, databases can be
-used like a JSON file or NoSQL store.
-* Database contents can be exported (*frozen*) using a :doc:`sophisticated
-plain file generator <freezefile>` with JSON and CSV support. Exports can be configured
-to include metadata and dynamic file names depending on the exported
-data. The exporter can also be used as a command-line tool, ``datafreeze``.
+**dataset** provides a simple abstraction layer that removes most direct SQL
+statements without the necessity for a full ORM model - essentially, databases
+can be used like a JSON file or NoSQL store.

A simple data loading script using **dataset** might look like this:
@@ -55,8 +52,6 @@ Features
* **Query helpers** for simple queries such as :py:meth:`all <dataset.Table.all>` rows in a table or
all :py:meth:`distinct <dataset.Table.distinct>` values across a set of columns.
* **Compatibility**: Being built on top of `SQLAlchemy <http://www.sqlalchemy.org/>`_, ``dataset`` works with all major databases, such as SQLite, PostgreSQL and MySQL.
-* **Scripted exports**: Data can be exported based on a scripted
-configuration, making the process easy and replicable.

Contents
--------
@@ -66,12 +61,14 @@ Contents
install
quickstart
-freezefile
api

Contributors
------------

``dataset`` is written and maintained by `Friedrich Lindenberg <https://github.com/pudo>`_,
`Gregor Aisch <https://github.com/gka>`_ and `Stefan Wehrmeyer <https://github.com/stefanw>`_.
Its code is largely based on the preceding libraries `sqlaload <https://github.com/okfn/sqlaload>`_
and datafreeze. And of course, we're standing on the `shoulders of giants <http://www.sqlalchemy.org/>`_.

Our cute little `naked mole rat <http://www.youtube.com/watch?feature=player_detailpage&v=A5DcOEzW1wA#t=14s>`_ was drawn by `Johannes Koch <http://chechuchape.com/>`_.

View File

@@ -2,7 +2,8 @@
Installation Guide
==================

The easiest way is to install ``dataset`` from the `Python Package Index
<https://pypi.python.org/pypi/dataset/>`_ using ``pip`` or ``easy_install``:

.. code-block:: bash

@@ -16,4 +17,6 @@ To install it manually simply download the repository from Github:
$ cd dataset/
$ python setup.py install

Depending on the type of database backend, you may also need to install a
database-specific driver package. For MySQL, this is ``MySQLdb``, for Postgres
it's ``psycopg2``. SQLite support is integrated into Python.

View File

@@ -30,8 +30,8 @@ so you can initialize database connection without explicitly passing an `URL`::
Depending on which database you're using, you may also have to install
the database bindings to support that database. SQLite is included in
the Python core, but PostgreSQL requires ``psycopg2`` to be installed.
MySQL can be enabled by installing the ``mysql-db`` drivers.

Storing data
@@ -110,7 +110,7 @@ database:
Now, let's list all columns available in the table ``user``:

>>> print(db['user'].columns)
[u'id', u'country', u'age', u'name', u'gender']

Using ``len()`` we can get the total number of rows in a table:
@@ -156,7 +156,7 @@ results will be returned::
db = dataset.connect('sqlite:///mydatabase.db', row_type=stuf)

Now contents will be returned in ``stuf`` objects (basically, ``dict``
objects whose elements can be accessed as attributes (``item.name``) as well as
by index (``item['name']``).

Running custom SQL queries
@@ -169,36 +169,10 @@ use the full power of SQL queries. Here's how you run them with ``dataset``::
for row in result:
print(row['country'], row['c'])

The :py:meth:`query() <dataset.Table.query>` method can also be used to
access the underlying `SQLAlchemy core API <http://docs.sqlalchemy.org/en/latest/orm/query.html#the-query-object>`_, which allows for the
programmatic construction of more complex queries::

table = db['user'].table
statement = table.select(table.c.name.like('%John%'))
result = db.query(statement)
-Exporting data
---------------
-While playing around with our database in Python is a nice thing, it is
-sometimes just a processing stage until we go on to use the data in another
-place, say in an interactive web application. To make this seamless,
-``dataset`` supports serializing rows of data into static JSON and CSV files
-using the :py:meth:`freeze() <dataset.freeze>` function::
-# export all users into a single JSON
-result = db['users'].all()
-dataset.freeze(result, format='json', filename='users.json')
-You can create one file per row by setting ``mode`` to "item"::
-# export one JSON file per user
-dataset.freeze(result, format='json', filename='users/{{ id }}.json', mode='item')
-Since this is a common operation we made it available via the command-line
-utility ``datafreeze``. Read more about the :doc:`freezefile markup <freezefile>`.
-.. code-block:: bash
-$ datafreeze freezefile.yaml
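With the exporter removed by this commit, a rough stdlib-only equivalent of the examples above might look like this. This is a sketch, not part of the commit; it assumes ``db`` is a ``dataset.connect(...)`` connection and a ``users`` table as in the removed section:

.. code-block:: python

   import json
   import os

   rows = [dict(row) for row in db['users'].all()]

   # single JSON file, similar to the old mode='list'
   with open('users.json', 'w') as fh:
       json.dump({'count': len(rows), 'results': rows}, fh, indent=2, default=str)

   # one file per row, similar to the old mode='item'
   if not os.path.isdir('users'):
       os.makedirs('users')
   for row in rows:
       with open(os.path.join('users', '%s.json' % row['id']), 'w') as fh:
           json.dump(row, fh, indent=2, default=str)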

View File

@@ -8,7 +8,7 @@ if sys.version_info[:2] <= (2, 6):
setup(
name='dataset',
-version='0.8.0',
+version='1.0.0',
description="Toolkit for Python-based data processing.",
long_description="",
classifiers=[
@@ -34,14 +34,9 @@ setup(
'sqlalchemy >= 1.1.0',
'alembic >= 0.6.2',
'normality >= 0.3.9',
-"PyYAML >= 3.10",
"six >= 1.7.3"
] + py26_dependency,
tests_require=[],
test_suite='test',
-entry_points={
+entry_points={}
-'console_scripts': [
-'datafreeze = dataset.freeze.app:main',
-]
-}
)

View File

@@ -1,32 +0,0 @@
common:
database: "postgresql://user:password@localhost/operational_database"
prefix: my_project/dumps/
format: json
nested:
property: "inner"
exports:
- query: "SELECT id, title, date FROM events"
filename: "index.json"
number: 5
bool: true
nested:
property: "override"
- query: "SELECT id, title, date, country FROM events"
filename: "countries/{{country}}.csv"
format: csv
- query: "SELECT * FROM events"
filename: "events/{{id}}.json"
mode: item
wrap: true
- query: "SELECT * FROM events"
filename: "all.json"
format: tabson

View File

@@ -1,67 +0,0 @@
# coding: utf-8
"""
Test CLI following the recipe at http://dustinrcollins.com/testing-python-command-line-apps
"""
import os
import unittest
from tempfile import mkdtemp
from shutil import rmtree
from copy import copy
from six import StringIO
from dataset import connect
from dataset.util import FreezeException
from dataset.freeze.config import Configuration, Export
from dataset.freeze.app import create_parser, freeze_with_config, freeze_export
from .sample_data import TEST_DATA
class FreezeAppTestCase(unittest.TestCase):
"""
Base TestCase class, sets up a CLI parser
"""
def setUp(self):
parser = create_parser()
self.parser = parser
self.d = mkdtemp()
self.db_path = os.path.abspath(os.path.join(self.d, 'db.sqlite'))
self.db = 'sqlite:///' + self.db_path
_db = connect(self.db)
tbl = _db['weather']
for i, row in enumerate(TEST_DATA):
_row = copy(row)
_row['count'] = i
_row['bool'] = True
_row['none'] = None
tbl.insert(_row)
def tearDown(self):
rmtree(self.d, ignore_errors=True)
def test_with_config(self):
cfg = Configuration(os.path.join(os.path.dirname(__file__), 'Freezefile.yaml'))
cfg.data['common']['database'] = self.db
cfg.data['common']['prefix'] = self.d
cfg.data['common']['query'] = 'SELECT * FROM weather'
cfg.data['exports'] = [
{'filename': '{{identity:count}}.json', 'mode': 'item', 'transform': {'bool': 'identity'}},
{'filename': 'weather.json', 'format': 'tabson'},
{'filename': 'weather.csv', 'fileobj': StringIO(), 'format': 'csv'},
{'filename': 'weather.json', 'fileobj': StringIO(), 'format': 'tabson'},
{'filename': 'weather.json', 'format': 'tabson', 'callback': 'read'},
{'skip': True}]
freeze_with_config(cfg, db=self.db)
self.assertRaises(FreezeException, freeze_export, Export(cfg.data['common'], {'query': 'SELECT * FROM notable'}))
def test_unicode_path(self):
cfg = Configuration(os.path.join(os.path.dirname(__file__), 'Freezefile.yaml'))
cfg.data['common']['database'] = self.db
cfg.data['common']['prefix'] = os.path.join(self.d, u'über')
cfg.data['common']['query'] = 'SELECT * FROM weather'
cfg.data['exports'] = [{'filename': 'weather.csv', 'format': 'csv'}]
freeze_with_config(cfg, db=self.db)
if __name__ == '__main__':
unittest.main()