diff --git a/dataset/__init__.py b/dataset/__init__.py index 74d6bc1..150e30c 100644 --- a/dataset/__init__.py +++ b/dataset/__init__.py @@ -5,7 +5,7 @@ warnings.filterwarnings( from dataset.persistence.database import Database from dataset.persistence.table import Table - +from dataset.freeze.app import freeze def connect(url): """ @@ -18,3 +18,5 @@ def connect(url): .. _SQLAlchemy Engine URL: http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine """ return Database(url) + + diff --git a/dataset/freeze/app.py b/dataset/freeze/app.py index 1f57700..35a0092 100644 --- a/dataset/freeze/app.py +++ b/dataset/freeze/app.py @@ -1,9 +1,10 @@ import logging import argparse +from sqlalchemy.exc import ProgrammingError from dataset.util import FreezeException -from dataset.freeze.config import Configuration -from dataset.freeze.engine import ExportEngine +from dataset.persistence.database import Database +from dataset.freeze.config import Configuration, Export from dataset.freeze.format import get_serializer @@ -16,6 +17,37 @@ parser = argparse.ArgumentParser( parser.add_argument('config', metavar='CONFIG', type=str, help='freeze file cofiguration') +def freeze(database, query, format='csv', filename='freeze.csv', + prefix='.', meta={}, indent=2, mode='list', wrap=True, **kw): + """ + Perform a data export of a given SQL statement. This is a very + flexible exporter, allowing for various output formats, metadata + assignment, and file name templating to dump each record (or a set + of records) into individual files. + """ + kw.update({ + 'database': database, + 'query': query, + 'format': format, + 'filename': filename, + 'prefix': prefix, + 'meta': meta, + 'indent': indent, + 'mode': mode, + 'wrap': wrap + }) + return freeze_export(Export(kw)) + +def freeze_export(export): + try: + database = Database(export.get('database')) + query = database.query(export.get('query')) + serializer_cls = get_serializer(export) + serializer = serializer_cls(export, query) + serializer.serialize() + except ProgrammingError, pe: + raise FreezeException("Invalid query: %s" % pe) + def main(): try: args = parser.parse_args() @@ -25,13 +57,10 @@ def main(): log.info("Skipping: %s", export.name) continue log.info("Running: %s", export.name) - engine = ExportEngine(export) - query = engine.query() - serializer_cls = get_serializer(export) - serializer = serializer_cls(engine) - serializer.serialize() + freeze_export(export) except FreezeException, fe: log.error(fe) if __name__ == '__main__': main() + diff --git a/dataset/freeze/engine.py b/dataset/freeze/engine.py deleted file mode 100644 index 3fcf229..0000000 --- a/dataset/freeze/engine.py +++ /dev/null @@ -1,42 +0,0 @@ -from sqlalchemy import create_engine -from sqlalchemy.exc import ProgrammingError - -from dataset.util import FreezeException - -class Query(object): - - def __init__(self, query, rp): - self.query = query - self.rp = rp - - def __len__(self): - return self.rp.rowcount - - def __iter__(self): - keys = self.rp.keys() - while True: - row = self.rp.fetchone() - if row is None: - return - yield dict(zip(keys, row)) - - -class ExportEngine(object): - - def __init__(self, config): - self.config = config - - @property - def engine(self): - if not hasattr(self, '_engine'): - self._engine = create_engine(self.config.get('database')) - return self._engine - - def query(self): - try: - q = self.config.get('query') - rp = self.engine.execute(q) - return Query(q, rp) - except ProgrammingError, pe: - raise FreezeException("Invalid query: %s - %s" % (q, pe)) - diff --git a/dataset/freeze/format/common.py b/dataset/freeze/format/common.py index bc00af5..a186c5d 100644 --- a/dataset/freeze/format/common.py +++ b/dataset/freeze/format/common.py @@ -17,18 +17,18 @@ OPERATIONS = { class Serializer(object): - def __init__(self, engine): - self.engine = engine - self.config = engine.config + def __init__(self, export, query): + self.export = export + self.query = query self._paths = [] self._get_basepath() def _get_basepath(self): - prefix = self.config.get('prefix') + prefix = self.export.get('prefix') prefix = os.path.abspath(prefix) prefix = os.path.realpath(prefix) self._prefix = prefix - filename = self.config.get('filename') + filename = self.export.get('filename') if filename is None: raise FreezeException("No 'filename' is specified") self._basepath = os.path.join(prefix, filename) @@ -56,20 +56,19 @@ class Serializer(object): @property def mode(self): - mode = self.config.get_normalized('mode', 'list') + mode = self.export.get_normalized('mode', 'list') if mode not in ['list', 'item']: raise FreezeException("Invalid mode: %s" % mode) return mode @property def wrap(self): - return self.config.get_bool('wrap', + return self.export.get_bool('wrap', default=self.mode=='list') def serialize(self): self.init() - query = self.engine.query() - for row in query: + for row in self.query: self.write(self.file_name(row), row) self.close() diff --git a/dataset/freeze/format/fjson.py b/dataset/freeze/format/fjson.py index 446a33e..f0fc8ad 100644 --- a/dataset/freeze/format/fjson.py +++ b/dataset/freeze/format/fjson.py @@ -21,15 +21,14 @@ class JSONSerializer(Serializer): self.buckets[path].append(result) def wrap(self, result): - count = len(result) if self.mode == 'item': result = result[0] if self.wrap: result = { - 'count': count, + 'count': self.query.count, 'results': result } - meta = self.config.get('meta') + meta = self.export.get('meta', {}) if meta is not None: result['meta'] = meta return result @@ -40,6 +39,6 @@ class JSONSerializer(Serializer): fh = open(path, 'wb') json.dump(result, fh, cls=JSONEncoder, - indent=self.config.get_int('indent')) + indent=self.export.get_int('indent')) fh.close() diff --git a/dataset/freeze/format/ftabson.py b/dataset/freeze/format/ftabson.py index f49aefc..6b90468 100644 --- a/dataset/freeze/format/ftabson.py +++ b/dataset/freeze/format/ftabson.py @@ -13,11 +13,11 @@ class TabsonSerializer(JSONSerializer): d = [row.get(k) for k in keys] data.append(d) result = { - 'count': len(result), + 'count': self.query.count, 'fields': fields, 'data': data } - meta = self.config.get('meta') + meta = self.export.get('meta', {}) if meta is not None: result['meta'] = meta return result diff --git a/dataset/persistence/database.py b/dataset/persistence/database.py index 8241f05..3536068 100644 --- a/dataset/persistence/database.py +++ b/dataset/persistence/database.py @@ -9,7 +9,7 @@ from sqlalchemy.schema import Table as SQLATable from sqlalchemy import Integer from dataset.persistence.table import Table -from dataset.persistence.util import resultiter +from dataset.persistence.util import ResultIter log = logging.getLogger(__name__) @@ -37,7 +37,8 @@ class Database(object): >>> print db.tables set([u'user', u'action']) """ - return set(self.metadata.tables.keys() + self._tables.keys()) + return list(set(self.metadata.tables.keys() + + self._tables.keys())) def create_table(self, table_name): """ @@ -111,7 +112,8 @@ class Database(object): for row in res: print row['user'], row['c'] """ - return resultiter(self.engine.execute(query)) + return ResultIter(self.engine.execute(query)) def __repr__(self): return '' % self.url + diff --git a/dataset/persistence/table.py b/dataset/persistence/table.py index 356787e..a143967 100644 --- a/dataset/persistence/table.py +++ b/dataset/persistence/table.py @@ -319,3 +319,4 @@ class Table(object): """ for row in self.all(): yield row + diff --git a/dataset/persistence/util.py b/dataset/persistence/util.py index 9287549..bd902c1 100644 --- a/dataset/persistence/util.py +++ b/dataset/persistence/util.py @@ -15,15 +15,22 @@ def guess_type(sample): return UnicodeText -def resultiter(rp): +class ResultIter(object): """ SQLAlchemy ResultProxies are not iterable to get a list of dictionaries. This is to wrap them. """ - keys = rp.keys() - while True: - row = rp.fetchone() + + def __init__(self, rp): + self.rp = rp + self.count = rp.rowcount + self.keys = self.rp.keys() + + def next(self): + row = self.rp.fetchone() if row is None: - break - yield dict(zip(keys, row)) - + raise StopIteration + return dict(zip(self.keys, row)) + + def __iter__(self): + return self diff --git a/docs/api.rst b/docs/api.rst index dc6b070..c302ab8 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -4,6 +4,8 @@ API documentation .. autofunction:: dataset.connect +.. autofunction:: dataset.freeze + Database --------