Speed up insert_many by syncing columns once before inserting, not row by row on the go

This commit is contained in:
Abdurrahmaan Iqbal 2019-07-08 23:08:30 +01:00
parent a9f3eb86b2
commit 76b6165181
2 changed files with 15 additions and 8 deletions

View File

@ -122,17 +122,28 @@ class Table(object):
rows = [dict(name='Dolly')] * 10000 rows = [dict(name='Dolly')] * 10000
table.insert_many(rows) table.insert_many(rows)
""" """
# Sync table before inputting rows.
sync_row = {}
for row in rows:
# Only get non-existing columns.
for key in set(row.keys()).difference(set(sync_row.keys())):
# Get a sample of the new column(s) from the row.
sync_row[key] = row[key]
self._sync_columns(sync_row, ensure, types=types)
# Get columns name list to be used for padding later.
columns = sync_row.keys()
chunk = [] chunk = []
for row in rows: for row in rows:
row = self._sync_columns(row, ensure, types=types)
chunk.append(row) chunk.append(row)
if len(chunk) == chunk_size: if len(chunk) == chunk_size:
chunk = pad_chunk_columns(chunk) chunk = pad_chunk_columns(chunk, columns)
self.table.insert().execute(chunk) self.table.insert().execute(chunk)
chunk = [] chunk = []
if len(chunk): if len(chunk):
chunk = pad_chunk_columns(chunk) chunk = pad_chunk_columns(chunk, columns)
self.table.insert().execute(chunk) self.table.insert().execute(chunk)
def update(self, row, keys, ensure=None, types=None, return_count=False): def update(self, row, keys, ensure=None, types=None, return_count=False):
@ -198,7 +209,6 @@ class Table(object):
) )
self.db.executable.execute(stmt, chunk) self.db.executable.execute(stmt, chunk)
chunk = [] chunk = []
columns = set()
def upsert(self, row, keys, ensure=None, types=None): def upsert(self, row, keys, ensure=None, types=None):
"""An UPSERT is a smart combination of insert and update. """An UPSERT is a smart combination of insert and update.

View File

@ -108,12 +108,9 @@ def ensure_tuple(obj):
return obj, return obj,
def pad_chunk_columns(chunk): def pad_chunk_columns(chunk, columns):
"""Given a set of items to be inserted, make sure they all have the """Given a set of items to be inserted, make sure they all have the
same columns by padding columns with None if they are missing.""" same columns by padding columns with None if they are missing."""
columns = set()
for record in chunk:
columns.update(record.keys())
for record in chunk: for record in chunk:
for column in columns: for column in columns:
record.setdefault(column, None) record.setdefault(column, None)