Speed up insert_many by syncing columns before inserting rows, not on the go

This commit is contained in:
Abdurrahmaan Iqbal 2019-07-08 23:08:30 +01:00
parent a9f3eb86b2
commit 76b6165181
2 changed files with 15 additions and 8 deletions

View File

@ -122,17 +122,28 @@ class Table(object):
rows = [dict(name='Dolly')] * 10000
table.insert_many(rows)
"""
# Sync table before inputting rows.
sync_row = {}
for row in rows:
# Only get non-existing columns.
for key in set(row.keys()).difference(set(sync_row.keys())):
# Get a sample of the new column(s) from the row.
sync_row[key] = row[key]
self._sync_columns(sync_row, ensure, types=types)
# Get columns name list to be used for padding later.
columns = sync_row.keys()
chunk = []
for row in rows:
row = self._sync_columns(row, ensure, types=types)
chunk.append(row)
if len(chunk) == chunk_size:
chunk = pad_chunk_columns(chunk)
chunk = pad_chunk_columns(chunk, columns)
self.table.insert().execute(chunk)
chunk = []
if len(chunk):
chunk = pad_chunk_columns(chunk)
chunk = pad_chunk_columns(chunk, columns)
self.table.insert().execute(chunk)
def update(self, row, keys, ensure=None, types=None, return_count=False):
@ -198,7 +209,6 @@ class Table(object):
)
self.db.executable.execute(stmt, chunk)
chunk = []
columns = set()
def upsert(self, row, keys, ensure=None, types=None):
"""An UPSERT is a smart combination of insert and update.

View File

@ -108,12 +108,9 @@ def ensure_tuple(obj):
return obj,
def pad_chunk_columns(chunk):
def pad_chunk_columns(chunk, columns):
"""Given a set of items to be inserted, make sure they all have the
same columns by padding columns with None if they are missing."""
columns = set()
for record in chunk:
columns.update(record.keys())
for record in chunk:
for column in columns:
record.setdefault(column, None)