Speed up insert_many by syncing columns before input, not on the go
This commit is contained in:
parent
a9f3eb86b2
commit
76b6165181
@ -122,17 +122,28 @@ class Table(object):
|
|||||||
rows = [dict(name='Dolly')] * 10000
|
rows = [dict(name='Dolly')] * 10000
|
||||||
table.insert_many(rows)
|
table.insert_many(rows)
|
||||||
"""
|
"""
|
||||||
|
# Sync table before inputting rows.
|
||||||
|
sync_row = {}
|
||||||
|
for row in rows:
|
||||||
|
# Only get non-existing columns.
|
||||||
|
for key in set(row.keys()).difference(set(sync_row.keys())):
|
||||||
|
# Get a sample of the new column(s) from the row.
|
||||||
|
sync_row[key] = row[key]
|
||||||
|
self._sync_columns(sync_row, ensure, types=types)
|
||||||
|
|
||||||
|
# Get columns name list to be used for padding later.
|
||||||
|
columns = sync_row.keys()
|
||||||
|
|
||||||
chunk = []
|
chunk = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
row = self._sync_columns(row, ensure, types=types)
|
|
||||||
chunk.append(row)
|
chunk.append(row)
|
||||||
if len(chunk) == chunk_size:
|
if len(chunk) == chunk_size:
|
||||||
chunk = pad_chunk_columns(chunk)
|
chunk = pad_chunk_columns(chunk, columns)
|
||||||
self.table.insert().execute(chunk)
|
self.table.insert().execute(chunk)
|
||||||
chunk = []
|
chunk = []
|
||||||
|
|
||||||
if len(chunk):
|
if len(chunk):
|
||||||
chunk = pad_chunk_columns(chunk)
|
chunk = pad_chunk_columns(chunk, columns)
|
||||||
self.table.insert().execute(chunk)
|
self.table.insert().execute(chunk)
|
||||||
|
|
||||||
def update(self, row, keys, ensure=None, types=None, return_count=False):
|
def update(self, row, keys, ensure=None, types=None, return_count=False):
|
||||||
@ -198,7 +209,6 @@ class Table(object):
|
|||||||
)
|
)
|
||||||
self.db.executable.execute(stmt, chunk)
|
self.db.executable.execute(stmt, chunk)
|
||||||
chunk = []
|
chunk = []
|
||||||
columns = set()
|
|
||||||
|
|
||||||
def upsert(self, row, keys, ensure=None, types=None):
|
def upsert(self, row, keys, ensure=None, types=None):
|
||||||
"""An UPSERT is a smart combination of insert and update.
|
"""An UPSERT is a smart combination of insert and update.
|
||||||
|
|||||||
@ -108,12 +108,9 @@ def ensure_tuple(obj):
|
|||||||
return obj,
|
return obj,
|
||||||
|
|
||||||
|
|
||||||
def pad_chunk_columns(chunk):
|
def pad_chunk_columns(chunk, columns):
|
||||||
"""Given a set of items to be inserted, make sure they all have the
|
"""Given a set of items to be inserted, make sure they all have the
|
||||||
same columns by padding columns with None if they are missing."""
|
same columns by padding columns with None if they are missing."""
|
||||||
columns = set()
|
|
||||||
for record in chunk:
|
|
||||||
columns.update(record.keys())
|
|
||||||
for record in chunk:
|
for record in chunk:
|
||||||
for column in columns:
|
for column in columns:
|
||||||
record.setdefault(column, None)
|
record.setdefault(column, None)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user