# Tools for data merge from two databases into third one
def read_rows(cur, table_name, column_spec, pk, pk_values):
ps = cur.prep("select %s from %s where %s = ?" % (column_spec, table_name, pk))
for pk_value in pk_values:
cur.execute(ps,pk_value)
for out_row in cur:
yield out_row
def copy_data(target, insert, sources):
for source in sources:
target.executemany(insert,source)
def merge_table(con_A, con_B, con_target, table_name, pk):
# Cursors
cursor_A = con_A.cursor()
cursor_B = con_B.cursor()
cursor_target = con_target.cursor()
# Get column names
ps = cursor_A.prep("select * from %s" % table_name)
column_names = [desc[0] for desc in ps.description]
column_spec = ",".join(column_names)
# Get information about data
cursor_A.execute("select %s from %s" % (pk, table_name))
pks_A = set(cursor_A)
cursor_B.execute("select %s from %s" % (pk, table_name))
pks_B = set(cursor_B)
miss_A = pks_B - pks_A
miss_B = pks_A - pks_B
# merge in best possible way
select_all = "select %s from %s" % (column_spec, table_name)
insert = cursor_target.prep("insert into %s (%s) values (%s)" % (table_name, column_spec,
','.join('?' for i in column_names)))
if not miss_A or not (miss_A or miss_B):
# Both are equal or A is superset of B, move A only
cursor_A.execute(select_all)
copy_data(cursor_target, insert, (cursor_A,))
elif miss_A and not miss_B:
# B is superset of A, move B only
cursor_B.execute(select_all)
copy_data(cursor_target, insert, (cursor_B,))
elif pks_A.isdisjoint(pks_B):
# Tables don't overlap, copy both
cursor_A.execute(select_all)
cursor_B.execute(select_all)
copy_data(cursor_target, insert, (cursor_A, cursor_B))
elif len(miss_A) < len(miss_B):
# Copy all from A, add B
cursor_A.execute(select_all)
copy_data(cursor_target, insert, (cursor_A,
read_rows(cursor_B, table_name, column_spec, pk, miss_A)))
else:
# Copy all from B, add A
cursor_B.execute(select_all)
copy_data(cursor_target, insert, (cursor_B,
read_rows(cursor_A, table_name, column_spec, pk, miss_B)))
con_target.commit()