fbmerge.py

# Tools for data merge from two databases into third one

def read_rows(cur, table_name, column_spec, pk, pk_values):
    ps = cur.prep("select %s from %s where %s = ?" % (column_spec, table_name, pk))
    for pk_value in pk_values:
        cur.execute(ps,pk_value)
        for out_row in cur:
            yield out_row

def copy_data(target, insert, sources):
    for source in sources:
        target.executemany(insert,source)

def merge_table(con_A, con_B, con_target, table_name, pk):
    # Cursors
    cursor_A = con_A.cursor()
    cursor_B = con_B.cursor()
    cursor_target = con_target.cursor()

    # Get column names
    ps = cursor_A.prep("select * from %s" % table_name)
    column_names = [desc[0] for desc in ps.description]
    column_spec = ",".join(column_names)

    # Get information about data
    cursor_A.execute("select %s from %s" % (pk, table_name))
    pks_A = set(cursor_A)
    cursor_B.execute("select %s from %s" % (pk, table_name))
    pks_B = set(cursor_B)
    miss_A = pks_B - pks_A
    miss_B = pks_A - pks_B

    # merge in best possible way
    select_all = "select %s from %s" % (column_spec, table_name)
    insert = cursor_target.prep("insert into %s (%s) values (%s)" % (table_name, column_spec,
         ','.join('?' for i in column_names)))
    if not miss_A or not (miss_A or miss_B):
        # Both are equal or A is superset of B, move A only
        cursor_A.execute(select_all)
        copy_data(cursor_target, insert, (cursor_A,))
    elif miss_A and not miss_B:
        # B is superset of A, move B only
        cursor_B.execute(select_all)
        copy_data(cursor_target, insert, (cursor_B,))
    elif pks_A.isdisjoint(pks_B):
        # Tables don't overlap, copy both
        cursor_A.execute(select_all)
        cursor_B.execute(select_all)
        copy_data(cursor_target, insert, (cursor_A, cursor_B))
    elif len(miss_A) < len(miss_B):
        # Copy all from A, add B
        cursor_A.execute(select_all)
        copy_data(cursor_target, insert, (cursor_A,
            read_rows(cursor_B, table_name, column_spec, pk, miss_A)))
    else:
        # Copy all from B, add A
        cursor_B.execute(select_all)
        copy_data(cursor_target, insert, (cursor_B,
            read_rows(cursor_A, table_name, column_spec, pk, miss_B)))
    con_target.commit()

Previous topic

Examples for Fun with Firebird and Python

Next topic

merge_all.py