#!/usr/bin/env python3
"""
Merge Duplicate Contacts

This script finds and merges duplicate contacts in the database.
Duplicates are defined as contacts with the same Ethereum address.

Usage:
    python merge_duplicate_contacts.py
"""

import os
import sys
import argparse

import psycopg2
from psycopg2.extras import RealDictCursor
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Columns copied from a duplicate onto the primary contact when the primary
# has no value for them. The names are interpolated into SQL as identifiers,
# so this must remain a hard-coded constant (never user input).
_MERGEABLE_FIELDS = (
    "ensName",
    "name",
    "email",
    "twitter",
    "discord",
    "telegram",
    "farcaster",
    "otherSocial",
    "warpcastAddress",
    "ethereumAddress2",
)

# NULL addresses are excluded: they are not "the same address", and the
# per-address lookup below (`= %s`) could never match them anyway.
_FIND_DUPLICATE_ADDRESSES = """
    SELECT "ethereumAddress", COUNT(*) as count
    FROM "Contact"
    WHERE "ethereumAddress" IS NOT NULL
    GROUP BY "ethereumAddress"
    HAVING COUNT(*) > 1
    ORDER BY COUNT(*) DESC
"""

# `id` is a tie-breaker so the choice of primary is deterministic when two
# contacts share the same "createdAt".
_SELECT_CONTACTS_FOR_ADDRESS = """
    SELECT id, "ethereumAddress", "ensName", name, email, twitter, discord,
           telegram, farcaster, "otherSocial", "warpcastAddress",
           "ethereumAddress2", "createdAt"
    FROM "Contact"
    WHERE "ethereumAddress" = %s
    ORDER BY "createdAt" ASC, id ASC
"""

_CONTACT_SOURCE_TABLE_EXISTS = """
    SELECT EXISTS (
        SELECT FROM information_schema.tables
        WHERE table_name = 'ContactSource'
    ) as exists
"""

# (label used in progress output, INSERT ... SELECT statement). Every
# statement takes the parameters (primary_id, duplicate_id), in that order.
_RELATED_ROW_COPIES = (
    (
        "NFT holdings",
        """
        INSERT INTO "NftHolding" (
            id, "contactId", "contractAddress", "tokenId", "collectionName",
            "acquiredAt", "createdAt", "updatedAt"
        )
        SELECT gen_random_uuid(), %s, "contractAddress", "tokenId",
               "collectionName", "acquiredAt", "createdAt", NOW()
        FROM "NftHolding"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "contractAddress", "tokenId") DO NOTHING
        """,
    ),
    (
        "token holdings",
        """
        INSERT INTO "TokenHolding" (
            id, "contactId", "contractAddress", "tokenSymbol", balance,
            "lastUpdated", "createdAt", "updatedAt"
        )
        SELECT gen_random_uuid(), %s, "contractAddress", "tokenSymbol",
               balance, "lastUpdated", "createdAt", NOW()
        FROM "TokenHolding"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "contractAddress") DO NOTHING
        """,
    ),
    (
        "DAO memberships",
        """
        INSERT INTO "DaoMembership" (
            id, "contactId", "daoName", "daoType", "joinedAt",
            "createdAt", "updatedAt"
        )
        SELECT gen_random_uuid(), %s, "daoName", "daoType", "joinedAt",
               "createdAt", NOW()
        FROM "DaoMembership"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "daoName") DO NOTHING
        """,
    ),
    (
        # No ON CONFLICT here: every note of the duplicate is copied.
        "notes",
        """
        INSERT INTO "Note" (
            id, "contactId", content, "createdAt", "updatedAt"
        )
        SELECT gen_random_uuid(), %s, content, "createdAt", NOW()
        FROM "Note"
        WHERE "contactId" = %s
        """,
    ),
    (
        "tags",
        """
        INSERT INTO "TagsOnContacts" (
            "contactId", "tagId", "assignedAt"
        )
        SELECT %s, "tagId", "assignedAt"
        FROM "TagsOnContacts"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "tagId") DO NOTHING
        """,
    ),
)

# Only executed when the "ContactSource" table is present.
_CONTACT_SOURCE_COPY = (
    "contact sources",
    """
    INSERT INTO "ContactSource" (
        id, "contactId", "dataSourceId", "createdAt", "updatedAt"
    )
    SELECT gen_random_uuid(), %s, "dataSourceId", "createdAt", NOW()
    FROM "ContactSource"
    WHERE "contactId" = %s
    ON CONFLICT ("contactId", "dataSourceId") DO NOTHING
    """,
)


def _get_database_url():
    """Return the connection string to pass to psycopg2.

    ``PYTHON_DATABASE_URL`` is used verbatim when set. Otherwise
    ``DATABASE_URL`` is used with everything from ``?schema=`` onwards
    removed.

    Raises:
        RuntimeError: If neither environment variable is set.
    """
    db_url = os.getenv("PYTHON_DATABASE_URL")
    if db_url:
        return db_url

    fallback_url = os.getenv("DATABASE_URL")
    if not fallback_url:
        raise RuntimeError(
            "Neither PYTHON_DATABASE_URL nor DATABASE_URL is set"
        )
    return fallback_url.split("?schema=")[0]


def _contact_source_table_exists(cursor):
    """Return True if a table named "ContactSource" exists in the database."""
    cursor.execute(_CONTACT_SOURCE_TABLE_EXISTS)
    result = cursor.fetchone()
    return bool(result and result["exists"])


def _copy_related_rows(cursor, primary_id, contact_id, has_contact_source):
    """Copy the duplicate's related rows so they also belong to the primary."""
    copies = list(_RELATED_ROW_COPIES)
    if has_contact_source:
        copies.append(_CONTACT_SOURCE_COPY)

    for label, query in copies:
        print(f" Moving {label} from {contact_id} to {primary_id}")
        cursor.execute(query, (primary_id, contact_id))


def _fill_missing_fields(cursor, primary_contact, contact):
    """Fill fields that are NULL on the primary from the duplicate's values.

    ``primary_contact`` is updated in place as well, so a value supplied by
    one duplicate is not overwritten by a later duplicate of the same address.
    """
    update_fields = []
    update_values = []
    for field in _MERGEABLE_FIELDS:
        if contact[field] is not None and primary_contact[field] is None:
            update_fields.append(f'"{field}" = %s')
            update_values.append(contact[field])
            primary_contact[field] = contact[field]
            print(f" Updating primary contact {field} to {contact[field]}")

    if not update_fields:
        return

    update_values.append(primary_contact["id"])
    query = f"""
        UPDATE "Contact"
        SET {', '.join(update_fields)}, "updatedAt" = NOW()
        WHERE id = %s
    """
    cursor.execute(query, update_values)


def _merge_address_group(cursor, eth_address, has_contact_source):
    """Merge all contacts sharing ``eth_address`` into the oldest one.

    Returns:
        The number of duplicate contacts that were deleted.
    """
    cursor.execute(_SELECT_CONTACTS_FOR_ADDRESS, (eth_address,))
    contacts = cursor.fetchall()

    # Skip if we somehow don't have duplicates
    if len(contacts) <= 1:
        return 0

    # Choose the oldest contact as the primary. Work on a plain dict copy so
    # the snapshot can be kept up to date as fields are filled in.
    primary_contact = dict(contacts[0])
    primary_id = primary_contact["id"]

    print(f"Processing {len(contacts)} duplicates for address {eth_address}")
    print(f" Primary contact: {primary_id}")

    merged = 0
    for contact in contacts[1:]:
        contact_id = contact["id"]

        _copy_related_rows(cursor, primary_id, contact_id, has_contact_source)
        _fill_missing_fields(cursor, primary_contact, contact)

        # NOTE(review): the duplicate's own related rows are not deleted
        # explicitly; this presumably relies on ON DELETE CASCADE foreign
        # keys -- confirm against the schema.
        print(f" Deleting duplicate contact {contact_id}")
        cursor.execute('DELETE FROM "Contact" WHERE id = %s', (contact_id,))
        merged += 1

    return merged


def merge_duplicate_contacts():
    """
    Find and merge duplicate contacts.

    For every Ethereum address shared by more than one contact, the oldest
    contact is kept as the primary: related rows of the other contacts are
    copied to it, its empty fields are filled from them, and the other
    contacts are deleted.

    Each address is merged in its own transaction, so a failure rolls back
    the partially merged address and leaves previously merged addresses
    committed. The exception is then re-raised to the caller.

    Raises:
        RuntimeError: If no database URL is configured in the environment.
    """
    db_url = _get_database_url()

    # Connect to the database (autocommit stays off so merges are atomic)
    conn = psycopg2.connect(db_url)

    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            # Find duplicate Ethereum addresses
            cursor.execute(_FIND_DUPLICATE_ADDRESSES)
            duplicates = cursor.fetchall()

            print(f"Found {len(duplicates)} Ethereum addresses with duplicate contacts")

            # The schema does not change while the script runs, so check once.
            has_contact_source = _contact_source_table_exists(cursor)

            # End the implicit read-only transaction before merging starts.
            conn.commit()

            # Process each set of duplicates
            total_merged = 0
            for duplicate in duplicates:
                # `with conn` commits on success and rolls back on error; it
                # does not close the connection.
                with conn:
                    total_merged += _merge_address_group(
                        cursor,
                        duplicate["ethereumAddress"],
                        has_contact_source,
                    )

            print(f"Merged {total_merged} duplicate contacts")
    finally:
        conn.close()


def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description="Merge duplicate contacts")
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    parser.parse_args()

    merge_duplicate_contacts()


if __name__ == "__main__":
    main()