224 lines
9.2 KiB
Python
224 lines
9.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Merge Duplicate Contacts
|
|
|
|
This script finds and merges duplicate contacts in the database.
|
|
Duplicates are defined as contacts with the same Ethereum address.
|
|
|
|
Usage:
|
|
python merge_duplicate_contacts.py
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import psycopg2
|
|
from psycopg2.extras import RealDictCursor
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables
|
|
load_dotenv()
|
|
|
|
# Contact columns that are back-filled on the primary record when it has no
# value of its own. These names are interpolated into SQL, so this tuple must
# only ever contain trusted, hard-coded identifiers.
_MERGEABLE_CONTACT_FIELDS = (
    "ensName",
    "name",
    "email",
    "twitter",
    "discord",
    "telegram",
    "farcaster",
    "otherSocial",
    "warpcastAddress",
    "ethereumAddress2",
)

# Statements that copy a duplicate's child rows onto the primary contact, in
# the order they are executed. Each entry is (label used in progress output,
# SQL taking the parameters (primary_id, duplicate_id)).
_CHILD_ROW_COPIES = (
    (
        "NFT holdings",
        """
        INSERT INTO "NftHolding" (
            id, "contactId", "contractAddress", "tokenId", "collectionName",
            "acquiredAt", "createdAt", "updatedAt"
        )
        SELECT
            gen_random_uuid(), %s, "contractAddress", "tokenId", "collectionName",
            "acquiredAt", "createdAt", NOW()
        FROM "NftHolding"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "contractAddress", "tokenId") DO NOTHING
        """,
    ),
    (
        "token holdings",
        """
        INSERT INTO "TokenHolding" (
            id, "contactId", "contractAddress", "tokenSymbol", balance,
            "lastUpdated", "createdAt", "updatedAt"
        )
        SELECT
            gen_random_uuid(), %s, "contractAddress", "tokenSymbol", balance,
            "lastUpdated", "createdAt", NOW()
        FROM "TokenHolding"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "contractAddress") DO NOTHING
        """,
    ),
    (
        "DAO memberships",
        """
        INSERT INTO "DaoMembership" (
            id, "contactId", "daoName", "daoType", "joinedAt", "createdAt", "updatedAt"
        )
        SELECT
            gen_random_uuid(), %s, "daoName", "daoType", "joinedAt", "createdAt", NOW()
        FROM "DaoMembership"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "daoName") DO NOTHING
        """,
    ),
    (
        # Notes have no ON CONFLICT clause: every note of the duplicate is copied.
        "notes",
        """
        INSERT INTO "Note" (
            id, "contactId", content, "createdAt", "updatedAt"
        )
        SELECT
            gen_random_uuid(), %s, content, "createdAt", NOW()
        FROM "Note"
        WHERE "contactId" = %s
        """,
    ),
    (
        "tags",
        """
        INSERT INTO "TagsOnContacts" (
            "contactId", "tagId", "assignedAt"
        )
        SELECT
            %s, "tagId", "assignedAt"
        FROM "TagsOnContacts"
        WHERE "contactId" = %s
        ON CONFLICT ("contactId", "tagId") DO NOTHING
        """,
    ),
)

# Only executed when the "ContactSource" table is present in the database.
_CONTACT_SOURCE_COPY = (
    "contact sources",
    """
    INSERT INTO "ContactSource" (
        id, "contactId", "dataSourceId", "createdAt", "updatedAt"
    )
    SELECT
        gen_random_uuid(), %s, "dataSourceId", "createdAt", NOW()
    FROM "ContactSource"
    WHERE "contactId" = %s
    ON CONFLICT ("contactId", "dataSourceId") DO NOTHING
    """,
)


def _resolve_database_url():
    """Return the connection string from PYTHON_DATABASE_URL or DATABASE_URL."""
    db_url = os.getenv("PYTHON_DATABASE_URL")
    if db_url:
        return db_url

    fallback_url = os.getenv("DATABASE_URL")
    if fallback_url is None:
        raise RuntimeError(
            "No database URL configured: set PYTHON_DATABASE_URL or DATABASE_URL"
        )
    # Drop a trailing "?schema=..." parameter before handing the URL to
    # psycopg2 (presumably an ORM-specific option libpq rejects - TODO confirm).
    return fallback_url.split("?schema=")[0]


def _find_duplicate_addresses(cursor):
    """Return one row per Ethereum address shared by more than one contact."""
    # NULL addresses would be grouped together here but can never match the
    # `"ethereumAddress" = %s` lookup used for merging, so exclude them up
    # front to keep the reported count accurate.
    cursor.execute(
        """
        SELECT "ethereumAddress", COUNT(*) as count
        FROM "Contact"
        WHERE "ethereumAddress" IS NOT NULL
        GROUP BY "ethereumAddress"
        HAVING COUNT(*) > 1
        ORDER BY COUNT(*) DESC
        """
    )
    return cursor.fetchall()


def _contact_source_table_exists(cursor):
    """Return True when a table named "ContactSource" exists."""
    cursor.execute(
        """
        SELECT EXISTS (
            SELECT FROM information_schema.tables
            WHERE table_name = 'ContactSource'
        ) as exists
        """
    )
    row = cursor.fetchone()
    return bool(row and row["exists"])


def _backfill_primary(cursor, primary, duplicate):
    """Copy values the primary lacks from ``duplicate`` onto the primary row."""
    assignments = []
    values = []
    for field in _MERGEABLE_CONTACT_FIELDS:
        if duplicate[field] is not None and primary[field] is None:
            assignments.append(f'"{field}" = %s')
            values.append(duplicate[field])
            # Remember the fill locally so a later duplicate cannot overwrite
            # a value that was already taken from an earlier one.
            primary[field] = duplicate[field]
            print(f" Updating primary contact {field} to {duplicate[field]}")

    if not assignments:
        return

    values.append(primary["id"])
    # Column names come from the hard-coded tuple above; values are bound.
    cursor.execute(
        f"""
        UPDATE "Contact"
        SET {', '.join(assignments)}, "updatedAt" = NOW()
        WHERE id = %s
        """,
        values,
    )


def _merge_address_group(cursor, eth_address, child_copies):
    """Merge all contacts sharing ``eth_address``; return how many were removed."""
    cursor.execute(
        """
        SELECT id, "ethereumAddress", "ensName", name, email,
               twitter, discord, telegram, farcaster, "otherSocial",
               "warpcastAddress", "ethereumAddress2", "createdAt"
        FROM "Contact"
        WHERE "ethereumAddress" = %s
        ORDER BY "createdAt" ASC
        """,
        (eth_address,),
    )
    contacts = cursor.fetchall()

    # Skip if we somehow don't have duplicates
    if len(contacts) <= 1:
        return 0

    # The oldest contact (first by "createdAt") is kept as the primary.
    # Work on a plain dict copy so back-filled values can be tracked locally.
    primary = dict(contacts[0])
    primary_id = primary["id"]

    print(f"Processing {len(contacts)} duplicates for address {eth_address}")
    print(f" Primary contact: {primary_id}")

    for contact in contacts[1:]:
        contact_id = contact["id"]

        for label, copy_sql in child_copies:
            print(f" Moving {label} from {contact_id} to {primary_id}")
            cursor.execute(copy_sql, (primary_id, contact_id))

        _backfill_primary(cursor, primary, contact)

        print(f" Deleting duplicate contact {contact_id}")
        cursor.execute('DELETE FROM "Contact" WHERE id = %s', (contact_id,))

    return len(contacts) - 1


def merge_duplicate_contacts():
    """
    Find and merge duplicate contacts.

    Contacts sharing the same "ethereumAddress" are merged into the oldest one
    (by "createdAt"): child rows (NFT holdings, token holdings, DAO
    memberships, notes, tags and, when that table exists, contact sources) are
    copied to it, empty profile fields are back-filled from the duplicates in
    creation order, and the duplicates are then deleted.

    Each address is merged in its own transaction, so a failure rolls back the
    address being processed while earlier addresses stay committed.

    Raises:
        RuntimeError: if neither PYTHON_DATABASE_URL nor DATABASE_URL is set.

    Database errors raised by psycopg2 propagate to the caller after the
    current transaction is rolled back and the connection is closed.
    """
    # Resolve the URL first so a configuration problem is reported before any
    # connection attempt is made.
    db_url = _resolve_database_url()

    # Connect to the database
    conn = psycopg2.connect(db_url)
    try:
        # Explicit transactions: `with conn` commits on success and rolls back
        # on error, which keeps every per-address merge atomic.
        conn.autocommit = False

        with conn, conn.cursor(cursor_factory=RealDictCursor) as cursor:
            duplicates = _find_duplicate_addresses(cursor)
            # Checked once here rather than once per duplicate contact.
            has_contact_source = _contact_source_table_exists(cursor)

        print(f"Found {len(duplicates)} Ethereum addresses with duplicate contacts")

        child_copies = _CHILD_ROW_COPIES
        if has_contact_source:
            child_copies = child_copies + (_CONTACT_SOURCE_COPY,)

        # Process each set of duplicates
        total_merged = 0
        for duplicate in duplicates:
            with conn, conn.cursor(cursor_factory=RealDictCursor) as cursor:
                total_merged += _merge_address_group(
                    cursor, duplicate["ethereumAddress"], child_copies
                )

        print(f"Merged {total_merged} duplicate contacts")
    finally:
        conn.close()
|
|
|
|
def main():
    """Main entry point for the script.

    Parses the command line and then runs the duplicate-contact merge. No
    options are defined, so parsing only provides ``--help`` and rejects
    unexpected arguments.
    """
    parser = argparse.ArgumentParser(description="Merge duplicate contacts")
    # The parsed namespace is intentionally discarded: there are no options
    # to read, but parsing still validates the command line.
    parser.parse_args()

    merge_duplicate_contacts()


if __name__ == "__main__":
    main()