stones/scripts/utils/db_connector.py

462 lines
17 KiB
Python

#!/usr/bin/env python3
"""
Database Connector
Utility for connecting to the PostgreSQL database and performing operations.
"""
import os
import sys
from typing import Dict, List, Optional, Any
import psycopg2
from psycopg2.extras import RealDictCursor
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class DatabaseConnector:
"""Connector for the PostgreSQL database."""
def __init__(self):
"""Initialize the database connector."""
self.db_url = os.getenv("PYTHON_DATABASE_URL")
if not self.db_url:
# Fallback to DATABASE_URL but remove the schema parameter
db_url = os.getenv("DATABASE_URL")
if db_url and "?schema=" in db_url:
self.db_url = db_url.split("?schema=")[0]
else:
raise ValueError("DATABASE_URL not found in environment variables")
# Connect to the database
self.conn = psycopg2.connect(self.db_url)
self.conn.autocommit = True
def __del__(self):
"""Close the database connection when the object is destroyed."""
if hasattr(self, 'conn') and self.conn:
self.conn.close()
def execute_query(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
"""
Execute a SQL query and return the results.
Args:
query: SQL query to execute
params: Parameters for the query
Returns:
List of dictionaries containing the query results
"""
with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
cursor.execute(query, params or {})
if cursor.description:
return cursor.fetchall()
return []
def execute_update(self, query: str, params: Optional[Dict[str, Any]] = None) -> int:
"""
Execute a SQL update query and return the number of affected rows.
Args:
query: SQL query to execute
params: Parameters for the query
Returns:
Number of affected rows
"""
with self.conn.cursor() as cursor:
cursor.execute(query, params or {})
return cursor.rowcount
def upsert_contact(self, ethereum_address: str, ens_name: Optional[str] = None,
ethereum_address2: Optional[str] = None, warpcast_address: Optional[str] = None,
farcaster: Optional[str] = None, other_social: Optional[str] = None) -> str:
"""
Insert or update a contact in the database.
Args:
ethereum_address: Ethereum address of the contact
ens_name: ENS name of the contact, if available
ethereum_address2: Second Ethereum address of the contact, if available
warpcast_address: Warpcast address of the contact, if available
farcaster: Farcaster handle of the contact, if available
other_social: Other social media information of the contact, if available
Returns:
ID of the inserted or updated contact
"""
query = """
INSERT INTO "Contact" (
id, "ethereumAddress", "ensName", "ethereumAddress2",
"warpcastAddress", "farcaster", "otherSocial", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(address)s, %(ens_name)s, %(address2)s,
%(warpcast)s, %(farcaster)s, %(other_social)s, NOW(), NOW()
)
ON CONFLICT ("ethereumAddress") DO UPDATE
SET "ensName" = COALESCE(EXCLUDED."ensName", "Contact"."ensName"),
"ethereumAddress2" = COALESCE(EXCLUDED."ethereumAddress2", "Contact"."ethereumAddress2"),
"warpcastAddress" = COALESCE(EXCLUDED."warpcastAddress", "Contact"."warpcastAddress"),
"farcaster" = COALESCE(EXCLUDED."farcaster", "Contact"."farcaster"),
"otherSocial" = COALESCE(EXCLUDED."otherSocial", "Contact"."otherSocial"),
"updatedAt" = NOW()
RETURNING id
"""
result = self.execute_query(query, {
"address": ethereum_address,
"ens_name": ens_name,
"address2": ethereum_address2,
"warpcast": warpcast_address,
"farcaster": farcaster,
"other_social": other_social
})
return result[0]["id"]
def update_contact(self, contact_id: str, data: Dict[str, Any]) -> None:
"""
Update a contact with additional information.
Args:
contact_id: ID of the contact to update
data: Dictionary of fields to update
"""
# Build the SET clause dynamically based on provided data
set_clauses = []
params = {"id": contact_id}
for key, value in data.items():
if value is not None:
set_clauses.append(f'"{key}" = %({key})s')
params[key] = value
if not set_clauses:
return
set_clause = ", ".join(set_clauses)
set_clause += ', "updatedAt" = NOW()'
query = f"""
UPDATE "Contact"
SET {set_clause}
WHERE id = %(id)s
"""
self.execute_update(query, params)
def add_nft_holding(self, contact_id: str, contract_address: str, token_id: str,
collection_name: Optional[str] = None) -> None:
"""
Add an NFT holding for a contact.
Args:
contact_id: ID of the contact
contract_address: Contract address of the NFT
token_id: Token ID of the NFT
collection_name: Name of the NFT collection
"""
query = """
INSERT INTO "NftHolding" (
id, "contactId", "contractAddress", "tokenId", "collectionName", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(contact_id)s, %(contract_address)s, %(token_id)s,
%(collection_name)s, NOW(), NOW()
)
ON CONFLICT ("contactId", "contractAddress", "tokenId") DO UPDATE
SET "collectionName" = COALESCE(EXCLUDED."collectionName", "NftHolding"."collectionName"),
"updatedAt" = NOW()
"""
self.execute_update(query, {
"contact_id": contact_id,
"contract_address": contract_address,
"token_id": token_id,
"collection_name": collection_name
})
def add_token_holding(self, contact_id: str, contract_address: str, balance: str,
token_symbol: Optional[str] = None) -> None:
"""
Add a token holding for a contact.
Args:
contact_id: ID of the contact
contract_address: Contract address of the token
balance: Token balance
token_symbol: Symbol of the token
"""
query = """
INSERT INTO "TokenHolding" (
id, "contactId", "contractAddress", "tokenSymbol", balance, "lastUpdated", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(contact_id)s, %(contract_address)s, %(token_symbol)s,
%(balance)s, NOW(), NOW(), NOW()
)
ON CONFLICT ("contactId", "contractAddress") DO UPDATE
SET "tokenSymbol" = COALESCE(EXCLUDED."tokenSymbol", "TokenHolding"."tokenSymbol"),
balance = %(balance)s,
"lastUpdated" = NOW(),
"updatedAt" = NOW()
"""
self.execute_update(query, {
"contact_id": contact_id,
"contract_address": contract_address,
"token_symbol": token_symbol,
"balance": balance
})
def add_dao_membership(self, contact_id: str, dao_name: str, dao_type: str,
joined_at: Optional[str] = None) -> None:
"""
Add a DAO membership for a contact.
Args:
contact_id: ID of the contact
dao_name: Name of the DAO
dao_type: Type of the DAO
joined_at: Date when the contact joined the DAO
"""
query = """
INSERT INTO "DaoMembership" (
id, "contactId", "daoName", "daoType", "joinedAt", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(contact_id)s, %(dao_name)s, %(dao_type)s,
%(joined_at)s, NOW(), NOW()
)
ON CONFLICT ("contactId", "daoName") DO UPDATE
SET "daoType" = COALESCE(EXCLUDED."daoType", "DaoMembership"."daoType"),
"joinedAt" = COALESCE(EXCLUDED."joinedAt", "DaoMembership"."joinedAt"),
"updatedAt" = NOW()
"""
self.execute_update(query, {
"contact_id": contact_id,
"dao_name": dao_name,
"dao_type": dao_type,
"joined_at": joined_at
})
def add_tag_to_contact(self, contact_id: str, tag_name: str, color: Optional[str] = None) -> None:
"""
Add a tag to a contact.
Args:
contact_id: ID of the contact
tag_name: Name of the tag
color: Color of the tag
"""
# First, ensure the tag exists
tag_query = """
INSERT INTO "Tag" (id, name, color, "createdAt", "updatedAt")
VALUES (gen_random_uuid(), %(name)s, %(color)s, NOW(), NOW())
ON CONFLICT (name) DO UPDATE
SET color = COALESCE(EXCLUDED.color, "Tag".color),
"updatedAt" = NOW()
RETURNING id
"""
tag_result = self.execute_query(tag_query, {
"name": tag_name,
"color": color
})
tag_id = tag_result[0]["id"]
# Then, add the tag to the contact
relation_query = """
INSERT INTO "TagsOnContacts" ("contactId", "tagId", "assignedAt")
VALUES (%(contact_id)s, %(tag_id)s, NOW())
ON CONFLICT ("contactId", "tagId") DO NOTHING
"""
self.execute_update(relation_query, {
"contact_id": contact_id,
"tag_id": tag_id
})
def add_note_to_contact(self, contact_id: str, content: str) -> None:
"""
Add a note to a contact.
Args:
contact_id: ID of the contact
content: Content of the note
"""
query = """
INSERT INTO "Note" (id, "contactId", content, "createdAt", "updatedAt")
VALUES (gen_random_uuid(), %(contact_id)s, %(content)s, NOW(), NOW())
"""
self.execute_update(query, {
"contact_id": contact_id,
"content": content
})
def link_contact_to_data_source(self, contact_id: str, data_source_id: str) -> None:
"""
Link a contact to a data source in the ContactSource table.
Args:
contact_id: ID of the contact
data_source_id: ID of the data source
"""
# Check if the ContactSource table exists
query = """
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name = 'ContactSource'
)
"""
result = self.execute_query(query)
if not result[0]["exists"]:
# Table doesn't exist, create it
query = """
CREATE TABLE "ContactSource" (
id TEXT PRIMARY KEY,
"contactId" TEXT NOT NULL,
"dataSourceId" TEXT NOT NULL,
"createdAt" TIMESTAMP NOT NULL,
"updatedAt" TIMESTAMP NOT NULL,
FOREIGN KEY ("contactId") REFERENCES "Contact"(id) ON DELETE CASCADE,
FOREIGN KEY ("dataSourceId") REFERENCES "DataSource"(id) ON DELETE CASCADE,
UNIQUE("contactId", "dataSourceId")
);
CREATE INDEX "ContactSource_contactId_idx" ON "ContactSource"("contactId");
CREATE INDEX "ContactSource_dataSourceId_idx" ON "ContactSource"("dataSourceId");
"""
self.execute_update(query)
# Insert the link
query = """
INSERT INTO "ContactSource" (
id, "contactId", "dataSourceId", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(contact_id)s, %(data_source_id)s, NOW(), NOW()
)
ON CONFLICT ("contactId", "dataSourceId") DO UPDATE
SET "updatedAt" = NOW()
"""
self.execute_update(query, {
"contact_id": contact_id,
"data_source_id": data_source_id
})
def get_contact_sources(self, contact_id: str) -> List[Dict[str, Any]]:
"""
Get all data sources for a contact.
Args:
contact_id: ID of the contact
Returns:
List of data sources for the contact
"""
query = """
SELECT ds.id, ds.name, ds.type, ds.description
FROM "ContactSource" cs
JOIN "DataSource" ds ON cs."dataSourceId" = ds.id
WHERE cs."contactId" = %(contact_id)s
"""
return self.execute_query(query, {"contact_id": contact_id})
def upsert_data_source(self, name: str, source_type: str, description: Optional[str] = None) -> str:
"""
Insert or update a data source in the database.
Args:
name: Name of the data source
source_type: Type of the data source
description: Description of the data source
Returns:
ID of the inserted or updated data source
"""
query = """
INSERT INTO "DataSource" (id, name, type, description, "createdAt", "updatedAt")
VALUES (gen_random_uuid(), %(name)s, %(type)s, %(description)s, NOW(), NOW())
ON CONFLICT (name) DO UPDATE
SET type = EXCLUDED.type,
description = COALESCE(EXCLUDED.description, "DataSource".description),
"updatedAt" = NOW()
RETURNING id
"""
result = self.execute_query(query, {
"name": name,
"type": source_type,
"description": description
})
return result[0]["id"]
def create_scraping_job(self, source_name: str, status: str = "pending") -> str:
"""
Create a new scraping job.
Args:
source_name: Name of the data source
status: Initial status of the job
Returns:
ID of the created job
"""
query = """
INSERT INTO "ScrapingJob" (
id, "sourceName", status, "startedAt", "createdAt", "updatedAt"
)
VALUES (
gen_random_uuid(), %(source_name)s, %(status)s,
CASE WHEN %(status)s = 'running' THEN NOW() ELSE NULL END,
NOW(), NOW()
)
RETURNING id
"""
result = self.execute_query(query, {
"source_name": source_name,
"status": status
})
return result[0]["id"]
def update_scraping_job(self, job_id: str, status: str,
records_processed: int = 0, records_added: int = 0,
records_updated: int = 0, error_message: Optional[str] = None) -> None:
"""
Update a scraping job.
Args:
job_id: ID of the job to update
status: New status of the job
records_processed: Number of records processed
records_added: Number of records added
records_updated: Number of records updated
error_message: Error message if the job failed
"""
query = """
UPDATE "ScrapingJob"
SET status = %(status)s,
"startedAt" = CASE
WHEN %(status)s = 'running' AND "startedAt" IS NULL THEN NOW()
ELSE "startedAt"
END,
"completedAt" = CASE
WHEN %(status)s IN ('completed', 'failed') THEN NOW()
ELSE "completedAt"
END,
"recordsProcessed" = "recordsProcessed" + %(records_processed)s,
"recordsAdded" = "recordsAdded" + %(records_added)s,
"recordsUpdated" = "recordsUpdated" + %(records_updated)s,
"errorMessage" = CASE
WHEN %(error_message)s IS NOT NULL THEN %(error_message)s
ELSE "errorMessage"
END,
"updatedAt" = NOW()
WHERE id = %(job_id)s
"""
self.execute_update(query, {
"job_id": job_id,
"status": status,
"records_processed": records_processed,
"records_added": records_added,
"records_updated": records_updated,
"error_message": error_message
})