#!/usr/bin/env python3
"""
Public Nouns NFT Holders Scraper

This script fetches holders of the Public Nouns NFT contract and stores their
Ethereum addresses in the database. It also attempts to resolve ENS names for
the addresses.

Usage:
    python public_nouns_scraper.py
"""

import os
import sys
import argparse
import json
import time
from typing import Dict, List, Optional, Any

import requests
from web3 import Web3
from dotenv import load_dotenv

# Add parent directory to path to import utils
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.db_connector import DatabaseConnector
from utils.ens_resolver import ENSResolver
from utils.logger import setup_logger

# Load environment variables
load_dotenv()

# Setup logging
logger = setup_logger("public_nouns_scraper")


class PublicNounsHoldersScraper:
    """Scraper for Public Nouns NFT holders."""

    def __init__(self,
                 contract_address: str = "0x93ecac71499147627DFEc6d0E494d50fCFFf10EE",
                 collection_name: str = "Public Nouns"):
        """
        Initialize the Public Nouns NFT holders scraper.

        Args:
            contract_address: Ethereum address of the Public Nouns NFT contract
            collection_name: Name of the NFT collection
        """
        self.contract_address = Web3.to_checksum_address(contract_address)
        self.collection_name = collection_name
        self.etherscan_api_key = os.getenv("ETHEREUM_ETHERSCAN_API_KEY")
        self.alchemy_api_key = os.getenv("ALCHEMY_API_KEY")

        # Validate API keys before building clients that depend on them
        if not self.etherscan_api_key:
            logger.error("ETHEREUM_ETHERSCAN_API_KEY not found in environment variables")
            sys.exit(1)
        if not self.alchemy_api_key:
            logger.error("ALCHEMY_API_KEY not found in environment variables")
            sys.exit(1)

        self.web3 = Web3(Web3.HTTPProvider(
            f"https://eth-mainnet.g.alchemy.com/v2/{self.alchemy_api_key}"))
        self.db = DatabaseConnector()
        self.ens_resolver = ENSResolver(self.web3)

        # Register data source
        self.register_data_source()

    def register_data_source(self) -> None:
        """Register this NFT collection as a data source in the database."""
        self.db.upsert_data_source(
            name=f"NFT:{self.collection_name}",
            source_type="NFT",
            description=f"Holders of {self.collection_name} NFT ({self.contract_address})"
        )

    def get_token_owner(self, token_id: int) -> Optional[str]:
        """
        Get the owner of a specific token ID.

        Args:
            token_id: The token ID to check

        Returns:
            The owner's Ethereum address or None if not found
        """
        url = f"https://eth-mainnet.g.alchemy.com/nft/v2/{self.alchemy_api_key}/getOwnersForToken"
        params = {
            "contractAddress": self.contract_address,
            "tokenId": hex(token_id) if isinstance(token_id, int) else token_id
        }

        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                owners = data.get("owners", [])
                if owners:
                    return owners[0]
            return None
        except Exception as e:
            logger.error(f"Error fetching owner for token {token_id}: {str(e)}")
            return None

    def get_token_holders(self, max_token_id: int = 465) -> List[Dict[str, Any]]:
        """
        Fetch all token holders for the Public Nouns NFT contract.
        Args:
            max_token_id: The maximum token ID to check (default: 465)

        Returns:
            List of dictionaries containing token ID and holder address
        """
        logger.info(f"Fetching token holders for {self.collection_name} ({self.contract_address})")

        # Start a scraping job
        job_id = self.db.create_scraping_job(
            source_name=f"NFT:{self.collection_name}",
            status="running"
        )

        holders = []
        records_processed = 0
        records_added = 0

        try:
            # Iterate through token IDs from 0 to max_token_id
            for token_id in range(max_token_id + 1):
                records_processed += 1

                # Log progress every 10 tokens
                if token_id % 10 == 0:
                    logger.info(f"Processing token ID {token_id}/{max_token_id}")

                # Get the owner of this token
                owner = self.get_token_owner(token_id)

                if owner:
                    holders.append({
                        "address": owner,
                        "token_id": str(token_id),
                        "collection_name": self.collection_name
                    })
                    records_added += 1

                # Rate limiting to avoid API throttling
                time.sleep(0.2)

            # Update job with success
            self.db.update_scraping_job(
                job_id=job_id,
                status="completed",
                records_processed=records_processed,
                records_added=records_added
            )
        except Exception as e:
            logger.error(f"Error fetching token holders: {str(e)}")
            self.db.update_scraping_job(job_id=job_id, status="failed", error_message=str(e))
            return []

        logger.info(f"Found {len(holders)} token holders")
        return holders

    def process_holders(self, holders: List[Dict[str, Any]]) -> None:
        """
        Process the list of holders and store them in the database.

        Args:
            holders: List of dictionaries containing token ID and holder address
        """
        logger.info(f"Processing {len(holders)} holders")

        for holder in holders:
            address = Web3.to_checksum_address(holder["address"])
            token_id = holder["token_id"]

            # Try to resolve the ENS name for this address
            ens_name = self.ens_resolver.get_ens_name(address)

            # Get the ENS profile if available
            ens_profile = None
            if ens_name:
                ens_profile = self.ens_resolver.get_ens_profile(ens_name)

            # Check for Farcaster information in the ENS profile
            farcaster_info = None
            if ens_profile and "farcaster" in ens_profile:
                farcaster_info = json.dumps(ens_profile["farcaster"])

            # Store the contact in the database
            contact_id = self.db.upsert_contact(
                ethereum_address=address,
                ens_name=ens_name,
                farcaster=farcaster_info
            )

            # Record the NFT holding for this contact
            self.db.add_nft_holding(
                contact_id=contact_id,
                contract_address=self.contract_address,
                token_id=token_id,
                collection_name=self.collection_name
            )

            # If we have an ENS name, try to pull additional profile information
            if ens_name:
                self.ens_resolver.update_contact_from_ens(contact_id, ens_name)

            # Rate limiting to avoid API throttling
            time.sleep(0.1)

    def run(self, max_token_id: int = 465) -> None:
        """
        Run the scraper to fetch and process Public Nouns NFT holders.

        Args:
            max_token_id: The maximum token ID to check (default: 465)
        """
        holders = self.get_token_holders(max_token_id)
        if holders:
            self.process_holders(holders)
            logger.info("Public Nouns NFT holders scraping completed successfully")
        else:
            logger.warning("No holders found or error occurred")


def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description="Scrape Public Nouns NFT holders")
    parser.add_argument("--max-token-id", type=int, default=465,
                        help="Maximum token ID to check (default: 465)")
    args = parser.parse_args()

    scraper = PublicNounsHoldersScraper()
    scraper.run(args.max_token_id)


if __name__ == "__main__":
    main()
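
# Usage sketch. This assumes ETHEREUM_ETHERSCAN_API_KEY and ALCHEMY_API_KEY
# are set in the environment (or a .env file) and that the utils package
# providing DatabaseConnector, ENSResolver, and setup_logger is importable.
#
# From the command line:
#
#   python public_nouns_scraper.py --max-token-id 465
#
# Or programmatically, e.g. against a small test range first:
#
#   from public_nouns_scraper import PublicNounsHoldersScraper
#
#   scraper = PublicNounsHoldersScraper()
#   holders = scraper.get_token_holders(max_token_id=10)  # quick smoke test
#   scraper.process_holders(holders)
#
# Note that get_token_owner() expects the Alchemy getOwnersForToken endpoint
# to return JSON of the form {"owners": ["0x..."]}; the first listed owner is
# taken as the holder, and None is returned when the list is empty or the
# request fails. With the 0.2 s sleep per token, a full run over 466 token IDs
# takes roughly 90 seconds plus request latency.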