#!/usr/bin/env python # -*- coding: utf-8 -*- # Local path and filenames blocklist_path = "/home/foo/path/to/update_iceshrimp_net_blocklist/" blocklist_filename = "_unified_tier0_blocklist.csv" # Same as the filename at the end of the URL (something.csv) blocklist_log_path = "/home/foo/Logs/" blocklist_log_filename = "iceshrimp_net_blocklist_single_updated" # The url library import urllib.request # The pandas library (pip install pandas) import pandas as pd # The SQL_Alchemy library (pip3 install sqlalchemy) from sqlalchemy import create_engine, text # The OS library (for checking whether files and directories exist) import os # Check that a 'blocklist_path' directory exists if os.path.isdir(blocklist_path): print("A 'blocklist_path' directory exists, continuing...") print("") else: print("The 'blocklist_path' directory does not exist, exiting...") print("") quit() # Check that a 'blocklist_log_path' directory exists (the most common error when running for the first time) if os.path.isdir(blocklist_log_path): print("A 'blocklist_log_path' directory exists, continuing...") print("") else: print("The 'blocklist_log_path' directory does not exist, exiting...") print("") quit() # Establish a SQL_Alchemy DB connection alchemyConn = create_engine('postgresql://iceshrimp.user:super-long-password@localhost:5432/iceshrimp.database') # Link to the Blocklist to be used url = ("https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists/_unified_tier0_blocklist.csv") # Go grab the Blocklist urllib.request.urlretrieve(url, blocklist_path+blocklist_filename) # Read the CSV file into a pandas DataFrame df_original = pd.read_csv(blocklist_path+blocklist_filename) # DROP extraneous column by COPY'ing DataFrame (df_original) to a new DataFrame (df_trimmed) df_trimmed = df_original[['domain', 'public_comment']].copy() # Rename a couple of the DataFrame (df_trimmed) columns to match the current 'public.blocked_instance' schema df_trimmed.rename(columns={'domain': 'host', 'public_comment': 'reason'}, inplace=True) # Print the # of DataFrame (df_trimmed) rows print(len(df_trimmed.index), "TOTAL rows in the DataFrame (df_trimmed)") print("") # Execute and error-trap try: # DELETE any existing records in 'public.blocked_instance' with alchemyConn.connect() as conn: truncate_statement = text("TRUNCATE TABLE blocked_instance") conn.execute(truncate_statement) # Commit the DELETE changes to the database conn.commit() print("DELETE COMMITTED") print("") # INSERT the DataFrame (df_trimmed) into the 'public.blocked_instance' table df_trimmed.to_sql("blocked_instance", alchemyConn, if_exists='append', index=False) print(len(df_trimmed.index), "new rows INSERTED into 'public.blocked_instance' table from the DataFrame (df_trimmed) from file: " +blocklist_filename) print("") # Close the alchemyConn connection alchemyConn.dispose() print("Connection CLOSED") print("") # Create a log file to show that the UPDATE has successfully completed open(blocklist_log_path+blocklist_log_filename, 'w') print("Log file CREATED") except: # Roll back in case of error with alchemyConn.connect() as conn: conn.rollback() print("FAILED to INSERT from the DataFrame") print("FAILED to DELETE and rolled back")