#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Local path and filenames
blocklist_path = "/home/foo/path/to/update_iceshrimp_net_blocklist/"
blocklist_filename = "_unified_tier0_blocklist.csv" # Same as the filename at the end of the URL (something.csv)
blocklist_log_path = "/home/foo/Logs/"
blocklist_log_filename = "iceshrimp_net_blocklist_single_updated"

# The url library
import urllib.request

# The pandas library (pip install pandas)
import pandas as pd

# The SQL_Alchemy library (pip3 install sqlalchemy)
from sqlalchemy import create_engine, text

# The OS library (for checking whether files and directories exist)
import os

# Check that a 'blocklist_path' directory exists
if os.path.isdir(blocklist_path):
    print("A 'blocklist_path' directory exists, continuing...")
    print("")
else:
    print("The 'blocklist_path' directory does not exist, exiting...")
    print("")
    quit()

# Check that a 'blocklist_log_path' directory exists (the most common error when running for the first time)
if os.path.isdir(blocklist_log_path):
    print("A 'blocklist_log_path' directory exists, continuing...")
    print("")
else:
    print("The 'blocklist_log_path' directory does not exist, exiting...")
    print("")
    quit()

# Establish a SQL_Alchemy DB connection
alchemyConn = create_engine('postgresql://iceshrimp.user:super-long-password@localhost:5432/iceshrimp.database')

# Link to the Blocklist to be used
url = ("https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists/_unified_tier0_blocklist.csv")

# Go grab the Blocklist
urllib.request.urlretrieve(url, blocklist_path+blocklist_filename)

# Read the CSV file into a pandas DataFrame
df_original = pd.read_csv(blocklist_path+blocklist_filename)

# DROP extraneous column by COPY'ing DataFrame (df_original) to a new DataFrame (df_trimmed)
df_trimmed = df_original[['domain', 'public_comment']].copy()

# Rename a couple of the DataFrame (df_trimmed) columns to match the current 'public.blocked_instance' schema
df_trimmed.rename(columns={'domain': 'host', 'public_comment': 'reason'}, inplace=True)

# Print the # of DataFrame (df_trimmed) rows
print(len(df_trimmed.index), "TOTAL rows in the DataFrame (df_trimmed)")
print("")

# Execute and error-trap
try:
    # DELETE any existing records in 'public.blocked_instance'
    with alchemyConn.connect() as conn:
        truncate_statement = text("TRUNCATE TABLE blocked_instance")
        conn.execute(truncate_statement)
        # Commit the DELETE changes to the database
        conn.commit()
        print("DELETE COMMITTED")
        print("")

        # INSERT the DataFrame (df_trimmed) into the 'public.blocked_instance' table
        df_trimmed.to_sql("blocked_instance", alchemyConn, if_exists='append', index=False)
        print(len(df_trimmed.index), "new rows INSERTED into 'public.blocked_instance' table from the DataFrame (df_trimmed) from file: " +blocklist_filename)
        print("")

        # Close the alchemyConn connection
        alchemyConn.dispose()
        print("Connection CLOSED")
        print("")

        # Create a log file to show that the UPDATE has successfully completed
        open(blocklist_log_path+blocklist_log_filename, 'w')
        print("Log file CREATED")

except:
    # Roll back in case of error
    with alchemyConn.connect() as conn:
        conn.rollback()

    print("FAILED to INSERT from the DataFrame")
    print("FAILED to DELETE and rolled back")