Skip to content

Cleaning Emails

Note: this code is still a work in progress, but it already works for cleaning the email addresses in an individual .txt file.

  • Extracts the email addresses from a text file, dropping any surrounding spaces, and writes them one per line to a new file


import os
import re

# Matches a conventional address: local part, "@", a dotted domain, and an
# alphabetic top-level domain of at least two letters. No character class
# includes whitespace, so a match never carries surrounding spaces and needs
# no further stripping.
EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")


def extract_emails(text):
    """Return every email address found in *text*, in order of appearance.

    Args:
        text: The raw text to scan.

    Returns:
        A list of the matched address strings. Duplicates are kept, and the
        list is empty when nothing matches.

    >>> extract_emails("  alice@example.com ,\\tbob@example.org ")
    ['alice@example.com', 'bob@example.org']
    """
    return EMAIL_PATTERN.findall(text)


def main():
    """Prompt for an input and an output file and save the extracted emails.

    Reads the input file as UTF-8, extracts the addresses with
    ``extract_emails`` and writes them to the output file one per line.
    Prints an error and returns without prompting for the output name when
    the input path is not an existing regular file.
    """
    input_filename = input(
        "Enter the name of the input text file (e.g., emails.txt): "
    )

    # isfile() rather than exists(): a directory name satisfies exists() but
    # would make the open() below raise instead of reporting a clean error.
    if not os.path.isfile(input_filename):
        print(f"Error: The file '{input_filename}' was not found.")
        return

    output_filename = input(
        "Enter the name for the cleaned output file (e.g., cleaned.txt): "
    )

    # The input is read completely before the output is opened, so using the
    # same name for both files still works.
    with open(input_filename, "r", encoding="utf-8") as file:
        text = file.read()

    cleaned_emails = extract_emails(text)

    with open(output_filename, "w", encoding="utf-8") as output_file:
        output_file.writelines(f"{email}\n" for email in cleaned_emails)

    print(
        f"Success! Found {len(cleaned_emails)} emails and saved them to '{output_filename}'."
    )


# Only prompt when executed as a script, so importing this module (for reuse
# or testing) has no side effects.
if __name__ == "__main__":
    main()