Skip to content

Combine Emails

Note: this code is still a work in progress, but it already works for combining emails from .txt files located in the same folder.

  • Removes surrounding spaces from email addresses found in text files and combines them into a single text file

import os
import re

# Regex for standard email formats, compiled once and reused for every file.
# It cannot match whitespace, so every match is already free of surrounding
# spaces and needs no further stripping.
EMAIL_PATTERN = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")


def extract_emails(text: str) -> set:
    """Return the set of distinct email addresses found in *text*."""
    return set(EMAIL_PATTERN.findall(text))


def collect_emails(folder_path: str, output_path: str) -> set:
    """Gather the unique emails from every ``.txt`` file under *folder_path*.

    The folder is walked recursively. The file located at *output_path* is
    skipped so that the result of a previous run is never fed back in.
    Files that cannot be read are reported on stdout and skipped.

    Args:
        folder_path: Directory to scan.
        output_path: Path the combined file will be written to.

    Returns:
        A set of email address strings.
    """
    # Compare resolved paths rather than bare names: the user may have typed
    # a path (not just a file name), and an unrelated file in another
    # subfolder may legitimately share the output file's name.
    resolved_output = os.path.normcase(os.path.realpath(output_path))

    unique_emails = set()
    for root, _dirs, files in os.walk(folder_path):
        for name in files:
            # Accept ".txt" in any letter case (e.g. "LIST.TXT").
            if not name.lower().endswith(".txt"):
                continue

            file_path = os.path.join(root, name)
            if os.path.normcase(os.path.realpath(file_path)) == resolved_output:
                continue

            try:
                # errors="ignore" drops undecodable bytes, so only
                # OS-level failures (permissions, vanished file, ...) can
                # be raised here.
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    text = f.read()
            except OSError as e:
                print(f"Could not read file {name}: {e}")
                continue

            unique_emails |= extract_emails(text)

    return unique_emails


def write_emails(emails, output_path: str) -> None:
    """Write *emails* to *output_path*, sorted, one address per line."""
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.writelines(email + "\n" for email in sorted(emails))


def main() -> None:
    """Prompt for a folder and an output name, then combine the emails."""
    # Ask the user for the folder containing the text files
    folder_path = input("Enter the path to the folder with your text files: ").strip()

    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return

    # Ask for the output filename
    output_filename = input(
        "Enter the name for the combined output file (e.g., all_emails.txt): "
    ).strip()

    # An empty name would otherwise make open() fail with a raw traceback.
    if not output_filename:
        print("Error: No output filename was given.")
        return

    unique_emails = collect_emails(folder_path, output_filename)

    # Write all combined, clean emails to the final file
    write_emails(unique_emails, output_filename)

    print(
        f"\nSuccess! Combined {len(unique_emails)} unique emails into '{output_filename}'."
    )


if __name__ == "__main__":
    main()