monorepo/band/band_contacts_to_google_contacts.py

import csv
import argparse
import datetime
import re
import sys

def clean_instrument_name(instrument_raw):
    """Cleans the instrument name based on Percussion, Winds, and Default rules."""
    # Strip numerical prefix
    if '-' in instrument_raw:
        instrument = instrument_raw.split('-', 1)[-1].strip()
    else:
        instrument = instrument_raw.strip()

    lower_inst = instrument.lower()

    # Pre-catch Drum Major so they are always cleanly named
    if 'drum major' in lower_inst:
        return 'Drum Major'

    # 1. The Percussion Rule
    if 'percussion' in lower_inst and '(' in instrument and ')' in instrument:
        match = re.search(r'\((.*?)\)', instrument)
        if match:
            extracted = match.group(1).strip().title()

            # Ensure "Drum" is in the name
            if 'Drum' not in extracted:
                if extracted.lower() in ['tenor', 'tenors']:
                    return 'Tenor Drums'
                else:
                    return f"{extracted} Drum"

            return extracted

    # 2. The Winds/General Rule
    elif '(' in instrument and ')' in instrument:
        return re.sub(r'\(.*?\)', '', instrument).strip()

    # 3. Default Rule
    return instrument

def get_category(instrument):
    """Categorize the instrument into groups, isolating Drum Majors first."""
    inst_lower = instrument.lower()

    # Catch Drum Majors first so they don't get trapped by the 'drum' keyword in Percussion
    if 'drum major' in inst_lower:
        return 'Drum Majors'

    if any(x in inst_lower for x in ['flute', 'clarinet', 'sax', 'oboe', 'bassoon', 'piccolo']):
        return 'Woodwinds'
    if any(x in inst_lower for x in ['trumpet', 'mellophone', 'horn', 'trombone', 'baritone', 'euphonium', 'tuba', 'sousaphone']):
        return 'Brass'
    if any(x in inst_lower for x in ['percussion', 'snare', 'tenor', 'drum', 'cymbal', 'marimba', 'vibraphone', 'timpani', 'bells', 'electronics']):
        return 'Percussion'
    if 'guard' in inst_lower or 'color' in inst_lower:
        return 'Colorguard'

    return 'Leadership' # Fallback for other potential roles

def format_phone(phone_str):
    """Clean and standardize the phone numbers."""
    phone = str(phone_str).strip()
    if not phone:
        return ""

    # Leave non-US international codes as they are
    if phone.startswith('+') and not phone.startswith('+1'):
        return phone

    # Standardize US numbers to XXX-XXX-XXXX
    digits = re.sub(r'\D', '', phone)
    if len(digits) == 10:
        return f"{digits[:3]}-{digits[3:6]}-{digits[6:]}"
    elif len(digits) == 11 and digits.startswith('1'):
        return f"{digits[1:4]}-{digits[4:7]}-{digits[7:]}"

    return phone

def main():
    parser = argparse.ArgumentParser(description="Convert Band Leadership CSV to Google Contacts format.")
    parser.add_argument("input_file", help="Path to the input CSV file")
    parser.add_argument("-y", "--year", type=int, default=datetime.datetime.now().year,
                        help="Target year for labels and grade calculation (defaults to current year)")
    args = parser.parse_args()

    target_year = args.year

    # Google Contacts Headers matching your expected output
    google_headers = [
        "First Name", "Middle Name", "Last Name", "Phonetic First Name", "Phonetic Middle Name",
        "Phonetic Last Name", "Name Prefix", "Name Suffix", "Nickname", "File As",
        "Organization Name", "Organization Title", "Organization Department", "Birthday", "Notes",
        "Photo", "Labels", "E-mail 1 - Label", "E-mail 1 - Value", "Phone 1 - Label",
        "Phone 1 - Value", "Phone 2 - Label", "Phone 2 - Value", "Phone 3 - Label", "Phone 3 - Value",
        "Address 1 - Label", "Address 1 - Formatted", "Address 1 - Street", "Address 1 - City",
        "Address 1 - PO Box", "Address 1 - Region", "Address 1 - Postal Code", "Address 1 - Country",
        "Address 1 - Extended Address"
    ]

    with open(args.input_file, mode='r', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)
        # lineterminator='\n' ensures consistent newlines across OS when printing to stdout
        writer = csv.DictWriter(sys.stdout, fieldnames=google_headers, lineterminator='\n')
        writer.writeheader()

        for row in reader:
            # Skip empty rows or the trailing blank commas in the source file
            name_field = row.get('NAME', '').strip()
            if not name_field or name_field == ',':
                continue

            # Parse Name ("Last, First")
            name_parts = name_field.split(',')
            last_name = name_parts[0].strip() if len(name_parts) > 0 else ""
            first_name = name_parts[1].strip() if len(name_parts) > 1 else ""

            # Parse and Clean Instrument
            section_raw = row.get('SECTION', '').strip()
            instrument = clean_instrument_name(section_raw)
            if not instrument:
                instrument = "Unknown"

            # Parse Grade and formulate Notes
            grade_raw = row.get('GRADE', '').strip()
            grade_match = re.match(r"(\d{4})\s*\((.*?)\)", grade_raw)
            if grade_match:
                grad_year = grade_match.group(1)
                grade_level = grade_match.group(2).capitalize()
                notes = f"Class of {grad_year}. A {grade_level} in {target_year}"
            else:
                notes = grade_raw

            # Determine Label
            category = get_category(instrument)
            label = f"{target_year} {category} ::: Marching Band ::: * myContacts"

            # Build the output row
            out_row = {key: "" for key in google_headers}
            out_row["First Name"] = first_name
            out_row["Last Name"] = f"{last_name} ({instrument})"
            out_row["Notes"] = notes
            out_row["Labels"] = label

            # Map Email
            email = row.get('EMAIL', '').strip()
            if email:
                out_row["E-mail 1 - Label"] = "Home"
                out_row["E-mail 1 - Value"] = email

            # Map Phone
            phone = row.get('PHONE', '').strip()
            if phone:
                out_row["Phone 1 - Label"] = "Mobile"
                out_row["Phone 1 - Value"] = format_phone(phone)

            writer.writerow(out_row)

if __name__ == '__main__':
    main()