2025-06-10 13:05:21 -06:00

41 KiB

Code

Copy and past the following code into your appscript.

You can review the documentation for all the codes functions here.

// Code.gs - Gmail to Google Sheets Export Script with Real-time Output

/**
 * =====================================
 * CONFIGURATION SECTION
 * =====================================
 * Modify these variables to customize the script behavior
 */

// Email Label Configuration
const LABEL_NAME = "ocm_autopull"; // Gmail label to process

// Processing Configuration  
const BATCH_SIZE = 10; // Number of emails to write to sheet immediately (smaller for real-time output)
const MAX_EXECUTION_TIME = 5 * 60 * 1000; // Maximum script run time (5 minutes)

// Spreadsheet Configuration
const SPREADSHEET_NAME_PREFIX = "OCM_Contact_Export"; // Prefix for generated spreadsheet name
const MAIN_SHEET_NAME = "OCM Contact Export"; // Name of the main data sheet
const ERROR_SHEET_NAME = "Errors"; // Name of the error log sheet
const LOG_SHEET_NAME = "Processing Log"; // Name of the processing log sheet

// Generate unique sheet names with timestamp to avoid conflicts
function getUniqueSheetNames() {
  var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
  return {
    mainSheet: MAIN_SHEET_NAME,
    errorSheet: `${ERROR_SHEET_NAME}_${timestamp}`,
    logSheet: `${LOG_SHEET_NAME}_${timestamp}`
  };
}

// Column Headers Configuration
const SHEET_HEADERS = [
  "Thread Number", "Confirmation Number", "Submission Date", "First Name", "Last Name", 
  "Email Address", "Phone Number", "Fax", "Organization", "Street Address", 
  "Street Address (cont.)", "City", "Province", "Country", "Postal Code", 
  "Full Address", "Subject", "Comments", "Gmail Date", "Gmail From", 
  "Gmail Subject", "Thread ID", "Email ID"
];

// Error Log Headers
const ERROR_HEADERS = ["Thread Number", "Error Type", "Error Details", "Email Date", "Email From", "Timestamp"];

// Processing Log Headers
const LOG_HEADERS = ["Timestamp", "Thread Number", "Action", "Details", "Status"];

// Validation Configuration - RELAXED FOR LABELED EMAILS
const REQUIRED_OCM_PHRASES = ["Contact Council web form"]; // Only require this phrase since all labeled emails should be OCM
const REPLY_INDICATORS = ["re:", "fwd:", "fw:"]; // Subject line indicators of replies/forwards
const SKIP_SENDER_DOMAINS = ["edmonton.ca"]; // Skip emails from these domains
const REQUIRED_FIELDS = []; // No required fields - process all labeled emails

// Address Construction Configuration
const EXCLUDE_COUNTRIES = ["canada"]; // Countries to exclude from full address (case insensitive)

// Progress Logging Configuration  
const PROGRESS_LOG_INTERVAL = 10; // Log progress every N threads
const TIME_CHECK_INTERVAL = 10; // Check execution time every N threads

// Resume Configuration
const RESUME_DELAY_MINUTES = 1; // Minutes to wait before auto-resuming

// Global variables for sheets (will be initialized in main function)
var dataSheet, errorSheet, logSheet;

/**
 * =====================================
 * MAIN EXPORT FUNCTION
 * =====================================
 */

/**
 * Main function to export emails by label to Google Sheets with real-time output
 */
function downloadEmailsByLabel() {
  var startTime = new Date().getTime();
  
  try {
    // Check if we're resuming from a checkpoint
    var checkpoint = getCheckpoint();
    var isResume = checkpoint && checkpoint.spreadsheetId;
    
    var spreadsheet, label, allThreads;
    var processedCount = 0;
    var startIndex = 0;
    var totalEmails = 0;
    
    if (isResume) {
      // Resume from checkpoint
      logMessage("RESUME", 0, "Resuming export from checkpoint", `Processed ${checkpoint.processedCount} threads`);
      spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
      
      // FIXED: Initialize sheets for resume (this will now preserve existing data)
      initializeSheets(spreadsheet, true); // Pass true to indicate this is a resume
      
      label = GmailApp.getUserLabelByName(LABEL_NAME);
      allThreads = getAllThreadsForLabel(label);
      processedCount = checkpoint.processedCount;
      startIndex = checkpoint.processedCount;
      totalEmails = checkpoint.totalEmailsFound || 0;
      
    } else {
      // Start fresh - Create spreadsheet and sheets immediately
      logMessage("START", 0, "Starting new export", "Creating spreadsheet and sheets");
      
      // Get the label first
      label = GmailApp.getUserLabelByName(LABEL_NAME);
      if (!label) {
        throw new Error(`Label "${LABEL_NAME}" not found. Please check the label name.`);
      }
      
      // Create spreadsheet with timestamp - ALWAYS NEW SPREADSHEET
      var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "yyyy-MM-dd_HH-mm-ss");
      var spreadsheetName = `${SPREADSHEET_NAME_PREFIX}_${timestamp}`;
      spreadsheet = SpreadsheetApp.create(spreadsheetName);
      
      // ✅ OUTPUT SPREADSHEET URL IMMEDIATELY
      var spreadsheetUrl = spreadsheet.getUrl();
      Logger.log("🔗 NEW SPREADSHEET CREATED!");
      Logger.log("📊 Spreadsheet Name: " + spreadsheet.getName());
      Logger.log("🌐 Spreadsheet URL: " + spreadsheetUrl);
      Logger.log("📅 Created: " + new Date().toString());
      Logger.log("=".repeat(80));
      
      // FIXED: Initialize all sheets (creates fresh sheets with unique names)
      initializeSheets(spreadsheet, false); // Pass false to indicate this is a fresh start
      
      logMessage("INIT", 0, "Spreadsheet created", `Name: ${spreadsheet.getName()}`);
      logMessage("INIT", 0, "Spreadsheet URL", spreadsheetUrl);
      
      // Get all threads
      logMessage("SCAN", 0, "Getting all threads for label", LABEL_NAME);
      allThreads = getAllThreadsForLabel(label);
      logMessage("SCAN", 0, "Thread scan complete", `Found ${allThreads.length} total threads`);
      
      // Save initial checkpoint
      saveCheckpoint({
        spreadsheetId: spreadsheet.getId(),
        labelName: LABEL_NAME,
        totalThreads: allThreads.length,
        processedCount: 0,
        totalEmailsFound: 0,
        startTime: startTime
      });
    }
    
    // Process threads starting from checkpoint
    for (var i = startIndex; i < allThreads.length; i++) {
      // Check execution time every 10 threads
      if (i % TIME_CHECK_INTERVAL === 0) {
        var currentTime = new Date().getTime();
        var elapsedTime = currentTime - startTime;
        
        if (elapsedTime > MAX_EXECUTION_TIME) {
          logMessage("CHECKPOINT", i, "Approaching time limit", `Saving checkpoint at thread ${i}`);
          
          // Save checkpoint and schedule continuation
          saveCheckpoint({
            spreadsheetId: spreadsheet.getId(),
            labelName: LABEL_NAME,
            totalThreads: allThreads.length,
            processedCount: i,
            totalEmailsFound: totalEmails,
            startTime: startTime
          });
          
          // Schedule next run
          scheduleResume();
          
          logMessage("CHECKPOINT", i, "Checkpoint saved", `Processed ${i}/${allThreads.length} threads, found ${totalEmails} emails. Next batch in 1 minute`);
          Logger.log(`Checkpoint saved. Processed ${i}/${allThreads.length} threads, found ${totalEmails} emails`);
          Logger.log(`Next batch will be triggered automatically in 1 minute`);
          return spreadsheet.getUrl();
        }
      }
      
      try {
        var thread = allThreads[i];
        var messages = thread.getMessages();
        var threadNumber = i + 1;
        
        logMessage("PROCESS", threadNumber, "Processing thread", `${messages.length} messages`);
        
        // Sort messages by date to ensure we get the original email
        messages.sort(function(a, b) {
          return a.getDate().getTime() - b.getDate().getTime();
        });
        
        // Only process the FIRST email (root email) in each thread
        if (messages.length > 0) {
          var rootMessage = messages[0];
          var rowData = parseOCMContactForm(rootMessage, thread, threadNumber);
          
          // Process ALL labeled emails (they should all be OCM forms)
          if (rowData) {
            // Write immediately to sheet (real-time output)
            writeRowToSheet(dataSheet, rowData);
            totalEmails++;
            
            logMessage("SUCCESS", threadNumber, "Email processed", `${rowData[3]} ${rowData[4]} - Confirmation: ${rowData[1]}`);
            
            // Update checkpoint periodically
            if (totalEmails % BATCH_SIZE === 0) {
              saveCheckpoint({
                spreadsheetId: spreadsheet.getId(),
                labelName: LABEL_NAME,
                totalThreads: allThreads.length,
                processedCount: i + 1,
                totalEmailsFound: totalEmails,
                startTime: startTime
              });
            }
          } else {
            logMessage("SKIP", threadNumber, "Email skipped", "Did not meet processing criteria");
          }
        } else {
          logMessage("SKIP", threadNumber, "Thread skipped", "No messages found");
        }
        
        processedCount = i + 1;
        
        // Log progress every configured interval
        if ((i + 1) % PROGRESS_LOG_INTERVAL === 0) {
          logMessage("PROGRESS", i + 1, "Progress update", `Processed ${i + 1}/${allThreads.length} threads, found ${totalEmails} emails`);
          Logger.log(`Processed ${i + 1}/${allThreads.length} threads, found ${totalEmails} emails`);
        }
        
      } catch (threadError) {
        var errorMsg = `Error processing thread ${i + 1}: ${threadError.toString()}`;
        Logger.log(errorMsg);
        logMessage("ERROR", i + 1, "Thread processing error", threadError.toString());
        logError(i + 1, "Thread Processing Error", threadError.toString(), "Unknown", "Unknown");
        continue;
      }
    }
    
    // Format the sheets
    formatSheet(dataSheet, dataSheet.getLastRow());
    
    // Clear checkpoint - we're done!
    clearCheckpoint();
    
    logMessage("COMPLETE", allThreads.length, "Export completed", `Processed ${totalEmails} emails from ${allThreads.length} threads`);
    Logger.log(`✅ EXPORT COMPLETE! Processed ${totalEmails} emails from ${allThreads.length} threads`);
    Logger.log(`Spreadsheet URL: ${spreadsheet.getUrl()}`);
    
    // Send completion notification
    GmailApp.sendEmail(
      Session.getActiveUser().getEmail(),
      `✅ OCM Contact Form Export COMPLETE - ${LABEL_NAME}`,
      `OCM Contact form export completed successfully!\n\n` +
      `Label: ${LABEL_NAME}\n` +
      `Threads processed: ${allThreads.length}\n` +
      `Emails found: ${totalEmails}\n` +
      `Spreadsheet: ${spreadsheet.getName()}\n` +
      `URL: ${spreadsheet.getUrl()}\n\n` +
      `Your OCM contact form data is now fully parsed and organized!\n` +
      `Check the "Errors" and "Processing Log" sheets for details.`
    );
    
    return spreadsheet.getUrl();
    
  } catch (error) {
    var errorMsg = `❌ Error: ${error.toString()}`;
    Logger.log(errorMsg);
    
    if (typeof logMessage === 'function') {
      logMessage("FATAL", 0, "Fatal error occurred", error.toString());
    }
    
    // Send error notification
    GmailApp.sendEmail(
      Session.getActiveUser().getEmail(),
      "❌ OCM Export Error",
      `An error occurred during export:\n\n${error.toString()}\n\nCheckpoint data preserved for resume.`
    );
    throw error;
  }
}

/**
 * FIXED: Initialize all sheets in the spreadsheet - Now preserves data when resuming
 * @param {Spreadsheet} spreadsheet - The spreadsheet object
 * @param {boolean} isResume - Whether this is a resume operation (true) or fresh start (false)
 */
function initializeSheets(spreadsheet, isResume) {
  try {
    if (isResume) {
      // RESUMING: Connect to existing sheets without clearing data
      Logger.log("🔄 Resuming - connecting to existing sheets...");
      
      // Find existing sheets by name patterns
      var sheets = spreadsheet.getSheets();
      dataSheet = null;
      errorSheet = null;
      logSheet = null;
      
      // Find the main data sheet
      for (var i = 0; i < sheets.length; i++) {
        var sheetName = sheets[i].getName();
        if (sheetName === MAIN_SHEET_NAME || sheetName.includes("OCM Contact Export")) {
          dataSheet = sheets[i];
          Logger.log(`📋 Found existing data sheet: "${sheetName}"`);
          break;
        }
      }
      
      // Find error and log sheets (look for most recent ones)
      for (var i = 0; i < sheets.length; i++) {
        var sheetName = sheets[i].getName();
        if (sheetName.includes("Errors") && !errorSheet) {
          errorSheet = sheets[i];
          Logger.log(`❌ Found existing error sheet: "${sheetName}"`);
        }
        if (sheetName.includes("Log") && !logSheet) {
          logSheet = sheets[i];
          Logger.log(`📝 Found existing log sheet: "${sheetName}"`);
        }
      }
      
      // If we can't find existing sheets, create new ones (fallback)
      if (!dataSheet) {
        Logger.log("⚠️  Data sheet not found, creating new one...");
        dataSheet = spreadsheet.getActiveSheet();
        if (dataSheet.getName() !== MAIN_SHEET_NAME) {
          dataSheet.setName(MAIN_SHEET_NAME);
        }
        // Only add headers if the sheet is empty
        if (dataSheet.getLastRow() === 0) {
          dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
          dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
          dataSheet.setFrozenRows(1);
        }
      }
      
      if (!errorSheet) {
        var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
        errorSheet = spreadsheet.insertSheet(`${ERROR_SHEET_NAME}_${timestamp}`);
        errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
        errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
        errorSheet.setFrozenRows(1);
      }
      
      if (!logSheet) {
        var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
        logSheet = spreadsheet.insertSheet(`${LOG_SHEET_NAME}_${timestamp}`);
        logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
        logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
        logSheet.setFrozenRows(1);
      }
      
      Logger.log(`✅ Resume initialization complete! Data preserved.`);
      Logger.log(`   📊 Data rows: ${dataSheet.getLastRow() - 1}`);
      
    } else {
      // FRESH START: Create new sheets with unique names
      var sheetNames = getUniqueSheetNames();
      
      Logger.log("🔧 Fresh start - creating new sheets with unique names:");
      Logger.log(`   📋 Main: "${sheetNames.mainSheet}"`);
      Logger.log(`   ❌ Errors: "${sheetNames.errorSheet}"`);
      Logger.log(`   📝 Log: "${sheetNames.logSheet}"`);
      
      // Set up main data sheet (rename the default sheet)
      dataSheet = spreadsheet.getActiveSheet();
      dataSheet.setName(sheetNames.mainSheet);
      
      // Clear any existing content and set up headers for data sheet
      dataSheet.clear();
      dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
      dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
      dataSheet.setFrozenRows(1);
      
      // Create error sheet with unique name
      errorSheet = spreadsheet.insertSheet(sheetNames.errorSheet);
      errorSheet.clear();
      errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
      errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
      errorSheet.setFrozenRows(1);
      
      // Create log sheet with unique name
      logSheet = spreadsheet.insertSheet(sheetNames.logSheet);
      logSheet.clear();
      logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
      logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
      logSheet.setFrozenRows(1);
      
      Logger.log(`✅ Fresh sheets created successfully!`);
    }
    
  } catch (error) {
    Logger.log(`❌ Error initializing sheets: ${error.toString()}`);
    Logger.log(`🔧 Attempting alternative sheet creation method...`);
    
    // Fallback: Try with even more unique names
    try {
      var fallbackTimestamp = new Date().getTime();
      
      if (!dataSheet) {
        dataSheet = spreadsheet.getActiveSheet();
        dataSheet.setName(`Data_${fallbackTimestamp}`);
        if (!isResume) {
          dataSheet.clear();
        }
        // Only add headers if the sheet is empty
        if (dataSheet.getLastRow() === 0) {
          dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
          dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
          dataSheet.setFrozenRows(1);
        }
      }
      
      if (!errorSheet) {
        errorSheet = spreadsheet.insertSheet(`Errors_${fallbackTimestamp}`);
        errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
        errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
        errorSheet.setFrozenRows(1);
      }
      
      if (!logSheet) {
        logSheet = spreadsheet.insertSheet(`Log_${fallbackTimestamp}`);
        logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
        logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
        logSheet.setFrozenRows(1);
      }
      
      Logger.log(`✅ Fallback sheet creation successful!`);
      
    } catch (fallbackError) {
      Logger.log(`❌ Fallback also failed: ${fallbackError.toString()}`);
      throw fallbackError;
    }
  }
}

/**
 * Write a single row of data immediately to the sheet
 */
function writeRowToSheet(sheet, rowData) {
  try {
    var lastRow = sheet.getLastRow();
    sheet.getRange(lastRow + 1, 1, 1, rowData.length).setValues([rowData]);
  } catch (error) {
    Logger.log(`Error writing row to sheet: ${error.toString()}`);
    throw error;
  }
}

/**
 * Log a message to the processing log sheet
 */
function logMessage(action, threadNumber, message, details) {
  try {
    if (!logSheet) return; // Skip if log sheet not initialized yet
    
    var timestamp = new Date();
    var status = (action === "ERROR" || action === "FATAL") ? "ERROR" : "SUCCESS";
    
    var logData = [timestamp, threadNumber, action, message + (details ? ": " + details : ""), status];
    
    var lastRow = logSheet.getLastRow();
    logSheet.getRange(lastRow + 1, 1, 1, logData.length).setValues([logData]);
    
    // Also log to console for immediate feedback
    Logger.log(`${action} - Thread ${threadNumber}: ${message}${details ? " - " + details : ""}`);
    
  } catch (error) {
    Logger.log(`Error writing to log sheet: ${error.toString()}`);
  }
}

/**
 * Log an error to the error sheet
 */
function logError(threadNumber, errorType, errorDetails, emailDate, emailFrom) {
  try {
    if (!errorSheet) return; // Skip if error sheet not initialized yet
    
    var timestamp = new Date();
    var errorData = [threadNumber, errorType, errorDetails, emailDate, emailFrom, timestamp];
    
    var lastRow = errorSheet.getLastRow();
    errorSheet.getRange(lastRow + 1, 1, 1, errorData.length).setValues([errorData]);
    
  } catch (error) {
    Logger.log(`Error writing to error sheet: ${error.toString()}`);
  }
}

/**
 * Gets ALL threads for a label, handling Gmail's pagination limits
 */
function getAllThreadsForLabel(label) {
  var allThreads = [];
  var start = 0;
  var maxResults = 500; // Gmail's max per request
  
  do {
    var threads = label.getThreads(start, maxResults);
    allThreads = allThreads.concat(threads);
    start += threads.length;
    
    logMessage("SCAN", 0, "Retrieved thread batch", `${threads.length} threads (total so far: ${allThreads.length})`);
    
    // Break if we got fewer than max results (end of results)
    if (threads.length < maxResults) {
      break;
    }
    
    // Brief pause between batches
    Utilities.sleep(200);
    
  } while (threads.length === maxResults);
  
  return allThreads;
}

/**
 * Parses OCM Contact form from email body and extracts structured data
 * Since all emails have the label, we'll process them all and extract what we can
 */
function parseOCMContactForm(message, thread, threadNumber) {
  try {
    var plainBody = message.getPlainBody() || "";
    var subject = (message.getSubject() || "").toLowerCase();
    var from = message.getFrom() || "";
    
    // Skip if it's clearly a reply or forward (basic filtering)
    var isReply = REPLY_INDICATORS.some(function(indicator) {
      return subject.includes(indicator);
    });
    
    var isFromSkippedDomain = SKIP_SENDER_DOMAINS.some(function(domain) {
      return from.includes(domain);
    });
    
    if (isReply || isFromSkippedDomain) {
      logMessage("SKIP", threadNumber, "Reply/forward detected", `Subject: ${subject}, From: ${from}`);
      return null; // Skip this email
    }
    
    // Initialize all fields with defaults
    var confirmationNumber = "";
    var submissionDate = "";
    var firstName = "";
    var lastName = "";
    var emailAddress = "";
    var phoneNumber = "";
    var fax = "";
    var organization = "";
    var streetAddress = "";
    var streetAddressCont = "";
    var city = "";
    var province = "";
    var country = "";
    var postalCode = "";
    var subjectField = "";
    var comments = "";
    
    // Parse each field using regex patterns - but don't fail if fields are missing
    var patterns = {
      confirmationNumber: /Confirmation Number:\s*([^\r\n]+?)(?:\s|$)/i,
      submissionDate: /Submission Date:\s*([^\r\n]+?)(?:\s*Contact Information|$)/i,
      firstName: /First Name:\s*([^\r\n]+?)(?:\s*Last Name|$)/i,
      lastName: /Last Name:\s*([^\r\n]+?)(?:\s*Email Address|$)/i,
      emailAddress: /Email Address:\s*([^\r\n<\s]+?)(?:<mailto:|Phone Number|\s|$)/i,
      phoneNumber: /Phone Number:\s*([^\r\n]+?)(?:\s*Fax|$)/i,
      fax: /Fax:\s*([^\r\n]*?)(?:\s*Organization|$)/i,
      organization: /Organization:\s*([^\r\n]*?)(?:\s*Street Address|$)/i,
      streetAddress: /Street Address:\s*([^\r\n]+?)(?:\s*Street Address \(cont\.\)|City:|$)/i,
      streetAddressCont: /Street Address \(cont\.\):\s*([^\r\n]*?)(?:\s*City:|$)/i,
      city: /City:\s*([^\r\n]+?)(?:\s*Province:|$)/i,
      province: /Province:\s*([^\r\n]+?)(?:\s*Country:|$)/i,
      country: /Country:\s*([^\r\n]+?)(?:\s*Postal Code:|$)/i,
      postalCode: /Postal Code:\s*([^\r\n]+?)(?:\s*Message|$)/i,
      subject: /Subject:\s*([^\r\n]+?)(?:\s*Comments:|$)/i,
      comments: /Comments:\s*([\s\S]*?)(?=The contents of this message|$)/i
    };
    
    // Extract each field - continue even if some fields are missing
    for (var field in patterns) {
      var match = plainBody.match(patterns[field]);
      if (match && match[1]) {
        var value = match[1].trim();
        
        // Clean up common parsing issues
        value = value.replace(/^[:\s]+|[:\s]+$/g, ''); // Remove leading/trailing colons and spaces
        value = value.replace(/^[>]+\s*/, ''); // Remove email quote markers
        value = value.replace(/\s+/g, ' '); // Normalize whitespace
        
        // Specific cleaning for email addresses
        if (field === 'emailAddress') {
          value = value.replace(/<mailto:.*?>/g, '').replace(/[<>]/g, '').trim();
        }
        
        // Specific cleaning for organization (often empty)
        if (field === 'organization' && (value === '' || value === ':' || value.length < 2)) {
          value = '';
        }
        
        // Specific cleaning for fax (often empty)
        if (field === 'fax' && (value === '' || value === ':' || value.length < 2)) {
          value = '';
        }
        
        // Clean up comments (remove extra whitespace and footer)
        if (field === 'comments') {
          value = value.replace(/The contents of this message.*$/i, '').trim();
          value = value.replace(/^[>\s]+/gm, '').trim(); // Remove quote markers
        }
        
        // Clean up street address continuation (often empty)
        if (field === 'streetAddressCont' && (value === '' || value === ':' || value.length < 2)) {
          value = '';
        }
        
        // Assign to variables
        switch(field) {
          case 'confirmationNumber': confirmationNumber = value; break;
          case 'submissionDate': submissionDate = value; break;
          case 'firstName': firstName = value; break;
          case 'lastName': lastName = value; break;
          case 'emailAddress': emailAddress = value; break;
          case 'phoneNumber': phoneNumber = value; break;
          case 'fax': fax = value; break;
          case 'organization': organization = value; break;
          case 'streetAddress': streetAddress = value; break;
          case 'streetAddressCont': streetAddressCont = value; break;
          case 'city': city = value; break;
          case 'province': province = value; break;
          case 'country': country = value; break;
          case 'postalCode': postalCode = value; break;
          case 'subject': subjectField = value; break;
          case 'comments': comments = value; break;
        }
      }
    }
    
    // Use placeholder values for missing essential data
    if (!firstName || firstName.trim() === '') {
      firstName = "Unknown";
    }
    if (!lastName || lastName.trim() === '') {
      lastName = "Unknown";
    }
    if (!confirmationNumber || confirmationNumber.trim() === '') {
      confirmationNumber = "Unknown";
    }
    
    // Construct full address
    var fullAddress = constructFullAddress(streetAddress, streetAddressCont, city, province, country, postalCode);
    
    return [
      threadNumber,
      confirmationNumber,
      submissionDate,
      firstName,
      lastName,
      emailAddress,
      phoneNumber,
      fax,
      organization,
      streetAddress,
      streetAddressCont,
      city,
      province,
      country,
      postalCode,
      fullAddress,
      subjectField,
      comments,
      message.getDate(),
      message.getFrom(),
      message.getSubject(),
      thread.getId(),
      message.getId()
    ];
    
  } catch (error) {
    logError(threadNumber, "Parsing Error", error.toString(), 
             message.getDate ? message.getDate() : "Unknown", 
             message.getFrom ? message.getFrom() : "Unknown");
    
    Logger.log(`Error parsing email for thread ${threadNumber}: ${error.toString()}`);
    return [
      threadNumber,
      "ERROR",
      "ERROR PARSING",
      "Unknown",
      "Unknown",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "",
      "Error: " + error.toString(),
      message.getDate ? message.getDate() : new Date(),
      message.getFrom ? message.getFrom() : "Unknown",
      message.getSubject ? message.getSubject() : "Unknown",
      thread.getId(),
      message.getId ? message.getId() : "Unknown"
    ];
  }
}

/**
 * Constructs a full address from individual components
 */
function constructFullAddress(street, streetCont, city, province, country, postal) {
  var addressParts = [];
  
  // Clean and validate each component first
  street = (street || "").trim();
  streetCont = (streetCont || "").trim();
  city = (city || "").trim();
  province = (province || "").trim();
  country = (country || "").trim();
  postal = (postal || "").trim();
  
  // Add street address (only if it doesn't already contain city info)
  if (street && street !== "" && !street.toLowerCase().includes("city:")) {
    addressParts.push(street);
  }
  
  // Add continuation of street address (only if different from main street and not empty)
  if (streetCont && streetCont !== "" && streetCont !== street && !streetCont.toLowerCase().includes("city:")) {
    addressParts.push(streetCont);
  }
  
  // Add city (clean up any "City:" prefix that might have snuck in)
  if (city && city !== "") {
    city = city.replace(/^City:\s*/i, '').trim();
    if (city !== "") {
      addressParts.push(city);
    }
  }
  
  // Add province (clean up any prefixes)
  if (province && province !== "") {
    province = province.replace(/^Province:\s*/i, '').trim();
    if (province !== "") {
      addressParts.push(province);
    }
  }
  
  // Add postal code (clean up any prefixes)
  if (postal && postal !== "") {
    postal = postal.replace(/^Postal Code:\s*/i, '').trim();
    if (postal !== "") {
      addressParts.push(postal);
    }
  }
  
  // Add country only if it's not in the exclude list and not empty
  if (country && country !== "") {
    country = country.replace(/^Country:\s*/i, '').trim();
    var isExcludedCountry = EXCLUDE_COUNTRIES.some(function(excludedCountry) {
      return country.toLowerCase() === excludedCountry.toLowerCase();
    });
    
    if (country !== "" && !isExcludedCountry) {
      addressParts.push(country);
    }
  }
  
  return addressParts.join(", ");
}

/**
 * Formats the spreadsheet for better readability
 */
function formatSheet(sheet, totalRows) {
  try {
    // Auto-resize columns
    sheet.autoResizeColumns(1, sheet.getLastColumn());
    
    // Set column widths for OCM data
    sheet.setColumnWidth(1, 80);  // Thread Number
    sheet.setColumnWidth(2, 120); // Confirmation Number
    sheet.setColumnWidth(3, 150); // Submission Date
    sheet.setColumnWidth(4, 100); // First Name
    sheet.setColumnWidth(5, 100); // Last Name
    sheet.setColumnWidth(6, 200); // Email Address
    sheet.setColumnWidth(7, 120); // Phone Number
    sheet.setColumnWidth(8, 80);  // Fax
    sheet.setColumnWidth(9, 150); // Organization
    sheet.setColumnWidth(10, 200); // Street Address
    sheet.setColumnWidth(11, 150); // Street Address (cont.)
    sheet.setColumnWidth(12, 100); // City
    sheet.setColumnWidth(13, 100); // Province
    sheet.setColumnWidth(14, 100); // Country
    sheet.setColumnWidth(15, 100); // Postal Code
    sheet.setColumnWidth(16, 300); // Full Address
    sheet.setColumnWidth(17, 200); // Subject
    sheet.setColumnWidth(18, 400); // Comments
    
    // Add alternating row colors
    if (totalRows > 1) {
      var range = sheet.getRange(2, 1, totalRows - 1, sheet.getLastColumn());
      range.applyRowBanding(SpreadsheetApp.BandingTheme.LIGHT_GREY);
    }
    
    // Format date columns
    if (totalRows > 1) {
      // Submission Date (column 3)
      var submissionDateRange = sheet.getRange(2, 3, totalRows - 1, 1);
      submissionDateRange.setNumberFormat("yyyy-mm-dd hh:mm:ss");
      
      // Gmail Date (column 19)
      var gmailDateRange = sheet.getRange(2, 19, totalRows - 1, 1);
      gmailDateRange.setNumberFormat("yyyy-mm-dd hh:mm:ss");
    }
    
  } catch (error) {
    Logger.log(`Error formatting sheet: ${error.toString()}`);
  }
}

/**
 * =====================================
 * TEST FUNCTIONS
 * =====================================
 */

/**
 * Test function to check label and get count - RUN THIS FIRST
 */
function testLabelCount() {
  try {
    Logger.log("=".repeat(50));
    Logger.log("GMAIL EXPORT READINESS TEST");
    Logger.log("=".repeat(50));
    
    // Test 1: Check if label exists
    Logger.log("Test 1: Checking if label exists...");
    var label = GmailApp.getUserLabelByName(LABEL_NAME);
    if (!label) {
      Logger.log(`❌ FAILED: Label "${LABEL_NAME}" not found.`);
      Logger.log("\nAvailable labels:");
      var labels = GmailApp.getUserLabels();
      for (var i = 0; i < labels.length; i++) {
        Logger.log(`- ${labels[i].getName()}`);
      }
      return false;
    }
    Logger.log(`✅ PASSED: Label "${LABEL_NAME}" found successfully`);
    
    // Test 2: Get thread count
    Logger.log("\nTest 2: Getting thread count...");
    var allThreads = getAllThreadsForLabel(label);
    Logger.log(`✅ PASSED: Found ${allThreads.length} threads with label "${LABEL_NAME}"`);
    
    if (allThreads.length === 0) {
      Logger.log("⚠️  WARNING: No threads found with this label. Nothing to export.");
      return false;
    }
    
    // Test 3: Sample message analysis
    Logger.log("\nTest 3: Analyzing sample messages...");
    var sampleSize = Math.min(5, allThreads.length);
    var totalSampleMessages = 0;
    var threadSizes = [];
    
    for (var i = 0; i < sampleSize; i++) {
      var messages = allThreads[i].getMessages();
      totalSampleMessages += messages.length;
      threadSizes.push(messages.length);
    }
    
    var avgMessagesPerThread = totalSampleMessages / sampleSize;
    var maxMessages = Math.max.apply(Math, threadSizes);
    var minMessages = Math.min.apply(Math, threadSizes);
    
    Logger.log(`✅ PASSED: Analyzed ${sampleSize} sample threads`);
    Logger.log(`   - Average messages per thread: ${avgMessagesPerThread.toFixed(1)}`);
    Logger.log(`   - Largest thread: ${maxMessages} messages`);
    Logger.log(`   - Smallest thread: ${minMessages} messages`);
    
    // Test 4: Sample parsing
    Logger.log("\nTest 4: Testing email parsing...");
    var parseableCount = 0;
    for (var i = 0; i < Math.min(3, allThreads.length); i++) {
      var thread = allThreads[i];
      var messages = thread.getMessages();
      if (messages.length > 0) {
        var rootMessage = messages[0];
        var testData = parseOCMContactForm(rootMessage, thread, i + 1);
        if (testData) {
          parseableCount++;
          Logger.log(`   - Thread ${i + 1}: Successfully parsed data`);
        }
      }
    }
    Logger.log(`✅ PASSED: Successfully parsed ${parseableCount} sample emails`);
    
    // Final summary
    Logger.log("\n" + "=".repeat(50));
    Logger.log("READINESS TEST SUMMARY");
    Logger.log("=".repeat(50));
    Logger.log(`Label: ${LABEL_NAME}`);
    Logger.log(`Threads: ${allThreads.length}`);
    Logger.log(`Sample parsing success: ${parseableCount}/${Math.min(3, allThreads.length)}`);
    Logger.log("Status: ✅ READY FOR FULL EXPORT");
    Logger.log("\nTo proceed with full export, run: downloadEmailsByLabel()");
    
    return true;
    
  } catch (error) {
    Logger.log(`❌ CRITICAL ERROR: ${error.toString()}`);
    return false;
  }
}

/**
 * Quick test with just 3 submissions to verify parsing works with real-time output - RUN THIS SECOND
 */
function testSmallSample() {
  const TEST_LIMIT = 3; // Only process 3 submissions for testing
  
  try {
    Logger.log("Starting email parsing test with real-time output...");
    
    var label = GmailApp.getUserLabelByName(LABEL_NAME);
    if (!label) {
      throw new Error(`Label "${LABEL_NAME}" not found`);
    }
    
    // Create test spreadsheet with all sheets
    var spreadsheetName = `TEST_${SPREADSHEET_NAME_PREFIX}_${Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "yyyy-MM-dd_HH-mm-ss")}`;
    var spreadsheet = SpreadsheetApp.create(spreadsheetName);
    
    // Initialize all sheets
    initializeSheets(spreadsheet, false); // Fresh start for test
    
    logMessage("TEST", 0, "Test started", `Testing with up to ${TEST_LIMIT} emails`);
    
    // Get first few threads
    var threads = label.getThreads(0, 10); // Get first 10 threads
    var submissionCount = 0;
    
    for (var i = 0; i < threads.length && submissionCount < TEST_LIMIT; i++) {
      var thread = threads[i];
      var messages = thread.getMessages();
      var threadNumber = i + 1;
      
      logMessage("TEST", threadNumber, "Testing thread", `${messages.length} messages`);
      
      // Sort messages by date to get the root email
      messages.sort(function(a, b) {
        return a.getDate().getTime() - b.getDate().getTime();
      });
      
      if (messages.length > 0) {
        var rootMessage = messages[0];
        var rowData = parseOCMContactForm(rootMessage, thread, threadNumber);
        
        // Process the email regardless (since it's labeled)
        if (rowData) {
          writeRowToSheet(dataSheet, rowData);
          submissionCount++;
          
          logMessage("TEST", threadNumber, "Test email processed", `${rowData[3]} ${rowData[4]} - ${rowData[1]}`);
          Logger.log(`✅ Found labeled email ${submissionCount}: ${rowData[3]} ${rowData[4]} - ${rowData[1]}`);
        }
      }
    }
    
    if (submissionCount === 0) {
      logMessage("TEST", 0, "Test completed", "No emails found in first 10 threads");
      Logger.log("⚠️  No emails found in the first 10 threads. Check your label.");
      return null;
    }
    
    // Format the test sheet
    formatSheet(dataSheet, dataSheet.getLastRow());
    
    logMessage("TEST", 0, "Test completed successfully", `Processed ${submissionCount} emails`);
    Logger.log(`✅ Test completed successfully!`);
    Logger.log(`Found and parsed ${submissionCount} labeled emails`);
    Logger.log(`Test spreadsheet: ${spreadsheet.getUrl()}`);
    Logger.log(`\nReview the parsed data in all sheets. If it looks good, run downloadEmailsByLabel() for full export.`);
    
    return spreadsheet.getUrl();
    
  } catch (error) {
    Logger.log(`❌ Test failed: ${error.toString()}`);
    throw error;
  }
}

/**
 * Checkpoint and resume functions
 */
function saveCheckpoint(checkpointData) {
  try {
    PropertiesService.getScriptProperties().setProperty('OCM_EXPORT_CHECKPOINT', JSON.stringify(checkpointData));
    Logger.log('Checkpoint saved successfully');
  } catch (error) {
    Logger.log('Error saving checkpoint: ' + error.toString());
  }
}

function getCheckpoint() {
  try {
    var checkpointString = PropertiesService.getScriptProperties().getProperty('OCM_EXPORT_CHECKPOINT');
    if (checkpointString) {
      return JSON.parse(checkpointString);
    }
  } catch (error) {
    Logger.log('Error loading checkpoint: ' + error.toString());
  }
  return null;
}

function clearCheckpoint() {
  try {
    PropertiesService.getScriptProperties().deleteProperty('OCM_EXPORT_CHECKPOINT');
    Logger.log('Checkpoint cleared');
  } catch (error) {
    Logger.log('Error clearing checkpoint: ' + error.toString());
  }
}

/**
 * Schedule the next run using a time-based trigger
 */
function scheduleResume() {
  try {
    // Delete any existing resume triggers first
    var triggers = ScriptApp.getProjectTriggers();
    for (var i = 0; i < triggers.length; i++) {
      if (triggers[i].getHandlerFunction() === 'resumeExport') {
        ScriptApp.deleteTrigger(triggers[i]);
      }
    }
    
    // Create new trigger to run after configured delay
    ScriptApp.newTrigger('resumeExport')
      .timeBased()
      .after(RESUME_DELAY_MINUTES * 60 * 1000) // Convert minutes to milliseconds
      .create();
      
    Logger.log(`Resume trigger scheduled for ${RESUME_DELAY_MINUTES} minute(s)`);
  } catch (error) {
    Logger.log('Error scheduling resume: ' + error.toString());
  }
}

/**
 * Resume function called by trigger
 */
function resumeExport() {
  Logger.log('Resuming OCM export from checkpoint...');
  
  // Clean up the trigger that called this function
  var triggers = ScriptApp.getProjectTriggers();
  for (var i = 0; i < triggers.length; i++) {
    if (triggers[i].getHandlerFunction() === 'resumeExport') {
      ScriptApp.deleteTrigger(triggers[i]);
      break;
    }
  }
  
  // Continue the export
  downloadEmailsByLabel();
}

/**
 * Manual function to check checkpoint status
 */
function checkCheckpointStatus() {
  var checkpoint = getCheckpoint();
  if (checkpoint) {
    Logger.log('=== CHECKPOINT STATUS ===');
    Logger.log(`Spreadsheet ID: ${checkpoint.spreadsheetId}`);
    Logger.log(`Label: ${checkpoint.labelName}`);
    Logger.log(`Progress: ${checkpoint.processedCount}/${checkpoint.totalThreads} threads`);
    Logger.log(`Emails found: ${checkpoint.totalEmailsFound || 'Unknown'}`);
    Logger.log(`Started: ${new Date(checkpoint.startTime)}`);
    
    try {
      var spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
      Logger.log(`Spreadsheet URL: ${spreadsheet.getUrl()}`);
    } catch (e) {
      Logger.log(`Error accessing spreadsheet: ${e.toString()}`);
    }
    
    Logger.log(`\nTo resume manually, run: downloadEmailsByLabel()`);
    Logger.log(`To clear checkpoint, run: clearCheckpoint()`);
  } else {
    Logger.log('No checkpoint found. No export in progress.');
  }
}

/**
 * Emergency function to manually clear stuck checkpoints
 */
function clearCheckpointAndTriggers() {
  // Clear checkpoint
  clearCheckpoint();
  
  // Delete all resume triggers
  var triggers = ScriptApp.getProjectTriggers();
  var deletedCount = 0;
  for (var i = 0; i < triggers.length; i++) {
    if (triggers[i].getHandlerFunction() === 'resumeExport') {
      ScriptApp.deleteTrigger(triggers[i]);
      deletedCount++;
    }
  }
  
  Logger.log(`Cleared checkpoint and deleted ${deletedCount} resume triggers`);
  Logger.log('You can now start a fresh export with downloadEmailsByLabel()');
}

/**
 * Utility function to get the current spreadsheet URL if one exists
 */
function getCurrentSpreadsheetUrl() {
  var checkpoint = getCheckpoint();
  if (checkpoint && checkpoint.spreadsheetId) {
    try {
      var spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
      Logger.log(`Current spreadsheet URL: ${spreadsheet.getUrl()}`);
      return spreadsheet.getUrl();
    } catch (e) {
      Logger.log(`Error accessing current spreadsheet: ${e.toString()}`);
    }
  } else {
    Logger.log('No active export found.');
  }
  return null;
}