41 KiB
41 KiB
Code
Copy and past the following code into your appscript.
You can review the documentation for all the codes functions here.
// Code.gs - Gmail to Google Sheets Export Script with Real-time Output
/**
* =====================================
* CONFIGURATION SECTION
* =====================================
* Modify these variables to customize the script behavior
*/
// Email Label Configuration
const LABEL_NAME = "ocm_autopull"; // Gmail label to process
// Processing Configuration
const BATCH_SIZE = 10; // Number of emails to write to sheet immediately (smaller for real-time output)
const MAX_EXECUTION_TIME = 5 * 60 * 1000; // Maximum script run time (5 minutes)
// Spreadsheet Configuration
const SPREADSHEET_NAME_PREFIX = "OCM_Contact_Export"; // Prefix for generated spreadsheet name
const MAIN_SHEET_NAME = "OCM Contact Export"; // Name of the main data sheet
const ERROR_SHEET_NAME = "Errors"; // Name of the error log sheet
const LOG_SHEET_NAME = "Processing Log"; // Name of the processing log sheet
// Generate unique sheet names with timestamp to avoid conflicts
function getUniqueSheetNames() {
var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
return {
mainSheet: MAIN_SHEET_NAME,
errorSheet: `${ERROR_SHEET_NAME}_${timestamp}`,
logSheet: `${LOG_SHEET_NAME}_${timestamp}`
};
}
// Column Headers Configuration
const SHEET_HEADERS = [
"Thread Number", "Confirmation Number", "Submission Date", "First Name", "Last Name",
"Email Address", "Phone Number", "Fax", "Organization", "Street Address",
"Street Address (cont.)", "City", "Province", "Country", "Postal Code",
"Full Address", "Subject", "Comments", "Gmail Date", "Gmail From",
"Gmail Subject", "Thread ID", "Email ID"
];
// Error Log Headers
const ERROR_HEADERS = ["Thread Number", "Error Type", "Error Details", "Email Date", "Email From", "Timestamp"];
// Processing Log Headers
const LOG_HEADERS = ["Timestamp", "Thread Number", "Action", "Details", "Status"];
// Validation Configuration - RELAXED FOR LABELED EMAILS
const REQUIRED_OCM_PHRASES = ["Contact Council web form"]; // Only require this phrase since all labeled emails should be OCM
const REPLY_INDICATORS = ["re:", "fwd:", "fw:"]; // Subject line indicators of replies/forwards
const SKIP_SENDER_DOMAINS = ["edmonton.ca"]; // Skip emails from these domains
const REQUIRED_FIELDS = []; // No required fields - process all labeled emails
// Address Construction Configuration
const EXCLUDE_COUNTRIES = ["canada"]; // Countries to exclude from full address (case insensitive)
// Progress Logging Configuration
const PROGRESS_LOG_INTERVAL = 10; // Log progress every N threads
const TIME_CHECK_INTERVAL = 10; // Check execution time every N threads
// Resume Configuration
const RESUME_DELAY_MINUTES = 1; // Minutes to wait before auto-resuming
// Global variables for sheets (will be initialized in main function)
var dataSheet, errorSheet, logSheet;
/**
* =====================================
* MAIN EXPORT FUNCTION
* =====================================
*/
/**
* Main function to export emails by label to Google Sheets with real-time output
*/
function downloadEmailsByLabel() {
var startTime = new Date().getTime();
try {
// Check if we're resuming from a checkpoint
var checkpoint = getCheckpoint();
var isResume = checkpoint && checkpoint.spreadsheetId;
var spreadsheet, label, allThreads;
var processedCount = 0;
var startIndex = 0;
var totalEmails = 0;
if (isResume) {
// Resume from checkpoint
logMessage("RESUME", 0, "Resuming export from checkpoint", `Processed ${checkpoint.processedCount} threads`);
spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
// FIXED: Initialize sheets for resume (this will now preserve existing data)
initializeSheets(spreadsheet, true); // Pass true to indicate this is a resume
label = GmailApp.getUserLabelByName(LABEL_NAME);
allThreads = getAllThreadsForLabel(label);
processedCount = checkpoint.processedCount;
startIndex = checkpoint.processedCount;
totalEmails = checkpoint.totalEmailsFound || 0;
} else {
// Start fresh - Create spreadsheet and sheets immediately
logMessage("START", 0, "Starting new export", "Creating spreadsheet and sheets");
// Get the label first
label = GmailApp.getUserLabelByName(LABEL_NAME);
if (!label) {
throw new Error(`Label "${LABEL_NAME}" not found. Please check the label name.`);
}
// Create spreadsheet with timestamp - ALWAYS NEW SPREADSHEET
var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "yyyy-MM-dd_HH-mm-ss");
var spreadsheetName = `${SPREADSHEET_NAME_PREFIX}_${timestamp}`;
spreadsheet = SpreadsheetApp.create(spreadsheetName);
// ✅ OUTPUT SPREADSHEET URL IMMEDIATELY
var spreadsheetUrl = spreadsheet.getUrl();
Logger.log("🔗 NEW SPREADSHEET CREATED!");
Logger.log("📊 Spreadsheet Name: " + spreadsheet.getName());
Logger.log("🌐 Spreadsheet URL: " + spreadsheetUrl);
Logger.log("📅 Created: " + new Date().toString());
Logger.log("=".repeat(80));
// FIXED: Initialize all sheets (creates fresh sheets with unique names)
initializeSheets(spreadsheet, false); // Pass false to indicate this is a fresh start
logMessage("INIT", 0, "Spreadsheet created", `Name: ${spreadsheet.getName()}`);
logMessage("INIT", 0, "Spreadsheet URL", spreadsheetUrl);
// Get all threads
logMessage("SCAN", 0, "Getting all threads for label", LABEL_NAME);
allThreads = getAllThreadsForLabel(label);
logMessage("SCAN", 0, "Thread scan complete", `Found ${allThreads.length} total threads`);
// Save initial checkpoint
saveCheckpoint({
spreadsheetId: spreadsheet.getId(),
labelName: LABEL_NAME,
totalThreads: allThreads.length,
processedCount: 0,
totalEmailsFound: 0,
startTime: startTime
});
}
// Process threads starting from checkpoint
for (var i = startIndex; i < allThreads.length; i++) {
// Check execution time every 10 threads
if (i % TIME_CHECK_INTERVAL === 0) {
var currentTime = new Date().getTime();
var elapsedTime = currentTime - startTime;
if (elapsedTime > MAX_EXECUTION_TIME) {
logMessage("CHECKPOINT", i, "Approaching time limit", `Saving checkpoint at thread ${i}`);
// Save checkpoint and schedule continuation
saveCheckpoint({
spreadsheetId: spreadsheet.getId(),
labelName: LABEL_NAME,
totalThreads: allThreads.length,
processedCount: i,
totalEmailsFound: totalEmails,
startTime: startTime
});
// Schedule next run
scheduleResume();
logMessage("CHECKPOINT", i, "Checkpoint saved", `Processed ${i}/${allThreads.length} threads, found ${totalEmails} emails. Next batch in 1 minute`);
Logger.log(`Checkpoint saved. Processed ${i}/${allThreads.length} threads, found ${totalEmails} emails`);
Logger.log(`Next batch will be triggered automatically in 1 minute`);
return spreadsheet.getUrl();
}
}
try {
var thread = allThreads[i];
var messages = thread.getMessages();
var threadNumber = i + 1;
logMessage("PROCESS", threadNumber, "Processing thread", `${messages.length} messages`);
// Sort messages by date to ensure we get the original email
messages.sort(function(a, b) {
return a.getDate().getTime() - b.getDate().getTime();
});
// Only process the FIRST email (root email) in each thread
if (messages.length > 0) {
var rootMessage = messages[0];
var rowData = parseOCMContactForm(rootMessage, thread, threadNumber);
// Process ALL labeled emails (they should all be OCM forms)
if (rowData) {
// Write immediately to sheet (real-time output)
writeRowToSheet(dataSheet, rowData);
totalEmails++;
logMessage("SUCCESS", threadNumber, "Email processed", `${rowData[3]} ${rowData[4]} - Confirmation: ${rowData[1]}`);
// Update checkpoint periodically
if (totalEmails % BATCH_SIZE === 0) {
saveCheckpoint({
spreadsheetId: spreadsheet.getId(),
labelName: LABEL_NAME,
totalThreads: allThreads.length,
processedCount: i + 1,
totalEmailsFound: totalEmails,
startTime: startTime
});
}
} else {
logMessage("SKIP", threadNumber, "Email skipped", "Did not meet processing criteria");
}
} else {
logMessage("SKIP", threadNumber, "Thread skipped", "No messages found");
}
processedCount = i + 1;
// Log progress every configured interval
if ((i + 1) % PROGRESS_LOG_INTERVAL === 0) {
logMessage("PROGRESS", i + 1, "Progress update", `Processed ${i + 1}/${allThreads.length} threads, found ${totalEmails} emails`);
Logger.log(`Processed ${i + 1}/${allThreads.length} threads, found ${totalEmails} emails`);
}
} catch (threadError) {
var errorMsg = `Error processing thread ${i + 1}: ${threadError.toString()}`;
Logger.log(errorMsg);
logMessage("ERROR", i + 1, "Thread processing error", threadError.toString());
logError(i + 1, "Thread Processing Error", threadError.toString(), "Unknown", "Unknown");
continue;
}
}
// Format the sheets
formatSheet(dataSheet, dataSheet.getLastRow());
// Clear checkpoint - we're done!
clearCheckpoint();
logMessage("COMPLETE", allThreads.length, "Export completed", `Processed ${totalEmails} emails from ${allThreads.length} threads`);
Logger.log(`✅ EXPORT COMPLETE! Processed ${totalEmails} emails from ${allThreads.length} threads`);
Logger.log(`Spreadsheet URL: ${spreadsheet.getUrl()}`);
// Send completion notification
GmailApp.sendEmail(
Session.getActiveUser().getEmail(),
`✅ OCM Contact Form Export COMPLETE - ${LABEL_NAME}`,
`OCM Contact form export completed successfully!\n\n` +
`Label: ${LABEL_NAME}\n` +
`Threads processed: ${allThreads.length}\n` +
`Emails found: ${totalEmails}\n` +
`Spreadsheet: ${spreadsheet.getName()}\n` +
`URL: ${spreadsheet.getUrl()}\n\n` +
`Your OCM contact form data is now fully parsed and organized!\n` +
`Check the "Errors" and "Processing Log" sheets for details.`
);
return spreadsheet.getUrl();
} catch (error) {
var errorMsg = `❌ Error: ${error.toString()}`;
Logger.log(errorMsg);
if (typeof logMessage === 'function') {
logMessage("FATAL", 0, "Fatal error occurred", error.toString());
}
// Send error notification
GmailApp.sendEmail(
Session.getActiveUser().getEmail(),
"❌ OCM Export Error",
`An error occurred during export:\n\n${error.toString()}\n\nCheckpoint data preserved for resume.`
);
throw error;
}
}
/**
* FIXED: Initialize all sheets in the spreadsheet - Now preserves data when resuming
* @param {Spreadsheet} spreadsheet - The spreadsheet object
* @param {boolean} isResume - Whether this is a resume operation (true) or fresh start (false)
*/
function initializeSheets(spreadsheet, isResume) {
try {
if (isResume) {
// RESUMING: Connect to existing sheets without clearing data
Logger.log("🔄 Resuming - connecting to existing sheets...");
// Find existing sheets by name patterns
var sheets = spreadsheet.getSheets();
dataSheet = null;
errorSheet = null;
logSheet = null;
// Find the main data sheet
for (var i = 0; i < sheets.length; i++) {
var sheetName = sheets[i].getName();
if (sheetName === MAIN_SHEET_NAME || sheetName.includes("OCM Contact Export")) {
dataSheet = sheets[i];
Logger.log(`📋 Found existing data sheet: "${sheetName}"`);
break;
}
}
// Find error and log sheets (look for most recent ones)
for (var i = 0; i < sheets.length; i++) {
var sheetName = sheets[i].getName();
if (sheetName.includes("Errors") && !errorSheet) {
errorSheet = sheets[i];
Logger.log(`❌ Found existing error sheet: "${sheetName}"`);
}
if (sheetName.includes("Log") && !logSheet) {
logSheet = sheets[i];
Logger.log(`📝 Found existing log sheet: "${sheetName}"`);
}
}
// If we can't find existing sheets, create new ones (fallback)
if (!dataSheet) {
Logger.log("⚠️ Data sheet not found, creating new one...");
dataSheet = spreadsheet.getActiveSheet();
if (dataSheet.getName() !== MAIN_SHEET_NAME) {
dataSheet.setName(MAIN_SHEET_NAME);
}
// Only add headers if the sheet is empty
if (dataSheet.getLastRow() === 0) {
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
dataSheet.setFrozenRows(1);
}
}
if (!errorSheet) {
var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
errorSheet = spreadsheet.insertSheet(`${ERROR_SHEET_NAME}_${timestamp}`);
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
errorSheet.setFrozenRows(1);
}
if (!logSheet) {
var timestamp = Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "MM-dd_HH-mm");
logSheet = spreadsheet.insertSheet(`${LOG_SHEET_NAME}_${timestamp}`);
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
logSheet.setFrozenRows(1);
}
Logger.log(`✅ Resume initialization complete! Data preserved.`);
Logger.log(` 📊 Data rows: ${dataSheet.getLastRow() - 1}`);
} else {
// FRESH START: Create new sheets with unique names
var sheetNames = getUniqueSheetNames();
Logger.log("🔧 Fresh start - creating new sheets with unique names:");
Logger.log(` 📋 Main: "${sheetNames.mainSheet}"`);
Logger.log(` ❌ Errors: "${sheetNames.errorSheet}"`);
Logger.log(` 📝 Log: "${sheetNames.logSheet}"`);
// Set up main data sheet (rename the default sheet)
dataSheet = spreadsheet.getActiveSheet();
dataSheet.setName(sheetNames.mainSheet);
// Clear any existing content and set up headers for data sheet
dataSheet.clear();
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
dataSheet.setFrozenRows(1);
// Create error sheet with unique name
errorSheet = spreadsheet.insertSheet(sheetNames.errorSheet);
errorSheet.clear();
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
errorSheet.setFrozenRows(1);
// Create log sheet with unique name
logSheet = spreadsheet.insertSheet(sheetNames.logSheet);
logSheet.clear();
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
logSheet.setFrozenRows(1);
Logger.log(`✅ Fresh sheets created successfully!`);
}
} catch (error) {
Logger.log(`❌ Error initializing sheets: ${error.toString()}`);
Logger.log(`🔧 Attempting alternative sheet creation method...`);
// Fallback: Try with even more unique names
try {
var fallbackTimestamp = new Date().getTime();
if (!dataSheet) {
dataSheet = spreadsheet.getActiveSheet();
dataSheet.setName(`Data_${fallbackTimestamp}`);
if (!isResume) {
dataSheet.clear();
}
// Only add headers if the sheet is empty
if (dataSheet.getLastRow() === 0) {
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setValues([SHEET_HEADERS]);
dataSheet.getRange(1, 1, 1, SHEET_HEADERS.length).setFontWeight("bold");
dataSheet.setFrozenRows(1);
}
}
if (!errorSheet) {
errorSheet = spreadsheet.insertSheet(`Errors_${fallbackTimestamp}`);
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setValues([ERROR_HEADERS]);
errorSheet.getRange(1, 1, 1, ERROR_HEADERS.length).setFontWeight("bold");
errorSheet.setFrozenRows(1);
}
if (!logSheet) {
logSheet = spreadsheet.insertSheet(`Log_${fallbackTimestamp}`);
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setValues([LOG_HEADERS]);
logSheet.getRange(1, 1, 1, LOG_HEADERS.length).setFontWeight("bold");
logSheet.setFrozenRows(1);
}
Logger.log(`✅ Fallback sheet creation successful!`);
} catch (fallbackError) {
Logger.log(`❌ Fallback also failed: ${fallbackError.toString()}`);
throw fallbackError;
}
}
}
/**
* Write a single row of data immediately to the sheet
*/
function writeRowToSheet(sheet, rowData) {
try {
var lastRow = sheet.getLastRow();
sheet.getRange(lastRow + 1, 1, 1, rowData.length).setValues([rowData]);
} catch (error) {
Logger.log(`Error writing row to sheet: ${error.toString()}`);
throw error;
}
}
/**
* Log a message to the processing log sheet
*/
function logMessage(action, threadNumber, message, details) {
try {
if (!logSheet) return; // Skip if log sheet not initialized yet
var timestamp = new Date();
var status = (action === "ERROR" || action === "FATAL") ? "ERROR" : "SUCCESS";
var logData = [timestamp, threadNumber, action, message + (details ? ": " + details : ""), status];
var lastRow = logSheet.getLastRow();
logSheet.getRange(lastRow + 1, 1, 1, logData.length).setValues([logData]);
// Also log to console for immediate feedback
Logger.log(`${action} - Thread ${threadNumber}: ${message}${details ? " - " + details : ""}`);
} catch (error) {
Logger.log(`Error writing to log sheet: ${error.toString()}`);
}
}
/**
* Log an error to the error sheet
*/
function logError(threadNumber, errorType, errorDetails, emailDate, emailFrom) {
try {
if (!errorSheet) return; // Skip if error sheet not initialized yet
var timestamp = new Date();
var errorData = [threadNumber, errorType, errorDetails, emailDate, emailFrom, timestamp];
var lastRow = errorSheet.getLastRow();
errorSheet.getRange(lastRow + 1, 1, 1, errorData.length).setValues([errorData]);
} catch (error) {
Logger.log(`Error writing to error sheet: ${error.toString()}`);
}
}
/**
* Gets ALL threads for a label, handling Gmail's pagination limits
*/
function getAllThreadsForLabel(label) {
var allThreads = [];
var start = 0;
var maxResults = 500; // Gmail's max per request
do {
var threads = label.getThreads(start, maxResults);
allThreads = allThreads.concat(threads);
start += threads.length;
logMessage("SCAN", 0, "Retrieved thread batch", `${threads.length} threads (total so far: ${allThreads.length})`);
// Break if we got fewer than max results (end of results)
if (threads.length < maxResults) {
break;
}
// Brief pause between batches
Utilities.sleep(200);
} while (threads.length === maxResults);
return allThreads;
}
/**
* Parses OCM Contact form from email body and extracts structured data
* Since all emails have the label, we'll process them all and extract what we can
*/
function parseOCMContactForm(message, thread, threadNumber) {
try {
var plainBody = message.getPlainBody() || "";
var subject = (message.getSubject() || "").toLowerCase();
var from = message.getFrom() || "";
// Skip if it's clearly a reply or forward (basic filtering)
var isReply = REPLY_INDICATORS.some(function(indicator) {
return subject.includes(indicator);
});
var isFromSkippedDomain = SKIP_SENDER_DOMAINS.some(function(domain) {
return from.includes(domain);
});
if (isReply || isFromSkippedDomain) {
logMessage("SKIP", threadNumber, "Reply/forward detected", `Subject: ${subject}, From: ${from}`);
return null; // Skip this email
}
// Initialize all fields with defaults
var confirmationNumber = "";
var submissionDate = "";
var firstName = "";
var lastName = "";
var emailAddress = "";
var phoneNumber = "";
var fax = "";
var organization = "";
var streetAddress = "";
var streetAddressCont = "";
var city = "";
var province = "";
var country = "";
var postalCode = "";
var subjectField = "";
var comments = "";
// Parse each field using regex patterns - but don't fail if fields are missing
var patterns = {
confirmationNumber: /Confirmation Number:\s*([^\r\n]+?)(?:\s|$)/i,
submissionDate: /Submission Date:\s*([^\r\n]+?)(?:\s*Contact Information|$)/i,
firstName: /First Name:\s*([^\r\n]+?)(?:\s*Last Name|$)/i,
lastName: /Last Name:\s*([^\r\n]+?)(?:\s*Email Address|$)/i,
emailAddress: /Email Address:\s*([^\r\n<\s]+?)(?:<mailto:|Phone Number|\s|$)/i,
phoneNumber: /Phone Number:\s*([^\r\n]+?)(?:\s*Fax|$)/i,
fax: /Fax:\s*([^\r\n]*?)(?:\s*Organization|$)/i,
organization: /Organization:\s*([^\r\n]*?)(?:\s*Street Address|$)/i,
streetAddress: /Street Address:\s*([^\r\n]+?)(?:\s*Street Address \(cont\.\)|City:|$)/i,
streetAddressCont: /Street Address \(cont\.\):\s*([^\r\n]*?)(?:\s*City:|$)/i,
city: /City:\s*([^\r\n]+?)(?:\s*Province:|$)/i,
province: /Province:\s*([^\r\n]+?)(?:\s*Country:|$)/i,
country: /Country:\s*([^\r\n]+?)(?:\s*Postal Code:|$)/i,
postalCode: /Postal Code:\s*([^\r\n]+?)(?:\s*Message|$)/i,
subject: /Subject:\s*([^\r\n]+?)(?:\s*Comments:|$)/i,
comments: /Comments:\s*([\s\S]*?)(?=The contents of this message|$)/i
};
// Extract each field - continue even if some fields are missing
for (var field in patterns) {
var match = plainBody.match(patterns[field]);
if (match && match[1]) {
var value = match[1].trim();
// Clean up common parsing issues
value = value.replace(/^[:\s]+|[:\s]+$/g, ''); // Remove leading/trailing colons and spaces
value = value.replace(/^[>]+\s*/, ''); // Remove email quote markers
value = value.replace(/\s+/g, ' '); // Normalize whitespace
// Specific cleaning for email addresses
if (field === 'emailAddress') {
value = value.replace(/<mailto:.*?>/g, '').replace(/[<>]/g, '').trim();
}
// Specific cleaning for organization (often empty)
if (field === 'organization' && (value === '' || value === ':' || value.length < 2)) {
value = '';
}
// Specific cleaning for fax (often empty)
if (field === 'fax' && (value === '' || value === ':' || value.length < 2)) {
value = '';
}
// Clean up comments (remove extra whitespace and footer)
if (field === 'comments') {
value = value.replace(/The contents of this message.*$/i, '').trim();
value = value.replace(/^[>\s]+/gm, '').trim(); // Remove quote markers
}
// Clean up street address continuation (often empty)
if (field === 'streetAddressCont' && (value === '' || value === ':' || value.length < 2)) {
value = '';
}
// Assign to variables
switch(field) {
case 'confirmationNumber': confirmationNumber = value; break;
case 'submissionDate': submissionDate = value; break;
case 'firstName': firstName = value; break;
case 'lastName': lastName = value; break;
case 'emailAddress': emailAddress = value; break;
case 'phoneNumber': phoneNumber = value; break;
case 'fax': fax = value; break;
case 'organization': organization = value; break;
case 'streetAddress': streetAddress = value; break;
case 'streetAddressCont': streetAddressCont = value; break;
case 'city': city = value; break;
case 'province': province = value; break;
case 'country': country = value; break;
case 'postalCode': postalCode = value; break;
case 'subject': subjectField = value; break;
case 'comments': comments = value; break;
}
}
}
// Use placeholder values for missing essential data
if (!firstName || firstName.trim() === '') {
firstName = "Unknown";
}
if (!lastName || lastName.trim() === '') {
lastName = "Unknown";
}
if (!confirmationNumber || confirmationNumber.trim() === '') {
confirmationNumber = "Unknown";
}
// Construct full address
var fullAddress = constructFullAddress(streetAddress, streetAddressCont, city, province, country, postalCode);
return [
threadNumber,
confirmationNumber,
submissionDate,
firstName,
lastName,
emailAddress,
phoneNumber,
fax,
organization,
streetAddress,
streetAddressCont,
city,
province,
country,
postalCode,
fullAddress,
subjectField,
comments,
message.getDate(),
message.getFrom(),
message.getSubject(),
thread.getId(),
message.getId()
];
} catch (error) {
logError(threadNumber, "Parsing Error", error.toString(),
message.getDate ? message.getDate() : "Unknown",
message.getFrom ? message.getFrom() : "Unknown");
Logger.log(`Error parsing email for thread ${threadNumber}: ${error.toString()}`);
return [
threadNumber,
"ERROR",
"ERROR PARSING",
"Unknown",
"Unknown",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"Error: " + error.toString(),
message.getDate ? message.getDate() : new Date(),
message.getFrom ? message.getFrom() : "Unknown",
message.getSubject ? message.getSubject() : "Unknown",
thread.getId(),
message.getId ? message.getId() : "Unknown"
];
}
}
/**
* Constructs a full address from individual components
*/
function constructFullAddress(street, streetCont, city, province, country, postal) {
var addressParts = [];
// Clean and validate each component first
street = (street || "").trim();
streetCont = (streetCont || "").trim();
city = (city || "").trim();
province = (province || "").trim();
country = (country || "").trim();
postal = (postal || "").trim();
// Add street address (only if it doesn't already contain city info)
if (street && street !== "" && !street.toLowerCase().includes("city:")) {
addressParts.push(street);
}
// Add continuation of street address (only if different from main street and not empty)
if (streetCont && streetCont !== "" && streetCont !== street && !streetCont.toLowerCase().includes("city:")) {
addressParts.push(streetCont);
}
// Add city (clean up any "City:" prefix that might have snuck in)
if (city && city !== "") {
city = city.replace(/^City:\s*/i, '').trim();
if (city !== "") {
addressParts.push(city);
}
}
// Add province (clean up any prefixes)
if (province && province !== "") {
province = province.replace(/^Province:\s*/i, '').trim();
if (province !== "") {
addressParts.push(province);
}
}
// Add postal code (clean up any prefixes)
if (postal && postal !== "") {
postal = postal.replace(/^Postal Code:\s*/i, '').trim();
if (postal !== "") {
addressParts.push(postal);
}
}
// Add country only if it's not in the exclude list and not empty
if (country && country !== "") {
country = country.replace(/^Country:\s*/i, '').trim();
var isExcludedCountry = EXCLUDE_COUNTRIES.some(function(excludedCountry) {
return country.toLowerCase() === excludedCountry.toLowerCase();
});
if (country !== "" && !isExcludedCountry) {
addressParts.push(country);
}
}
return addressParts.join(", ");
}
/**
* Formats the spreadsheet for better readability
*/
function formatSheet(sheet, totalRows) {
try {
// Auto-resize columns
sheet.autoResizeColumns(1, sheet.getLastColumn());
// Set column widths for OCM data
sheet.setColumnWidth(1, 80); // Thread Number
sheet.setColumnWidth(2, 120); // Confirmation Number
sheet.setColumnWidth(3, 150); // Submission Date
sheet.setColumnWidth(4, 100); // First Name
sheet.setColumnWidth(5, 100); // Last Name
sheet.setColumnWidth(6, 200); // Email Address
sheet.setColumnWidth(7, 120); // Phone Number
sheet.setColumnWidth(8, 80); // Fax
sheet.setColumnWidth(9, 150); // Organization
sheet.setColumnWidth(10, 200); // Street Address
sheet.setColumnWidth(11, 150); // Street Address (cont.)
sheet.setColumnWidth(12, 100); // City
sheet.setColumnWidth(13, 100); // Province
sheet.setColumnWidth(14, 100); // Country
sheet.setColumnWidth(15, 100); // Postal Code
sheet.setColumnWidth(16, 300); // Full Address
sheet.setColumnWidth(17, 200); // Subject
sheet.setColumnWidth(18, 400); // Comments
// Add alternating row colors
if (totalRows > 1) {
var range = sheet.getRange(2, 1, totalRows - 1, sheet.getLastColumn());
range.applyRowBanding(SpreadsheetApp.BandingTheme.LIGHT_GREY);
}
// Format date columns
if (totalRows > 1) {
// Submission Date (column 3)
var submissionDateRange = sheet.getRange(2, 3, totalRows - 1, 1);
submissionDateRange.setNumberFormat("yyyy-mm-dd hh:mm:ss");
// Gmail Date (column 19)
var gmailDateRange = sheet.getRange(2, 19, totalRows - 1, 1);
gmailDateRange.setNumberFormat("yyyy-mm-dd hh:mm:ss");
}
} catch (error) {
Logger.log(`Error formatting sheet: ${error.toString()}`);
}
}
/**
* =====================================
* TEST FUNCTIONS
* =====================================
*/
/**
* Test function to check label and get count - RUN THIS FIRST
*/
function testLabelCount() {
try {
Logger.log("=".repeat(50));
Logger.log("GMAIL EXPORT READINESS TEST");
Logger.log("=".repeat(50));
// Test 1: Check if label exists
Logger.log("Test 1: Checking if label exists...");
var label = GmailApp.getUserLabelByName(LABEL_NAME);
if (!label) {
Logger.log(`❌ FAILED: Label "${LABEL_NAME}" not found.`);
Logger.log("\nAvailable labels:");
var labels = GmailApp.getUserLabels();
for (var i = 0; i < labels.length; i++) {
Logger.log(`- ${labels[i].getName()}`);
}
return false;
}
Logger.log(`✅ PASSED: Label "${LABEL_NAME}" found successfully`);
// Test 2: Get thread count
Logger.log("\nTest 2: Getting thread count...");
var allThreads = getAllThreadsForLabel(label);
Logger.log(`✅ PASSED: Found ${allThreads.length} threads with label "${LABEL_NAME}"`);
if (allThreads.length === 0) {
Logger.log("⚠️ WARNING: No threads found with this label. Nothing to export.");
return false;
}
// Test 3: Sample message analysis
Logger.log("\nTest 3: Analyzing sample messages...");
var sampleSize = Math.min(5, allThreads.length);
var totalSampleMessages = 0;
var threadSizes = [];
for (var i = 0; i < sampleSize; i++) {
var messages = allThreads[i].getMessages();
totalSampleMessages += messages.length;
threadSizes.push(messages.length);
}
var avgMessagesPerThread = totalSampleMessages / sampleSize;
var maxMessages = Math.max.apply(Math, threadSizes);
var minMessages = Math.min.apply(Math, threadSizes);
Logger.log(`✅ PASSED: Analyzed ${sampleSize} sample threads`);
Logger.log(` - Average messages per thread: ${avgMessagesPerThread.toFixed(1)}`);
Logger.log(` - Largest thread: ${maxMessages} messages`);
Logger.log(` - Smallest thread: ${minMessages} messages`);
// Test 4: Sample parsing
Logger.log("\nTest 4: Testing email parsing...");
var parseableCount = 0;
for (var i = 0; i < Math.min(3, allThreads.length); i++) {
var thread = allThreads[i];
var messages = thread.getMessages();
if (messages.length > 0) {
var rootMessage = messages[0];
var testData = parseOCMContactForm(rootMessage, thread, i + 1);
if (testData) {
parseableCount++;
Logger.log(` - Thread ${i + 1}: Successfully parsed data`);
}
}
}
Logger.log(`✅ PASSED: Successfully parsed ${parseableCount} sample emails`);
// Final summary
Logger.log("\n" + "=".repeat(50));
Logger.log("READINESS TEST SUMMARY");
Logger.log("=".repeat(50));
Logger.log(`Label: ${LABEL_NAME}`);
Logger.log(`Threads: ${allThreads.length}`);
Logger.log(`Sample parsing success: ${parseableCount}/${Math.min(3, allThreads.length)}`);
Logger.log("Status: ✅ READY FOR FULL EXPORT");
Logger.log("\nTo proceed with full export, run: downloadEmailsByLabel()");
return true;
} catch (error) {
Logger.log(`❌ CRITICAL ERROR: ${error.toString()}`);
return false;
}
}
/**
* Quick test with just 3 submissions to verify parsing works with real-time output - RUN THIS SECOND
*/
function testSmallSample() {
const TEST_LIMIT = 3; // Only process 3 submissions for testing
try {
Logger.log("Starting email parsing test with real-time output...");
var label = GmailApp.getUserLabelByName(LABEL_NAME);
if (!label) {
throw new Error(`Label "${LABEL_NAME}" not found`);
}
// Create test spreadsheet with all sheets
var spreadsheetName = `TEST_${SPREADSHEET_NAME_PREFIX}_${Utilities.formatDate(new Date(), Session.getScriptTimeZone(), "yyyy-MM-dd_HH-mm-ss")}`;
var spreadsheet = SpreadsheetApp.create(spreadsheetName);
// Initialize all sheets
initializeSheets(spreadsheet, false); // Fresh start for test
logMessage("TEST", 0, "Test started", `Testing with up to ${TEST_LIMIT} emails`);
// Get first few threads
var threads = label.getThreads(0, 10); // Get first 10 threads
var submissionCount = 0;
for (var i = 0; i < threads.length && submissionCount < TEST_LIMIT; i++) {
var thread = threads[i];
var messages = thread.getMessages();
var threadNumber = i + 1;
logMessage("TEST", threadNumber, "Testing thread", `${messages.length} messages`);
// Sort messages by date to get the root email
messages.sort(function(a, b) {
return a.getDate().getTime() - b.getDate().getTime();
});
if (messages.length > 0) {
var rootMessage = messages[0];
var rowData = parseOCMContactForm(rootMessage, thread, threadNumber);
// Process the email regardless (since it's labeled)
if (rowData) {
writeRowToSheet(dataSheet, rowData);
submissionCount++;
logMessage("TEST", threadNumber, "Test email processed", `${rowData[3]} ${rowData[4]} - ${rowData[1]}`);
Logger.log(`✅ Found labeled email ${submissionCount}: ${rowData[3]} ${rowData[4]} - ${rowData[1]}`);
}
}
}
if (submissionCount === 0) {
logMessage("TEST", 0, "Test completed", "No emails found in first 10 threads");
Logger.log("⚠️ No emails found in the first 10 threads. Check your label.");
return null;
}
// Format the test sheet
formatSheet(dataSheet, dataSheet.getLastRow());
logMessage("TEST", 0, "Test completed successfully", `Processed ${submissionCount} emails`);
Logger.log(`✅ Test completed successfully!`);
Logger.log(`Found and parsed ${submissionCount} labeled emails`);
Logger.log(`Test spreadsheet: ${spreadsheet.getUrl()}`);
Logger.log(`\nReview the parsed data in all sheets. If it looks good, run downloadEmailsByLabel() for full export.`);
return spreadsheet.getUrl();
} catch (error) {
Logger.log(`❌ Test failed: ${error.toString()}`);
throw error;
}
}
/**
* Checkpoint and resume functions
*/
function saveCheckpoint(checkpointData) {
try {
PropertiesService.getScriptProperties().setProperty('OCM_EXPORT_CHECKPOINT', JSON.stringify(checkpointData));
Logger.log('Checkpoint saved successfully');
} catch (error) {
Logger.log('Error saving checkpoint: ' + error.toString());
}
}
function getCheckpoint() {
try {
var checkpointString = PropertiesService.getScriptProperties().getProperty('OCM_EXPORT_CHECKPOINT');
if (checkpointString) {
return JSON.parse(checkpointString);
}
} catch (error) {
Logger.log('Error loading checkpoint: ' + error.toString());
}
return null;
}
function clearCheckpoint() {
try {
PropertiesService.getScriptProperties().deleteProperty('OCM_EXPORT_CHECKPOINT');
Logger.log('Checkpoint cleared');
} catch (error) {
Logger.log('Error clearing checkpoint: ' + error.toString());
}
}
/**
* Schedule the next run using a time-based trigger
*/
function scheduleResume() {
try {
// Delete any existing resume triggers first
var triggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < triggers.length; i++) {
if (triggers[i].getHandlerFunction() === 'resumeExport') {
ScriptApp.deleteTrigger(triggers[i]);
}
}
// Create new trigger to run after configured delay
ScriptApp.newTrigger('resumeExport')
.timeBased()
.after(RESUME_DELAY_MINUTES * 60 * 1000) // Convert minutes to milliseconds
.create();
Logger.log(`Resume trigger scheduled for ${RESUME_DELAY_MINUTES} minute(s)`);
} catch (error) {
Logger.log('Error scheduling resume: ' + error.toString());
}
}
/**
* Resume function called by trigger
*/
function resumeExport() {
Logger.log('Resuming OCM export from checkpoint...');
// Clean up the trigger that called this function
var triggers = ScriptApp.getProjectTriggers();
for (var i = 0; i < triggers.length; i++) {
if (triggers[i].getHandlerFunction() === 'resumeExport') {
ScriptApp.deleteTrigger(triggers[i]);
break;
}
}
// Continue the export
downloadEmailsByLabel();
}
/**
* Manual function to check checkpoint status
*/
function checkCheckpointStatus() {
var checkpoint = getCheckpoint();
if (checkpoint) {
Logger.log('=== CHECKPOINT STATUS ===');
Logger.log(`Spreadsheet ID: ${checkpoint.spreadsheetId}`);
Logger.log(`Label: ${checkpoint.labelName}`);
Logger.log(`Progress: ${checkpoint.processedCount}/${checkpoint.totalThreads} threads`);
Logger.log(`Emails found: ${checkpoint.totalEmailsFound || 'Unknown'}`);
Logger.log(`Started: ${new Date(checkpoint.startTime)}`);
try {
var spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
Logger.log(`Spreadsheet URL: ${spreadsheet.getUrl()}`);
} catch (e) {
Logger.log(`Error accessing spreadsheet: ${e.toString()}`);
}
Logger.log(`\nTo resume manually, run: downloadEmailsByLabel()`);
Logger.log(`To clear checkpoint, run: clearCheckpoint()`);
} else {
Logger.log('No checkpoint found. No export in progress.');
}
}
/**
* Emergency function to manually clear stuck checkpoints
*/
function clearCheckpointAndTriggers() {
// Clear checkpoint
clearCheckpoint();
// Delete all resume triggers
var triggers = ScriptApp.getProjectTriggers();
var deletedCount = 0;
for (var i = 0; i < triggers.length; i++) {
if (triggers[i].getHandlerFunction() === 'resumeExport') {
ScriptApp.deleteTrigger(triggers[i]);
deletedCount++;
}
}
Logger.log(`Cleared checkpoint and deleted ${deletedCount} resume triggers`);
Logger.log('You can now start a fresh export with downloadEmailsByLabel()');
}
/**
* Utility function to get the current spreadsheet URL if one exists
*/
function getCurrentSpreadsheetUrl() {
var checkpoint = getCheckpoint();
if (checkpoint && checkpoint.spreadsheetId) {
try {
var spreadsheet = SpreadsheetApp.openById(checkpoint.spreadsheetId);
Logger.log(`Current spreadsheet URL: ${spreadsheet.getUrl()}`);
return spreadsheet.getUrl();
} catch (e) {
Logger.log(`Error accessing current spreadsheet: ${e.toString()}`);
}
} else {
Logger.log('No active export found.');
}
return null;
}