Extracting PDF Attachments from Gmail Using JavaScript

/**
 * Depth-first search of a MIME part tree for the first PDF attachment.
 *
 * Returns the PDF part itself. (Using `Array.prototype.find` with a recursive
 * predicate would instead return the direct child that merely *contains* the
 * PDF, which has no usable `body.attachmentId`.)
 *
 * @param {object} part - A MIME part; may carry `mimeType`, `body` and `parts`.
 * @returns {object|null} The first part whose `mimeType` is `application/pdf`
 *   and that has a `body.attachmentId`, or `null` if there is none.
 */
function findPdfPart(part) {
  if (!part) {
    return null;
  }
  if (part.mimeType === 'application/pdf' && part.body?.attachmentId) {
    return part;
  }
  if (Array.isArray(part.parts)) {
    for (const child of part.parts) {
      const match = findPdfPart(child);
      if (match) {
        return match;
      }
    }
  }
  return null;
}

/**
 * Reduces an attachment filename taken from an email to a bare file name.
 *
 * The name is attacker-controlled input, so directory components (with either
 * slash style) are stripped to keep the written file inside the save directory.
 *
 * @param {string|undefined|null} rawName - Filename reported by the MIME part.
 * @returns {string} A bare file name, or `'download.pdf'` when nothing usable remains.
 */
function toSafeAttachmentName(rawName) {
  const base = path.basename(String(rawName ?? '').replace(/\\/g, '/'));
  return base === '' || base === '.' || base === '..' ? 'download.pdf' : base;
}

/**
 * Finds an email matching the given criteria and saves its first PDF attachment.
 *
 * The search is retried up to 5 times with a 5 second pause between attempts.
 * `fetchAuthToken`, `searchGmail`, `delay` and `clientConfig` are defined
 * outside this block; their exact contracts are not visible here.
 *
 * @param {string} sender - Interpolated into the query as `from:<sender>`.
 * @param {string} recipient - Interpolated into the query as `to:<recipient>`.
 * @param {string} emailSubject - Interpolated into the query as `subject:<emailSubject>`.
 * @param {string} emailContent - Appended to the query as free text.
 * @param {string} [saveDir='./Downloads/'] - Directory to write the PDF into;
 *   created (recursively) if it does not exist.
 * @returns {Promise<string|null>} Path of the saved PDF, or `null` when the
 *   email or PDF was not found, the content could not be fetched, or saving failed.
 */
async function getPdfFromEmail(sender, recipient, emailSubject, emailContent, saveDir = './Downloads/') {
  const MAX_ATTEMPTS = 5;
  const RETRY_DELAY_MS = 5000;

  // Get auth token
  const token = await fetchAuthToken(
    clientConfig.ID,
    clientConfig.SECRET,
    clientConfig.REFRESH
  );

  // Search for email, retrying while nothing is found
  let attempts = 0;
  let targetEmail = null;
  while (!targetEmail && attempts < MAX_ATTEMPTS) {
    targetEmail = await searchGmail({
      auth: token,
      searchParams: `from:${sender} to:${recipient} subject:${emailSubject} ${emailContent} has:attachment`,
      detailLevel: 'complete'
    });

    if (!targetEmail) {
      attempts++;
      console.log(`Email not found. Trying again (${attempts}/${MAX_ATTEMPTS})`);
      await delay(RETRY_DELAY_MS);
    }
  }

  if (!targetEmail) {
    console.log(`Email search failed after ${MAX_ATTEMPTS} attempts`);
    return null;
  }

  // Ensure save directory exists
  if (!fs.existsSync(saveDir)) {
    fs.mkdirSync(saveDir, { recursive: true });
  }

  try {
    // Locate the actual PDF part, however deeply it is nested
    const pdfAttachment = findPdfPart(targetEmail.payload);

    if (!pdfAttachment) {
      console.log('No PDF found in email');
      return null;
    }

    // Get attachment content
    const pdfContent = await searchGmail({
      auth: token,
      emailId: targetEmail.id,
      attachmentId: pdfAttachment.body.attachmentId
    });

    if (!pdfContent?.data) {
      console.log('Failed to get PDF content');
      return null;
    }

    // Save PDF under a sanitised name so it cannot escape saveDir
    const fileName = toSafeAttachmentName(pdfAttachment.filename);
    const filePath = path.join(saveDir, fileName);

    // NOTE(review): data is presumably base64/base64url text; Node's 'base64'
    // decoder accepts both alphabets — confirm against searchGmail's output.
    fs.writeFileSync(filePath, Buffer.from(pdfContent.data, 'base64'));

    console.log(`PDF saved: ${filePath}`);
    return filePath;
  } catch (err) {
    console.error(`Error: ${err.message}`);
    return null;
  }
}

I’m trying to get PDF attachments from Gmail but I’m running into issues with nested MIME structures. The current code finds the PDF part but gets stuck in a loop when trying to fetch the data. How can I fix this to properly handle nested structures and avoid infinite loops? Any tips on simplifying the attachment retrieval process would be great too.

I’ve had similar issues when working with Gmail’s API. In my case, the solution was to implement a recursive function that carefully traverses each MIME part without reprocessing nodes, thereby avoiding infinite loops.

For instance, this is what I used:

/**
 * Locates the first attachment part of a given MIME type in a part tree.
 *
 * Parts are visited in pre-order (a part before its children, children in
 * array order) using an explicit stack instead of recursion.
 *
 * @param {object} part - Root MIME part to start from.
 * @param {string} mimeType - MIME type to match exactly against `mimeType`.
 * @returns {object|null} The first part with that MIME type and a truthy
 *   `body.attachmentId`, or `null` when no part qualifies.
 */
function findAttachment(part, mimeType) {
  const pending = [part];

  while (pending.length > 0) {
    const node = pending.pop();

    if (node.mimeType === mimeType && node.body?.attachmentId) {
      return node;
    }

    if (!Array.isArray(node.parts)) {
      continue;
    }

    // Push children last-to-first so the first child is popped (visited) next.
    for (let i = node.parts.length - 1; i >= 0; i -= 1) {
      pending.push(node.parts[i]);
    }
  }

  return null;
}

Then you can retrieve the PDF with:

// Search from the message payload for a part typed 'application/pdf';
// findAttachment returns the matching part, or null if there is none.
const pdfAttachment = findAttachment(targetEmail.payload, 'application/pdf');

Modularizing the API calls also helped me track down errors more easily. Using async/await consistently simplified asynchronous logic. I hope these techniques help resolve the issues you’re encountering.

Hey Pete, I’ve dealt with this before. The key is to use a recursive function with a depth limit. Something like this:

/**
 * Depth-limited, depth-first search for the first PDF attachment part.
 *
 * Children are walked with an explicit loop so the PDF part itself is
 * returned. `parts.find(p => findPdf(p))` would return the direct child that
 * contains the PDF (a container without `body.attachmentId`) whenever the PDF
 * is nested more than one level down.
 *
 * @param {object} part - MIME part to inspect; may carry `mimeType`, `body`, `parts`.
 * @param {number} [depth=0] - Depth of `part` relative to the search root.
 * @returns {object|null} The first `application/pdf` part with a
 *   `body.attachmentId`, or `null` if none is found within depth 10.
 */
function findPdf(part, depth = 0) {
  // Parts deeper than 10 levels below the root are not examined.
  if (depth > 10) return null;
  if (part.mimeType === 'application/pdf' && part.body?.attachmentId) return part;
  if (Array.isArray(part.parts)) {
    for (const child of part.parts) {
      const match = findPdf(child, depth + 1);
      if (match) return match;
    }
  }
  return null;
}

This should fix your looping issue. Good luck!

I’ve faced similar challenges with Gmail’s MIME structures. One approach that worked for me was implementing a depth-first search algorithm to traverse the MIME tree. This avoids the loop issue you’re experiencing.

Here’s a simplified version of the function I used:

/**
 * Searches a MIME part tree, depth-first, for the first PDF attachment while
 * spending a fixed depth budget.
 *
 * @param {object} part - Root MIME part to search from.
 * @param {number} [maxDepth=10] - Number of levels (root included) that may be
 *   examined; a part reached with a budget of exactly 0 is not inspected.
 * @returns {object|null} The first part whose `mimeType` is `application/pdf`
 *   and that has a truthy `body.attachmentId`, or `null` if none is found.
 */
function findPdfAttachment(part, maxDepth = 10) {
  const walk = (node, remaining) => {
    if (remaining === 0) {
      return null;
    }

    const isPdfAttachment = node.mimeType === 'application/pdf' && node.body?.attachmentId;
    if (isPdfAttachment) {
      return node;
    }

    const children = Array.isArray(node.parts) ? node.parts : [];
    for (let i = 0; i < children.length; i += 1) {
      const hit = walk(children[i], remaining - 1);
      if (hit) {
        return hit;
      }
    }
    return null;
  };

  return walk(part, maxDepth);
}

This function limits the search depth to prevent infinite recursion. You can then use it like this:

// Search from the message payload using the default maxDepth of 10;
// the result is the PDF part, or null when none is found within that depth.
const pdfAttachment = findPdfAttachment(targetEmail.payload);

This should resolve your looping issue and simplify the attachment retrieval process.