Header Ads Widget

How to Convert PDF Pages into Images and Combine Them Using Node.js

How to Convert PDF Pages into Images and Combine Them Using Node.js

When working with PDFs, you may encounter scenarios where you need to convert each page of a PDF into an image and then combine those images into a single file. This task can be accomplished efficiently using Node.js and a few powerful libraries. In this article, I'll walk you through the process using three main packages: pdf-lib, jimp, and pdf2pic.

Prerequisites

  • node >= 14.x
  • graphicsmagick
  • ghostscript

To install GraphicsMagick, follow these steps

  • On macOS: Use Homebrew to install GraphicsMagick.
         brew install graphicsmagick
  • On Linux: Use your package manager to install it. For example, on Ubuntu:

         sudo apt-get install graphicsmagick
  • On Windows: Download and install GraphicsMagick from the official GraphicsMagick website (graphicsmagick.org).

Setting Up the Node.js Project

Create a new Node.js project and install the required libraries:


1. mkdir pdf-to-images
2. cd pdf-to-images
3. npm init -y
4. npm install pdf-lib jimp pdf2pic


Step 1: Reading and Counting Pages in the PDF

The first step is to read the PDF file and count the number of pages using pdf-lib.


const { PDFDocument } = require('pdf-lib');
const fs = require('fs');

/**
 * Counts the pages of a PDF file stored on disk.
 *
 * The file is read with the promise-based fs API so the event loop is not
 * blocked while the document is being loaded from disk.
 *
 * @param {string} pdfPath - Path to the PDF file.
 * @returns {Promise<number>} The value reported by the loaded document's getPageCount().
 * @throws Rejects if the file cannot be read or if PDFDocument.load rejects.
 */
async function getPdfPageCount(pdfPath) {
  const pdfData = await fs.promises.readFile(pdfPath);
  const pdfDoc = await PDFDocument.load(pdfData);
  return pdfDoc.getPageCount();
}

// Entry point for Step 1: print the number of pages in sample.pdf.
(async () => {
  const pageCount = await getPdfPageCount('sample.pdf');
  console.log(`The PDF has ${pageCount} pages.`);
})().catch((err) => {
  // Report read/parse failures explicitly instead of leaving the promise unhandled.
  console.error('Failed to count PDF pages:', err);
  process.exitCode = 1;
});

Text Copied!



Step 2: Converting PDF Pages to Images

Now that we know how many pages are in the PDF, we can convert each page into an image using pdf2pic.


const { fromPath } = require('pdf2pic');

/**
 * Converts every page of a PDF into a PNG file using pdf2pic.
 *
 * Pages are converted one at a time, in order, starting from page 1 up to the
 * count returned by getPdfPageCount. Files are written to the current
 * directory with the base name "page" (see the options below).
 *
 * @param {string} pdfPath - Path to the PDF file to convert.
 * @returns {Promise<string[]>} The `path` of each conversion result, in page
 *   order, so the caller can pass them on to a later step.
 * @throws Rejects if counting the pages or converting any page fails.
 */
async function convertPdfToImages(pdfPath) {
  const convertPage = fromPath(pdfPath, {
    density: 300, // DPI
    saveFilename: 'page',
    savePath: '.',
    format: 'png',
    width: 800,
    height: 1000,
  });

  const pageCount = await getPdfPageCount(pdfPath);
  const imagePaths = [];

  for (let i = 1; i <= pageCount; i++) {
    const outputFile = await convertPage(i); // Convert each page to an image
    imagePaths.push(outputFile.path); // Keep the generated file's path for the caller
    console.log(`Converted page ${i} to image.`);
  }

  return imagePaths;
}

// Entry point for Step 2: convert every page of sample.pdf to an image.
(async () => {
  await convertPdfToImages('sample.pdf');
})().catch((err) => {
  // Report conversion failures explicitly instead of leaving the promise unhandled.
  console.error('Failed to convert PDF pages to images:', err);
  process.exitCode = 1;
});
Text Copied!


Step 3: Combining the Images

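After converting the PDF pages into images, the final step is to combine these images into a single image using jimp. Note that the code below uses the Jimp 0.x API (`require('jimp')`, `new Jimp(width, height)`, `writeAsync`); Jimp 1.x changed these calls, so install a 0.x release (for example `npm install jimp@0.22`) if you want to run it unchanged.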


const Jimp = require('jimp');

/**
 * Stacks several images vertically into one image and writes it to disk.
 *
 * The output canvas is as wide as the widest input and as tall as the sum of
 * all input heights; each image is placed at x = 0 directly below the
 * previous one, in the order given.
 *
 * @param {string[]} imagePaths - Paths of the images to combine, top to bottom.
 * @param {string} outputPath - Where the combined image is written.
 * @returns {Promise<void>} Resolves once the combined image has been written.
 * @throws {Error} If imagePaths is empty, or if reading/writing an image fails.
 */
async function combineImages(imagePaths, outputPath) {
  // With no inputs Math.max() yields -Infinity, which is not a usable canvas width.
  if (imagePaths.length === 0) {
    throw new Error('combineImages requires at least one image path');
  }

  const images = await Promise.all(imagePaths.map((imagePath) => Jimp.read(imagePath)));
  const totalHeight = images.reduce((sum, img) => sum + img.bitmap.height, 0);
  const maxWidth = Math.max(...images.map((img) => img.bitmap.width));

  const combinedImage = new Jimp(maxWidth, totalHeight);

  let yOffset = 0;
  for (const img of images) {
    combinedImage.composite(img, 0, yOffset);
    yOffset += img.bitmap.height; // The next image starts where this one ends
  }

  await combinedImage.writeAsync(outputPath);
  console.log('Combined image saved to', outputPath);
}

// Entry point for Step 3: stack the page images into combined-image.png.
(async () => {
  // pdf2pic names its output <saveFilename>.<page>.<format> inside savePath,
  // so a converter configured with saveFilename 'page' and savePath '.' writes
  // ./page.1.png, ./page.2.png, ...
  const imagePaths = ['./page.1.png', './page.2.png']; // Add paths to all your images
  await combineImages(imagePaths, 'combined-image.png');
})().catch((err) => {
  // Report read/write failures explicitly instead of leaving the promise unhandled.
  console.error('Failed to combine images:', err);
  process.exitCode = 1;
});



Putting it all together:


const { PDFDocument } = require('pdf-lib');
const fs = require('fs');
const { fromPath } = require('pdf2pic');
const Jimp = require('jimp');

// Step 1: Reading and Counting Pages in the PDF
/**
 * Reads a PDF from disk and reports how many pages it contains.
 *
 * Uses the promise-based fs API rather than a synchronous read so that the
 * event loop stays free while the file is loaded.
 *
 * @param {string} pdfPath - Path to the PDF file.
 * @returns {Promise<number>} The value reported by the loaded document's getPageCount().
 * @throws Rejects if the file cannot be read or if PDFDocument.load rejects.
 */
async function getPdfPageCount(pdfPath) {
  const pdfData = await fs.promises.readFile(pdfPath);
  const pdfDoc = await PDFDocument.load(pdfData);
  return pdfDoc.getPageCount();
}

// Step 2: Converting PDF Pages to Images
/**
 * Renders each page of the given PDF to a PNG via pdf2pic and collects the
 * resulting file paths.
 *
 * Pages are processed sequentially from 1 to the count reported by
 * getPdfPageCount, and a progress line is logged after each one.
 *
 * @param {string} pdfPath - Path to the PDF file to convert.
 * @returns {Promise<string[]>} The `path` of every conversion result, in page order.
 */
async function convertPdfToImages(pdfPath) {
  const conversionOptions = {
    density: 300, // DPI
    saveFilename: 'page',
    savePath: '.',
    format: 'png',
    width: 800,
    height: 1000,
  };
  const renderPage = fromPath(pdfPath, conversionOptions);

  const totalPages = await getPdfPageCount(pdfPath);
  const generatedPaths = [];

  let pageNumber = 1;
  while (pageNumber <= totalPages) {
    // Render one page, then remember where its image was written.
    const rendered = await renderPage(pageNumber);
    generatedPaths.push(rendered.path);
    console.log(`Converted page ${pageNumber} to image.`);
    pageNumber += 1;
  }

  return generatedPaths;
}

// Step 3: Combining the Images
/**
 * Combines several images into a single tall image by stacking them
 * vertically, then writes the result to disk.
 *
 * The canvas width is the maximum input width and its height is the sum of
 * the input heights; images are composited at x = 0 in the order given.
 *
 * @param {string[]} imagePaths - Paths of the images to combine, top to bottom.
 * @param {string} outputPath - Destination path of the combined image.
 * @returns {Promise<void>} Resolves once the combined image has been written.
 * @throws {Error} If imagePaths is empty, or if reading/writing an image fails.
 */
async function combineImages(imagePaths, outputPath) {
  // An empty list would make Math.max() return -Infinity and produce an invalid canvas size.
  if (imagePaths.length === 0) {
    throw new Error('combineImages requires at least one image path');
  }

  const images = await Promise.all(imagePaths.map((imagePath) => Jimp.read(imagePath)));
  const totalHeight = images.reduce((sum, img) => sum + img.bitmap.height, 0);
  const maxWidth = Math.max(...images.map((img) => img.bitmap.width));

  const combinedImage = new Jimp(maxWidth, totalHeight);

  let yOffset = 0;
  for (const img of images) {
    combinedImage.composite(img, 0, yOffset);
    yOffset += img.bitmap.height; // Advance past the image just placed
  }

  await combinedImage.writeAsync(outputPath);
  console.log('Combined image saved to', outputPath);
}

// Main function to execute the steps
// Main entry point: convert each page of the PDF to an image, then stack the
// images into a single output file.
(async () => {
  const pdfPath = 'sample.pdf'; // Same input file name as the step-by-step examples
  const outputImagePath = 'combined-image.png';

  // Convert PDF pages to images
  const imagePaths = await convertPdfToImages(pdfPath);

  // Combine images into a single image
  await combineImages(imagePaths, outputImagePath);
})().catch((err) => {
  // Report failures from either step explicitly instead of leaving the promise unhandled.
  console.error('Failed to convert and combine PDF pages:', err);
  process.exitCode = 1;
});

Text Copied!

Conclusion

By following the steps outlined above, you can successfully convert a PDF's pages into images and then combine those images into a single file using Node.js. The pdf-lib library allows us to read and count PDF pages, pdf2pic handles the conversion of pages to images, and jimp enables the combination of these images into one.

EasycodingZone_logo

Read More:-

1. How to Define and Reference APIs in Backstage: A Step-by-Step Guide

Post a Comment

0 Comments