Tech Stack
- Frontend: HTML, CSS, Bootstrap
- Backend: Node.js, Express.js, cors, Multer, Tesseract.js, pdf-lib, pdf-poppler
- OCR Library: Tesseract.js
- PDF Library: pdf-lib
Setup Instructions
1. Install Dependencies
Backend
Navigate to the backend directory and install the required packages:
cd backend
npm install
Frontend
You can use the same directory for serving static files or set up a separate frontend directory. No additional dependencies are needed for the provided HTML.
2. Configure the Backend
Ensure that you have the following packages installed:
npm install express cors multer pdf-lib tesseract.js pdf-poppler
3. Start the Backend Server
Run the server:
node server.js
4. Access the Application
Open your web browser and navigate to http://localhost:3000. You should see the frontend UI where you can upload a PDF file.
How It Works
- Frontend:
- Users select a PDF file using the file input.
- Upon form submission, the file is sent to the backend via a POST request.
- A loading spinner is displayed while the PDF is being processed.
- Backend:
- Receives the PDF file, converts each page to a PNG image using pdf-poppler, and performs OCR on the images using Tesseract.js.
- Extracts text from the PDF images and returns the result to the frontend.
Frontend Code
Here is the index.html file used for the frontend:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PDF Upload and OCR</title>
<link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" rel="stylesheet">
<style>
.container {
max-width: 600px;
margin: auto;
padding: 20px;
}
.file-label {
display: block;
margin: 10px 0;
}
.submit-btn {
margin-top: 10px;
}
#loading {
display: none;
text-align: center;
}
</style>
</head>
<body>
<div class="container">
<h1 class="text-center">Upload a PDF for OCR</h1>
<form id="uploadForm" enctype="multipart/form-data">
<div class="form-group">
<label for="file" class="file-label">Select PDF</label>
<input type="file" id="file" name="file" class="form-control-file" accept=".pdf" required>
</div>
<button type="submit" class="btn btn-primary submit-btn">Upload</button>
</form>
<div id="loading">
<div class="spinner-border text-primary" role="status">
<span class="sr-only">Loading...</span>
</div>
<p>Processing...</p>
</div>
<div id="result-container" class="mt-4">
<h2>OCR Result</h2>
<pre id="result"></pre>
</div>
</div>
<script src="https://code.jquery.com/jquery-3.5.1.slim.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@popperjs/core@2.5.4/dist/umd/popper.min.js"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"></script>
<script>
// Submit handler for the upload form: sends the selected PDF to the OCR
// endpoint and writes either the recognized text or an error message into
// the #result element. The #loading spinner is visible while the request runs.
document.getElementById('uploadForm').addEventListener('submit', async (event) => {
  event.preventDefault(); // Prevent default form submission

  const resultElement = document.getElementById('result');
  const loadingElement = document.getElementById('loading');
  const [file] = document.getElementById('file').files;

  // The input is marked `required`, but guard anyway so "undefined" is never
  // appended to the form data if the handler runs without a selection.
  if (!file) {
    resultElement.textContent = 'Error: No file selected.';
    return;
  }

  const formData = new FormData();
  formData.append('file', file);

  // Drop the previous result so stale text is not mistaken for the new one.
  resultElement.textContent = '';
  // Show loading animation
  loadingElement.style.display = 'block';

  try {
    const response = await fetch('http://localhost:3000/ocr', {
      method: 'POST',
      body: formData,
      redirect: 'follow',
    });

    if (response.ok) {
      const result = await response.json();
      resultElement.textContent = result.text;
    } else {
      // The backend describes failures in a plain-text body; prefer it over
      // statusText, which may be empty (for example over HTTP/2).
      const detail =
        (await response.text()) || response.statusText || `HTTP ${response.status}`;
      resultElement.textContent = `Error: ${detail}`;
    }
  } catch (error) {
    resultElement.textContent = `Error: ${error.message}`;
  } finally {
    // Hide loading animation
    loadingElement.style.display = 'none';
  }
});
</script>
</body>
</html>
Backend Code
Here is the server.js file used for the backend:
const express = require("express");
const cors = require("cors");
const multer = require("multer");
const { PDFDocument } = require("pdf-lib");
const Tesseract = require("tesseract.js");
const path = require("path");
const fs = require("fs");
const pdfPoppler = require("pdf-poppler");
// Express application that hosts the OCR endpoint.
const app = express();

// Listening port. PORT from the environment overrides the default of 3000;
// a missing, non-numeric, or zero value falls back to 3000.
const port = Number(process.env.PORT) || 3000;

// Enable CORS for all origins
app.use(cors());

// Keep uploads in memory so handlers read them from `req.file.buffer`.
// NOTE(review): no `limits` are configured, so upload size is not capped here.
const storage = multer.memoryStorage();
const upload = multer({ storage: storage });
/**
 * Render the pages of a PDF to PNG files with pdf-poppler and list the results.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @param {string} [outputDir] - Directory that receives the PNG files.
 *   Defaults to the "output" directory next to this script.
 * @returns {Promise<string[]>} Paths of every `.png` file present in
 *   `outputDir` after the conversion, in natural (page-number) order.
 * @throws Re-throws any error raised by the conversion or the directory
 *   listing, after logging it.
 */
async function convertPdfToImages(
  pdfPath,
  outputDir = path.join(__dirname, "output")
) {
  // `recursive: true` creates missing parent directories and does nothing when
  // the directory already exists, so no separate existence check is needed.
  fs.mkdirSync(outputDir, { recursive: true });

  const options = {
    format: "png",
    out_dir: outputDir,
    out_prefix: "page",
    page_range: "1-",
  };

  try {
    await pdfPoppler.convert(pdfPath, options);
    return fs
      .readdirSync(outputDir)
      .filter((file) => file.endsWith(".png"))
      // Directory listing order is not specified; a numeric-aware comparison
      // keeps "page-2" ahead of "page-10" so the text is joined in page order.
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))
      .map((file) => path.join(outputDir, file));
  } catch (error) {
    console.error("Error converting PDF to images:", error);
    throw error;
  }
}
// Sample route: replies to GET / with a fixed greeting.
app.get("/", (_request, response) => {
  response.send("Hello World!");
});
// POST route for OCR
// Expects a multipart upload with the PDF in the "file" field. Replies with
// JSON `{ text }` holding the recognized text of all pages joined by newlines,
// 400 when no file was sent, or 500 when conversion or OCR fails.
app.post("/ocr", upload.single("file"), async (req, res) => {
  if (!req.file) {
    return res.status(400).send("No file uploaded.");
  }

  // Best-effort removal: a failed cleanup is logged but must not turn into an
  // unhandled rejection after the response has already been sent.
  const removeQuietly = (target, options) => {
    try {
      fs.rmSync(target, { force: true, ...options });
    } catch (cleanupError) {
      console.error("Error removing temporary file:", cleanupError);
    }
  };

  let workDir;
  let imagePaths = [];
  try {
    // A fresh directory per request, so concurrent uploads never overwrite
    // each other's temporary PDF.
    workDir = fs.mkdtempSync(path.join(__dirname, "upload-"));
    const tempPdfPath = path.join(workDir, "temp.pdf");
    fs.writeFileSync(tempPdfPath, req.file.buffer);

    // NOTE(review): the page images are still written wherever
    // convertPdfToImages puts them by default; if that location is shared,
    // overlapping requests can still see each other's pages.
    imagePaths = await convertPdfToImages(tempPdfPath);

    const texts = await Promise.all(
      imagePaths.map(async (imagePath) => {
        const {
          data: { text },
        } = await Tesseract.recognize(imagePath, "eng", {
          logger: (info) => console.log(info),
        });
        return text;
      })
    );

    res.json({ text: texts.join("\n") });
  } catch (error) {
    console.error(error);
    res.status(500).send("Error processing file.");
  } finally {
    // Runs on success and on failure, so temporary files do not accumulate
    // when conversion or OCR throws.
    for (const imagePath of imagePaths) {
      removeQuietly(imagePath);
    }
    if (workDir) {
      removeQuietly(workDir, { recursive: true });
    }
  }
});
// Fallback middleware: any request that reached this point gets a 404 reply.
app.use((_request, response) => {
  response.status(404);
  response.send("Not Found");
});

// Start accepting connections and log the local URL once listening.
app.listen(port, function onListening() {
  const message = `Server running at http://localhost:${port}`;
  console.log(message);
});
Top comments (0)