import sharp from 'sharp'
import fs from 'fs'
import path from 'path'
import os from 'os'

/** Options accepted by the PDF-to-image conversion helpers in this module. */
interface PDFToImageOptions {
  /** Rendering density passed to pdf2pic (defaults to 300). */
  density?: number
  /** Declared for callers; not read by the conversion helpers below. */
  saveToFile?: boolean
  /** Output image format (defaults to 'png'). */
  format?: 'png' | 'jpeg'
  /** Encoder quality passed to sharp (defaults to 100). */
  quality?: number
}

/** One rendered PDF page. */
interface ConvertedPage {
  /** 1-based position of the page in the conversion output. */
  pageNumber: number
  /** Encoded image bytes. */
  buffer: Buffer
  /** Pixel width reported by sharp, or 0 when unavailable. */
  width: number
  /** Pixel height reported by sharp, or 0 when unavailable. */
  height: number
}

// Target raster size requested from pdf2pic: A4 at 300 DPI.
const A4_WIDTH_PX = 2480
const A4_HEIGHT_PX = 3508

/**
 * Runs `work` with a freshly created, private temporary directory and removes
 * the directory (and everything in it) afterwards, whether `work` succeeded
 * or threw. A per-call directory avoids file-name collisions between
 * concurrent conversions.
 */
async function withTempDir<T>(work: (dir: string) => Promise<T>): Promise<T> {
  const dir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'pdf-to-image-'))
  try {
    return await work(dir)
  } finally {
    try {
      await fs.promises.rm(dir, { recursive: true, force: true })
    } catch (cleanupError) {
      // Cleanup is best effort; never mask the result of `work`.
      console.warn('Failed to clean up temporary PDF file:', cleanupError)
    }
  }
}

/**
 * Converts every page of a PDF into an image.
 *
 * Tries pdf2pic first and falls back to pdf-poppler when pdf2pic throws or
 * yields no pages.
 *
 * @param pdfBuffer - Raw bytes of the PDF document.
 * @param options - Conversion options forwarded to the backends.
 * @returns The converted pages, in page order.
 * @throws Error when neither backend produced at least one page.
 */
export async function convertPDFToImages(
  pdfBuffer: Buffer,
  options: PDFToImageOptions = {}
): Promise<ConvertedPage[]> {
  const backends = [
    { label: 'pdf2pic', run: convertWithPdf2pic },
    { label: 'pdf-poppler', run: convertWithPdfPoppler }
  ]

  for (const { label, run } of backends) {
    try {
      console.log(`Attempting PDF conversion with ${label}...`)
      const pages = await run(pdfBuffer, options)
      if (pages.length > 0) {
        return pages
      }
    } catch (error) {
      console.warn(`${label} conversion failed:`, error)
    }
  }

  // Both backends failed or produced nothing: tell the user what to install.
  throw new Error('PDF 轉圖片失敗:需要安裝 GraphicsMagick、ImageMagick 或 Poppler 工具。請安裝其中一個依賴項目。')
}

/**
 * Renders the PDF page by page with pdf2pic and re-encodes each page with
 * sharp in the requested format.
 *
 * Pages are requested with increasing page numbers until pdf2pic returns no
 * buffer or throws. A failure before any page was produced is rethrown so the
 * caller can report the real cause; a failure after at least one page is
 * treated as the end of the document.
 *
 * @param pdfBuffer - Raw bytes of the PDF document.
 * @param options - `density`, `format` and `quality` are honoured.
 * @returns The converted pages, in page order.
 */
async function convertWithPdf2pic(
  pdfBuffer: Buffer,
  options: PDFToImageOptions = {}
): Promise<ConvertedPage[]> {
  const { density = 300, format = 'png', quality = 100 } = options
  const { fromPath } = await import('pdf2pic')

  return withTempDir(async (workDir) => {
    // pdf2pic reads from a path, so the buffer is written to disk first.
    const pdfPath = path.join(workDir, 'input.pdf')
    await fs.promises.writeFile(pdfPath, pdfBuffer)

    const convert = fromPath(pdfPath, {
      density,
      saveToFile: false,
      savePath: workDir,
      format,
      width: A4_WIDTH_PX,
      height: A4_HEIGHT_PX
    })

    const pages: ConvertedPage[] = []
    for (let pageNumber = 1; ; pageNumber++) {
      try {
        const rendered = await convert(pageNumber, { responseType: 'buffer' })
        if (!rendered || !rendered.buffer) {
          break // No more pages
        }

        // Re-encode with sharp in the requested output format.
        const pipeline = sharp(rendered.buffer)
        const encoded =
          format === 'jpeg'
            ? await pipeline.jpeg({ quality }).toBuffer()
            : await pipeline.png({ quality }).toBuffer()

        const metadata = await sharp(encoded).metadata()
        pages.push({
          pageNumber,
          buffer: encoded,
          width: metadata.width || 0,
          height: metadata.height || 0
        })
      } catch (error) {
        if (pages.length === 0) {
          // Nothing was converted: surface the cause instead of hiding it
          // behind an empty result.
          throw error
        }
        // After at least one page, an error is taken as "past the last page".
        console.log(`Finished converting ${pages.length} pages`)
        break
      }
    }

    return pages
  })
}

/**
 * Picks the image files written with the given prefix out of a directory
 * listing and orders them by the numeric page suffix in their name
 * (`<prefix>-<number>.<png|jpg|jpeg>`).
 */
function listPopplerOutputs(fileNames: readonly string[], prefix: string): string[] {
  const pageSuffix = /-(\d+)\.(?:png|jpe?g)$/i
  const numbered: { fileName: string; page: number }[] = []

  for (const fileName of fileNames) {
    if (!fileName.startsWith(`${prefix}-`)) {
      continue
    }
    const match = pageSuffix.exec(fileName)
    if (match === null) {
      continue
    }
    numbered.push({ fileName, page: Number(match[1]) })
  }

  return numbered.sort((a, b) => a.page - b.page).map((entry) => entry.fileName)
}

/**
 * Converts the PDF with pdf-poppler as an alternative backend.
 *
 * The value resolved by `poppler.convert()` is not used; the images are
 * discovered by listing the private work directory that was passed as
 * `out_dir`, so they are found (and cleaned up) regardless of what the
 * library returns.
 *
 * @param pdfBuffer - Raw bytes of the PDF document.
 * @param options - Only `format` is honoured by this backend.
 * @returns The converted pages, in page order; files that cannot be read or
 *   decoded are skipped with a warning.
 * @throws Error with message 'pdf-poppler 無法使用' when the module cannot be
 *   loaded; conversion errors propagate unchanged.
 */
async function convertWithPdfPoppler(
  pdfBuffer: Buffer,
  options: PDFToImageOptions = {}
): Promise<ConvertedPage[]> {
  const { format = 'png' } = options
  const poppler = await loadPoppler()

  return withTempDir(async (workDir) => {
    const pdfPath = path.join(workDir, 'input.pdf')
    await fs.promises.writeFile(pdfPath, pdfBuffer)

    const outPrefix = 'converted'
    const popplerOptions = {
      format: format,
      out_dir: workDir,
      out_prefix: outPrefix,
      page: null, // Convert all pages
      png_file: format === 'png',
      jpeg_file: format === 'jpeg'
    }

    await poppler.convert(pdfPath, popplerOptions)

    const outputs = listPopplerOutputs(await fs.promises.readdir(workDir), outPrefix)
    const pages: ConvertedPage[] = []

    for (let i = 0; i < outputs.length; i++) {
      const filePath = path.join(workDir, outputs[i] ?? '')
      try {
        const imageBuffer = await fs.promises.readFile(filePath)
        const metadata = await sharp(imageBuffer).metadata()
        pages.push({
          pageNumber: i + 1,
          buffer: imageBuffer,
          width: metadata.width || 0,
          height: metadata.height || 0
        })
      } catch (fileError) {
        console.warn(`Failed to process converted file ${filePath}:`, fileError)
      }
    }

    // The images live inside workDir, which withTempDir removes for us.
    return pages
  })
}

/** Loads pdf-poppler lazily, mapping a load failure to a stable error message. */
async function loadPoppler() {
  try {
    return await import('pdf-poppler')
  } catch (importError) {
    console.warn('pdf-poppler import failed:', importError)
    throw new Error('pdf-poppler 無法使用')
  }
}

/**
 * Prepares an image for OCR: greyscale, auto-levelled, sharpened, PNG-encoded.
 *
 * @param imageBuffer - Encoded source image.
 * @returns The optimized PNG bytes, or the original buffer unchanged when
 *   sharp fails (the failure is logged).
 */
export async function optimizeImageForOCR(imageBuffer: Buffer): Promise<Buffer> {
  try {
    return await sharp(imageBuffer)
      .greyscale()
      .normalize() // Auto-level
      .sharpen({
        sigma: 1,
        m1: 0.5,
        m2: 2,
        x1: 2,
        y2: 10
      })
      .png({ quality: 100 })
      .toBuffer()
  } catch (error) {
    console.error('Image optimization error:', error)
    // Fall back to the untouched input so callers can still attempt OCR.
    return imageBuffer
  }
}

/**
 * Estimates the processing time for a document.
 *
 * @param pageCount - Number of pages to process.
 * @returns Estimated seconds: 5 per page, capped at 120; 0 for a
 *   non-positive or NaN page count.
 */
export function estimateProcessingTime(pageCount: number): number {
  // Rough estimate: 3-8 seconds per page depending on complexity
  const baseTimePerPage = 5 // seconds
  const maxSeconds = 120 // Cap at 2 minutes

  // `!(x > 0)` also catches NaN, which would otherwise leak through Math.min.
  if (!(pageCount > 0)) {
    return 0
  }
  return Math.min(pageCount * baseTimePerPage, maxSeconds)
}

/**
 * Checks whether this system can convert PDFs by running a minimal one-page
 * PDF through `convertPDFToImages`.
 *
 * @returns `true` when the conversion succeeds, `false` otherwise (the
 *   failure is logged).
 */
export async function checkPDFConversionSupport(): Promise<boolean> {
  try {
    // Minimal test PDF, one token group per line: '%' starts a comment that
    // runs to end of line, so the header must sit on its own line.
    // NOTE(review): the xref offsets are kept as written and are not
    // recomputed for this layout — confirm the backends tolerate that.
    const testPdfBuffer = Buffer.from(
      [
        '%PDF-1.4',
        '1 0 obj',
        '<<',
        '/Type /Catalog',
        '/Pages 2 0 R',
        '>>',
        'endobj',
        '2 0 obj',
        '<<',
        '/Type /Pages',
        '/Kids [3 0 R]',
        '/Count 1',
        '>>',
        'endobj',
        '3 0 obj',
        '<<',
        '/Type /Page',
        '/Parent 2 0 R',
        '/MediaBox [0 0 612 792]',
        '>>',
        'endobj',
        'xref',
        '0 4',
        '0000000000 65535 f ',
        '0000000009 00000 n ',
        '0000000074 00000 n ',
        '0000000120 00000 n ',
        'trailer',
        '<<',
        '/Size 4',
        '/Root 1 0 R',
        '>>',
        'startxref',
        '219',
        '%%EOF'
      ].join('\n')
    )

    await convertPDFToImages(testPdfBuffer)
    return true
  } catch (error) {
    console.warn('PDF conversion support check failed:', error)
    return false
  }
}