|
const { exec, execFile } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');
|
|
|
/**
 * Deletes the given files on a best-effort basis, logging the outcome of each.
 *
 * A failure on one entry never prevents the remaining entries from being
 * processed, and the function itself never throws.
 *
 * @param {string[]} filePaths - Paths of the files to delete. A missing or
 *   non-array value is treated as an empty list.
 * @returns {void}
 */
function cleanupTempFiles(filePaths) {
  console.log('Starting cleanup of temporary files...');

  // This runs on error paths, so tolerate a missing list instead of crashing.
  const targets = Array.isArray(filePaths) ? filePaths : [];

  for (const filePath of targets) {
    // path.basename() throws on non-strings; skip such entries up front so one
    // bad entry cannot abort the cleanup of the others.
    if (typeof filePath !== 'string' || filePath === '') {
      console.error(`Skipping invalid cleanup entry: ${String(filePath)}`);
      continue;
    }

    const name = path.basename(filePath);
    try {
      // Unlink directly and inspect the error code rather than checking for
      // existence first: the file may disappear between a check and the delete.
      fs.unlinkSync(filePath);
      console.log(`Deleted: ${name}`);
    } catch (error) {
      if (error.code === 'ENOENT') {
        console.log(`File not found (already deleted?): ${name}`);
      } else {
        console.error(`Error deleting ${name}: ${error.message}`);
      }
    }
  }

  console.log('Cleanup completed');
}
|
|
|
/**
 * Deletes well-known intermediate PDF files from a directory.
 *
 * Only entries whose whole name is `first.pdf`, `second.pdf`, `merged.pdf`, or
 * matches the glob `temp_*.pdf` are removed. Individual delete failures are
 * logged and skipped.
 *
 * @param {?string} [tempDir=null] - Directory to clean. Any falsy value selects
 *   the system temp directory (`os.tmpdir()`).
 * @returns {{success: true, deletedCount: number} | {success: false, error: string}}
 *   `success: false` is returned when the directory itself cannot be listed.
 */
function forceCleanupPdfFiles(tempDir = null) {
  const workingDir = tempDir || os.tmpdir();
  console.log(`Force cleaning PDF files in: ${workingDir}`);

  const pdfPatterns = ['first.pdf', 'second.pdf', 'merged.pdf', 'temp_*.pdf'];

  // Compile every pattern once into a fully anchored regex. Literal parts are
  // escaped so "." only matches a dot, and every "*" becomes ".*". Without the
  // anchors, "temp_*.pdf" would also match names such as "my_temp_1.pdf.bak".
  const matchers = pdfPatterns.map((pattern) => {
    const source = pattern
      .split('*')
      .map((literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
      .join('.*');
    return new RegExp(`^${source}$`);
  });

  try {
    const files = fs.readdirSync(workingDir);
    let deletedCount = 0;

    for (const file of files) {
      if (!matchers.some((matcher) => matcher.test(file))) {
        continue;
      }

      try {
        fs.unlinkSync(path.join(workingDir, file));
        console.log(`Force deleted: ${file}`);
        deletedCount++;
      } catch (error) {
        // A file that vanished after the directory listing is not a failure.
        if (error.code !== 'ENOENT') {
          console.error(`Failed to delete ${file}: ${error.message}`);
        }
      }
    }

    console.log(`Force cleanup completed. Deleted ${deletedCount} files.`);
    return { success: true, deletedCount };
  } catch (error) {
    console.error(`Error during force cleanup: ${error.message}`);
    return { success: false, error: error.message };
  }
}
|
|
|
/**
 * Merges two Word documents into one DOCX file.
 *
 * Pipeline: any `.doc` input is converted to `.docx` with LibreOffice, both
 * documents are converted to PDF with LibreOffice, the PDFs are concatenated
 * with `pdftk`, and the merged PDF is converted back to DOCX with `pdf2docx`
 * (installed on first use into a `pdf_env` virtualenv, resolved relative to the
 * process working directory). The result is copied to `outputDocx`.
 *
 * All intermediate files live in a private directory created per call, so
 * concurrent calls and inputs that share a base name cannot overwrite each
 * other. That directory is removed before the callback fires, on success and
 * on failure alike.
 *
 * External tools are started without a shell and receive file names as
 * separate arguments, so shell metacharacters in names are never interpreted.
 *
 * @param {string} firstFileName - Path of the first `.doc`/`.docx` document.
 * @param {string} secondFileName - Path of the document appended after the first.
 * @param {string} outputDocx - Destination path of the merged DOCX.
 * @param {function(?string, string=): void} callback - Called exactly once with
 *   `(errorMessage)` on failure or `(null, successMessage)` on success. Errors
 *   are reported as strings, not Error objects.
 * @throws {TypeError} If `callback` is not a function.
 * @returns {void}
 */
function mergeDocx(firstFileName, secondFileName, outputDocx, callback) {
  if (typeof callback !== 'function') {
    throw new TypeError('mergeDocx requires a callback function');
  }

  const platform = os.platform();
  const libreOfficeCmd = platform === 'darwin' ? 'soffice' : 'libreoffice';

  // The two paths are passed as positional parameters ($1, $2) instead of
  // being spliced into the script text.
  const setupAndConvertScript = [
    '# Check if virtual environment exists, create if not',
    'if [ ! -d "pdf_env" ]; then',
    '  python3 -m venv pdf_env',
    '  source pdf_env/bin/activate',
    '  pip install pdf2docx',
    'else',
    '  source pdf_env/bin/activate',
    'fi',
    '',
    '# Convert PDF to DOCX ($1 = merged PDF, $2 = target DOCX)',
    'pdf2docx convert "$1" "$2"',
  ].join('\n');

  // Runs a program without a shell; resolves on exit code 0, rejects otherwise.
  const run = (command, args) =>
    new Promise((resolve, reject) => {
      execFile(command, args, (error) => (error ? reject(error) : resolve()));
    });

  // Runs one pipeline stage and prefixes any failure with a stage description.
  const stage = async (errorPrefix, action) => {
    try {
      return await action();
    } catch (error) {
      throw new Error(`${errorPrefix}: ${error.message}`);
    }
  };

  // Converts `inputFile` to `format` inside `outDir`; returns the produced path.
  const convertWithLibreOffice = async (inputFile, format, outDir) => {
    await run(libreOfficeCmd, [
      '--headless',
      '--convert-to',
      format,
      '--outdir',
      outDir,
      inputFile,
    ]);
    // Strip whatever extension the input really has (".doc", ".DOC", ".docx", ...)
    // so the expected output name is right regardless of letter case.
    const baseName = path.basename(inputFile, path.extname(inputFile));
    const produced = path.join(outDir, `${baseName}.${format}`);
    // Fail this stage if the run reported success but left no output file.
    await fs.promises.access(produced);
    return produced;
  };

  // Returns an absolute path to a DOCX version of `inputFile`, converting if needed.
  const ensureDocx = async (inputFile, outDir) => {
    // Absolute paths can never be mistaken for command-line options.
    const absoluteInput = path.resolve(inputFile);
    if (path.extname(inputFile).toLowerCase() !== '.doc') {
      return absoluteInput;
    }

    console.log(`Converting ${inputFile} from .doc to .docx...`);
    const converted = await stage(`Error converting ${inputFile} to DOCX`, () =>
      convertWithLibreOffice(absoluteInput, 'docx', outDir));
    console.log(`Step 0: Successfully converted ${inputFile} to DOCX`);
    return converted;
  };

  const pipeline = async () => {
    const namedPaths = [
      ['firstFileName', firstFileName],
      ['secondFileName', secondFileName],
      ['outputDocx', outputDocx],
    ];
    for (const [label, value] of namedPaths) {
      if (typeof value !== 'string' || value === '') {
        throw new Error(`Invalid ${label}: expected a non-empty file path`);
      }
    }

    // Check the inputs before starting any external tool.
    for (const inputFile of [firstFileName, secondFileName]) {
      await stage(`Input file is not readable: ${inputFile}`, () =>
        fs.promises.access(inputFile, fs.constants.R_OK));
    }

    console.log(`Platform detected: ${platform}`);
    console.log(`Using LibreOffice command: ${libreOfficeCmd}`);

    const workDir = await stage('Error creating temporary directory', () =>
      fs.promises.mkdtemp(path.join(os.tmpdir(), 'merge-docx-')));

    try {
      // LibreOffice names its output after the input's base name, so each
      // input gets its own output directory to keep same-named inputs apart.
      const firstDir = path.join(workDir, 'first');
      const secondDir = path.join(workDir, 'second');
      await fs.promises.mkdir(firstDir);
      await fs.promises.mkdir(secondDir);

      const processedFirstFile = await ensureDocx(firstFileName, firstDir);
      const processedSecondFile = await ensureDocx(secondFileName, secondDir);
      console.log('Doc to Docx conversion completed. Proceeding with PDF conversion...');

      const actualFirstPdf = await stage('Error converting first file to PDF', () =>
        convertWithLibreOffice(processedFirstFile, 'pdf', firstDir));
      console.log('Step 1: First file converted to PDF');

      const actualSecondPdf = await stage('Error converting second file to PDF', () =>
        convertWithLibreOffice(processedSecondFile, 'pdf', secondDir));
      console.log('Step 2: Second file converted to PDF');

      const mergedPdf = path.join(workDir, 'merged.pdf');
      const mergedDocx = path.join(workDir, 'merged.docx');

      await stage('Error merging PDFs', () =>
        run('pdftk', [actualFirstPdf, actualSecondPdf, 'cat', 'output', mergedPdf]));
      console.log('Step 3: PDFs merged successfully');

      // "bash" fills $0; the two paths arrive in the script as $1 and $2.
      await stage('Error converting PDF to DOCX', () =>
        run('/bin/bash', ['-c', setupAndConvertScript, 'bash', mergedPdf, mergedDocx]));
      console.log('Step 4: PDF converted to DOCX');

      await stage('Error copying final file', () =>
        fs.promises.copyFile(mergedDocx, outputDocx));
    } finally {
      // Best effort: a cleanup failure must not mask the pipeline's own outcome.
      await fs.promises
        .rm(workDir, { recursive: true, force: true })
        .catch((error) => {
          console.error(`Error removing temporary directory ${workDir}: ${error.message}`);
        });
    }

    console.log(`Document merge completed successfully: ${outputDocx}`);
    return `Successfully merged documents into ${outputDocx}`;
  };

  // Deliver the result via nextTick so that an exception thrown by the caller's
  // callback is not captured by this promise chain.
  pipeline().then(
    (message) => process.nextTick(callback, null, message),
    (error) => process.nextTick(callback, error.message),
  );
}
|
|
|
/**
 * Platform-checked entry point for merging two documents.
 *
 * On macOS and Linux the work is delegated to `mergeDocx`, which picks the
 * same LibreOffice command for those platforms. Every other platform is
 * reported through the callback, so the callback is always invoked.
 *
 * NOTE(review): the Windows LibreOffice path is retained from the original
 * table, but the merge pipeline in `mergeDocx` runs its last conversion
 * through `/bin/bash`, so Windows is reported as not yet supported. Confirm
 * the intended Windows behavior before enabling it.
 *
 * @param {string} firstFileName - Path of the first document.
 * @param {string} secondFileName - Path of the second document.
 * @param {string} outputDocx - Destination path of the merged DOCX.
 * @param {function(?string, string=): void} callback - Receives
 *   `(errorMessage)` on failure or `(null, successMessage)` on success.
 * @returns {void}
 */
function mergeDocxCrossPlatform(firstFileName, secondFileName, outputDocx, callback) {
  const platform = os.platform();

  // LibreOffice launcher per known platform.
  const libreOfficeCommands = new Map([
    ['darwin', 'soffice'], // macOS
    ['linux', 'libreoffice'],
    ['win32', '"C:\\Program Files\\LibreOffice\\program\\soffice.exe"'], // Windows
  ]);

  if (!libreOfficeCommands.has(platform)) {
    return callback(`Unsupported platform: ${platform}`);
  }

  if (platform === 'win32') {
    // Report explicitly instead of returning without ever calling the callback.
    return callback(`Platform not yet supported: ${platform} (the merge pipeline requires /bin/bash)`);
  }

  return mergeDocx(firstFileName, secondFileName, outputDocx, callback);
}
|
|
|
// Usage examples:
//
// Basic usage:
// mergeDocx('document1.docx', 'document2.docx', 'output_merged.docx', (error, result) => {
//   if (error) {
//     console.error('Error:', error);
//   } else {
//     console.log('Success:', result);
//   }
// });
//
// Force cleanup if needed:
// forceCleanupPdfFiles(); // Cleans system temp directory
// forceCleanupPdfFiles('/custom/temp/path'); // Cleans specific directory
|
|
|
module.exports = { mergeDocx, mergeDocxCrossPlatform, cleanupTempFiles, forceCleanupPdfFiles }; |