/**
 * Batch-converts every `.html` file in `../raw/html` to plain text and writes
 * the result, under the same base name with a `.txt` extension, to `../raw/txt`.
 *
 * Both folders are relative paths, so they resolve against the process's
 * current working directory, not against this script's location.
 */
const { convert } = require('html-to-text');
const fs = require('fs');
// `format` is not used by the code below; it is retained so any code outside
// this section that relies on the binding keeps working.
const { basename, format, join } = require('path');

// Options passed to html-to-text's `convert`.
const options = {
  // wordwrap: 130,
  selectors: [
    { selector: 'img', format: 'skip' }, // Skip images
  ],
};

const folder = '../raw/html';
const outputFolder = '../raw/txt';
const HTML_EXTENSION = '.html';

const files = fs
  .readdirSync(folder)
  .filter((file) => file.endsWith(HTML_EXTENSION));
console.log(`Found ${files.length} HTML files in ${folder}`);

// writeFileSync does not create missing parent directories, so make sure the
// destination exists before converting anything.
fs.mkdirSync(outputFolder, { recursive: true });

for (const file of files) {
  const filePath = join(folder, file);
  const html = fs.readFileSync(filePath, 'utf8');
  const text = convert(html, options);

  // Swap only the trailing extension. A blanket replace of "html" in the whole
  // path would also rewrite file names that merely contain "html".
  const outputFilePath = join(
    outputFolder,
    `${basename(file, HTML_EXTENSION)}.txt`,
  );
  fs.writeFileSync(outputFilePath, text, 'utf8');
  console.log(`Converted ${file} to ${outputFilePath}`);
}