You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

30 lines
738 B

const { convert } = require('html-to-text');
const fs = require('fs');
const { format } = require('path');
// Converts every .html file found in `folder` to plain text (via html-to-text's
// `convert`) and writes each result as <name>.txt into `outputFolder`.
// NOTE: both folders are relative paths, so they resolve against the current
// working directory of the process, not against this script's location.
const options = {
  // wordwrap: 130,
  selectors: [
    { selector: 'img', format: 'skip' }, // Skip images
  ],
};
const folder = '../raw/html';
const outputFolder = '../raw/txt';
const inputExt = '.html';
const outputExt = '.txt';

const files = fs.readdirSync(folder).filter((file) => file.endsWith(inputExt));
console.log(`Found ${files.length} HTML files in ${folder}`);

// writeFileSync does not create missing directories, so make sure the target
// folder exists before the first write.
fs.mkdirSync(outputFolder, { recursive: true });

for (const file of files) {
  const html = fs.readFileSync(`${folder}/${file}`, 'utf8');
  const text = convert(html, options);
  // Swap only the trailing extension. Replacing every "html" substring in the
  // path would also rewrite names such as "html-intro.html" to "txt-intro.txt".
  const baseName = file.slice(0, -inputExt.length);
  const outputFilePath = `${outputFolder}/${baseName}${outputExt}`;
  fs.writeFileSync(outputFilePath, text, 'utf8');
  console.log(`Converted ${file} to ${outputFilePath}`);
}