import { jsonToAgent } from "./agent.js";
import { getThread } from "./scrapper.js";
import { searchThreads } from "./search.js";
import { writeFileSync, existsSync, mkdirSync, readdirSync, statSync } from "fs";
import dotenv from 'dotenv';
import { processData } from "./embeddings.js";

dotenv.config();

// NOTE(review): these look like live session credentials committed to source.
// Consider rotating them and loading the values from the environment
// (dotenv is already configured above) instead of hard-coding them here.
const cookies = [
  {
    name: 'sessionid',
    value: '64605724719%3ALlZCmwghVyOAck%3A23%3AAYhptDoKttkGRWkpa5583neohBfLXlGfOlwPPmdP1w',
    domain: '.threads.com'
  },
  {
    name: 'ds_user_id',
    value: '64605724719',
    domain: '.threads.com'
  },
  {
    name: 'csrftoken',
    value: 'SI5YedKIeuSAgAbdtfynUwzrmSAGquxH',
    domain: '.threads.com'
  }
];

// Full catalogue of search keywords. Not referenced by the steps below, which
// use a hand-picked subset; kept as the pool to choose from.
const Keywords = [
  "學貸 房租 焦慮", "獨立 失敗", "尷尬 年齡", "還沒買房", "卡住 職涯",
  "躺平 世代", "家境 差距", "文組 標籤", "覺醒 厭世", "年輕",
  "慣老闆 權益", "行政 刁難", "學生會 被架空", "職場 不平等", "家長會 決定",
  "租屋 系統 惡意", "升學 制度 不公", "系統性 壓榨", "黑箱 決策", "系統 漏洞", "校規",
  "SOP", "行政 裁量權",
  "租屋 格局 壓迫", "辦公室 權力", "校園 空間 設計", "宿舍 小 隱私", "空間 不舒服",
  "會議室 借不到", "圖書館 規定", "公共空間 誰的", "校車 收費 影響", "畢業 退場",
  "人生 妥協", "我的 決定", "邊界 被侵犯", "下班 不讀", "工作 生活 平衡",
  "參與 沒用", "開會 敷衍", "假 民主", "意見 被忽略", "提案 被擋", "連署 實質",
  "迷因 轉化 焦慮", "幽默 反抗", "厭世 創作", "負面情緒 詩意", "話語權 奪回",
  "歸屬感 飄", "租來的 人生", "網路 社群 溫暖", "家鄉 不屬於", "靈魂 避難所"
];

// Suffix of the output directory written by step2 and read by step3.
const Version = "v2";
// When true, step2 processes only the first file of each folder.
const DEBUG_MODE = false;

/**
 * Create `dir` (including missing parents) if it does not exist yet.
 *
 * @param {string} dir - Directory path to create.
 */
function ensureDir(dir) {
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true });
  }
}

/**
 * Step 1: search threads for each chosen keyword and save every result.
 *
 * For each keyword, calls `searchThreads(keyword, 20, cookies)`, then fetches
 * each URL in the result's `urls` with `getThread` and writes the returned
 * value as pretty-printed JSON to `scrapped/<keyword>/<postId>.json`, where
 * `<postId>` is the URL path segment following `/post/`.
 *
 * A failure on a single URL is logged and skipped; a failure of the search
 * call itself propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function step1() {
  const chooseKeywords = ['人生 妥協', '邊界 被侵犯', '卡住 職涯', '尷尬 年齡', '厭世 創作'];
  // const keyword="工作 生活 平衡";

  for (const keyword of chooseKeywords) {
    const searchResults = await searchThreads(keyword, 20, cookies);
    console.log(JSON.stringify(searchResults));

    for (const url of searchResults.urls) {
      try {
        const threadItems = await getThread(url);

        // The post id is the path segment right after "/post/". A URL without
        // that segment throws here and is reported by the catch below.
        const fileName = url.split("/post/")[1].split("/")[0];

        const dir = `scrapped/${keyword}`;
        ensureDir(dir);
        writeFileSync(`scrapped/${keyword}/${fileName}.json`, JSON.stringify(threadItems, null, 2));
      } catch (err) {
        console.error("Error processing", url, ":", err);
      }
    }
  }

  // Manual single-thread run, kept for debugging:
  // const threadUrl="https://www.threads.com/@bobolove0828/post/DQ63pgpklmi";
  // const threadItems=await getThread(threadUrl, cookies);
  // console.log(JSON.stringify(threadItems));
  // const fileName=threadUrl.split("/post/")[1].split("/")[0];
  // writeFileSync(`scrapped/${fileName}.json`, JSON.stringify(threadItems, null, 2) );
}

/**
 * Run the agent on one scraped file and save its response under
 * `./processed_<Version>/<folder>/<file>`. Errors are logged, never thrown,
 * so one bad file does not abort the rest of the batch.
 *
 * @param {string} folder - Sub-folder name inside `./scrapped`.
 * @param {string} file - File name inside that folder.
 * @returns {Promise<void>}
 */
async function runAgentOnFile(folder, file) {
  try {
    const data = await jsonToAgent(`./scrapped/${folder}/${file}`, 'agent_v2.txt');

    const outputFilePath = `./processed_${Version}/${folder}/${file}`;
    ensureDir(`./processed_${Version}/${folder}`);
    writeFileSync(outputFilePath, JSON.stringify(data, null, 2));
    console.log("Saved agent response to", outputFilePath);
  } catch (err) {
    console.error("Error processing agent for folder", folder, ":", err);
  }
}

/**
 * Step 2: run the agent over scraped files.
 *
 * Looks at the first `count` entries of `./scrapped`, skips entries that are
 * not directories, and processes every file in the remaining ones (only the
 * first file per folder when `DEBUG_MODE` is on). All files are started
 * concurrently and the function resolves once every one of them has finished.
 *
 * @returns {Promise<void>}
 */
async function step2() {
  const folders = readdirSync('./scrapped');
  console.log("Folders in raw folder:", folders);

  // Cap on how many entries of ./scrapped are looked at; use folders.length
  // to process everything.
  const count = 2; //folders.length;

  const tasks = [];
  for (const folder of folders.slice(0, count)) {
    const isFolder = statSync(`./scrapped/${folder}`).isDirectory();
    if (!isFolder) {
      console.log(`Folder ${folder} is empty or does not exist.`);
      continue;
    }

    const files = readdirSync(`./scrapped/${folder}`);
    console.log(`Files in folder ${folder}:`, files);

    files.forEach((file, index) => {
      if (DEBUG_MODE && index > 0) return; // for testing, process only first file
      // Collect the promise instead of dropping it so the step can be awaited.
      tasks.push(runAgentOnFile(folder, file));
    });
  }

  await Promise.all(tasks);
}

/**
 * Pass one processed file to `processData`. Errors are logged, never thrown.
 *
 * @param {string} folder - Sub-folder name inside `./processed_<Version>`.
 * @param {string} file - File name inside that folder.
 * @returns {Promise<void>}
 */
async function embedProcessedFile(folder, file) {
  try {
    await processData(`./processed_${Version}/${folder}/${file}`, false, `data-v3`);
  } catch (err) {
    console.error("Error processing embeddings for folder", folder, ":", err);
  }
}

/**
 * Step 3: feed every file under `./processed_<Version>` to `processData`.
 *
 * Entries that are not directories are skipped. All files are started
 * concurrently and the function resolves once every one of them has finished.
 *
 * @returns {Promise<void>}
 */
async function step3() {
  const folders = readdirSync(`./processed_${Version}`);
  console.log("Folders in raw folder:", folders);

  const tasks = [];
  for (const folder of folders) {
    const isFolder = statSync(`./processed_${Version}/${folder}`).isDirectory();
    if (!isFolder) {
      console.log(`Folder ${folder} is empty or does not exist.`);
      continue;
    }

    const files = readdirSync(`./processed_${Version}/${folder}`);
    console.log(`Files in folder ${folder}:`, files);

    for (const file of files) {
      tasks.push(embedProcessedFile(folder, file));
    }
  }

  await Promise.all(tasks);
}

/**
 * Entry point. The pipeline stages are toggled by (un)commenting them.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // await step1();
  // await step2();
  await step3();
}

// Report failures (e.g. a missing input directory) instead of leaving an
// unhandled rejection, and signal the failure through the exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});