import puppeteer from 'puppeteer';

/**
 * Keys whose array values are pure containers: the array itself is not
 * collected, only the matching values found inside it.
 */
const LIST_CONTAINER_KEYS = new Set(["edges", "thread_items", "reply_threads"]);

/**
 * Deep lookup over a parsed embedded-JSON payload.
 *
 * Walks every array element and every object property exactly once and
 * collects the non-null values stored under any of `keys`. Array values stored
 * under "edges", "thread_items" or "reply_threads" are not collected
 * themselves; the walk simply descends into them.
 *
 * The wrappers this was written for (`__bbox.require` / `__bbox.define`, and
 * call arrays that usually look like
 * `["RelayPrefetchedStreamCache", "next", null, ["key", {data}]]`) need no
 * special casing: the generic walk already reaches their contents. Visiting
 * them a second time only produced duplicate results and made the cost grow
 * exponentially with nesting depth.
 *
 * @param {*} obj - Parsed JSON value to search; non-objects yield `[]`.
 * @param {string[]} [keys] - Property names whose values should be collected.
 * @returns {Array<*>} Collected values in traversal order. Entries may
 *   themselves be arrays (e.g. a "replies" list); callers flatten as needed.
 */
function nestedLookup(obj, keys = ["thread_items", "replies", "child_posts", "threaded_replies", "post", "posts", "edges", "node", "reply_threads"]) {
  const results = [];
  const wanted = new Set(keys);

  const visit = (value) => {
    if (typeof value !== "object" || value === null) return;

    if (Array.isArray(value)) {
      for (const item of value) visit(item);
      return;
    }

    for (const [key, child] of Object.entries(value)) {
      if (child === null) continue;
      const isListContainer = LIST_CONTAINER_KEYS.has(key) && Array.isArray(child);
      if (wanted.has(key) && !isListContainer) {
        results.push(child);
      }
      // Keep descending so matches nested inside this value are found too.
      visit(child);
    }
  };

  visit(obj);
  return results;
}

/**
 * Normalises one candidate object into a flat post record.
 *
 * The post is looked up, in order, at `data.post`, `data.node.post`,
 * `data.thread_items[0].post`, `data.posts[0]`, `data` itself (when it has a
 * caption), the GraphQL `data.result.data` wrapper, and finally `data` itself
 * when it carries a post `code`.
 *
 * Media URLs are intentionally not part of the returned record.
 *
 * @param {*} data - Candidate value produced by `nestedLookup`.
 * @returns {?{text: string, published_on: *, id: *, code: *, username: *,
 *   like_count: number, reply_count: number, parent_post_id: *}}
 *   The record, or `null` when no post carrying an `id`/`pk` is found.
 */
function parseThread(data) {
  if (!data) return null;

  let post =
    data.post ||
    data.node?.post ||
    data.thread_items?.[0]?.post ||
    data.posts?.[0] ||
    (data.caption ? data : null);

  // GraphQL responses wrap the payload in result.data.
  if (!post && data.result?.data) {
    const d = data.result.data;
    post = d.text_post_app_info || d.post || d;
  }

  // A bare post object without a caption would otherwise be dropped.
  // NOTE(review): assumes media-only posts have a null caption but still
  // carry `code` - confirm against a live payload.
  if (!post && data.code) {
    post = data;
  }

  if (!post || (!post.id && !post.pk)) return null;

  return {
    text: post.caption?.text || post.text_post_app_info?.share_info?.quoted_post?.caption?.text || "",
    published_on: post.taken_at,
    id: post.id || post.pk,
    code: post.code,
    username: post.user?.username,
    like_count: post.like_count || 0,
    reply_count: post.direct_reply_count || post.reply_count || 0,
    // NOTE(review): the first source is the id of the author being replied to,
    // not a post id; getThread's reply filter relies on that id appearing
    // inside the main post id - confirm the id format.
    parent_post_id: post.text_post_app_info?.reply_to_author?.id || post.reply_to_post_id || null
  };
}

/**
 * Extracts the post code (the path segment after "/post/") from a URL.
 * Query string and fragment are removed first so that share links such as
 * ".../post/ABC?xmt=..." yield "ABC" rather than "ABC?xmt=...".
 *
 * @param {string} postUrl
 * @returns {?string} The post code, or `null` when the URL has none.
 */
function extractPostCode(postUrl) {
  const withoutQuery = postUrl.split(/[?#]/)[0];
  const code = withoutQuery.split("/post/")[1]?.split("/")[0];
  return code || null;
}

/**
 * Loads a Threads post page in headless Chrome and extracts the main post and
 * its replies from the JSON blobs embedded in the page.
 *
 * @param {string} postUrl - URL of the post; must contain "threads." and a
 *   "/post/<code>" segment.
 * @returns {Promise<{thread: object, replies: object[]}>} The main post record
 *   and its replies sorted by ascending `published_on`.
 * @throws {Error} If the URL is invalid, has no post code, or the main post
 *   cannot be located in the page data. Puppeteer navigation errors propagate.
 */
export async function getThread(postUrl) {
  if (typeof postUrl !== "string" || !postUrl.includes("threads.")) {
    throw new Error("無效的 Threads 網址");
  }

  // Fail before launching a browser: with an undefined code the lookup below
  // could match any post that happens to lack a `code`.
  const postCodeFromUrl = extractPostCode(postUrl);
  if (!postCodeFromUrl) {
    throw new Error("無效的 Threads 網址:找不到貼文代碼");
  }
  console.log(`[目標鎖定] 貼文代碼: ${postCodeFromUrl}`);

  const browser = await puppeteer.launch({
    headless: "new",
    args: ['--disable-blink-features=AutomationControlled', '--no-sandbox', '--disable-setuid-sandbox']
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    // Wait for the network to settle so the initial requests have started.
    await page.goto(postUrl, { waitUntil: 'networkidle2', timeout: 30000 });

    // Scrolling is required: the Relay stream only gets parsed once the page
    // is scrolled. Stops at the bottom of the page or after ~5000px.
    await page.evaluate(async () => {
      await new Promise((resolve) => {
        const distance = 500;
        let totalHeight = 0;
        const timer = setInterval(() => {
          const scrollHeight = document.body.scrollHeight;
          window.scrollBy(0, distance);
          totalHeight += distance;
          if (totalHeight >= scrollHeight || totalHeight > 5000) {
            clearInterval(timer);
            resolve();
          }
        }, 100);
      });
    });

    // Give the streamed blocks time to finish loading.
    await new Promise((r) => setTimeout(r, 5000));

    const allScripts = await page.$$eval(
      'script[type="application/json"]',
      (scripts) => scripts.map((s) => s.textContent)
    );

    // Keyed by post id so a post seen in several blobs is kept once
    // (the last occurrence wins).
    const allParsedItems = new Map();
    for (const content of allScripts) {
      if (!content || !content.includes("ScheduledServerJS")) continue;
      try {
        const data = JSON.parse(content);
        for (const item of nestedLookup(data).flat()) {
          if (!item) continue;
          const parsed = parseThread(item);
          if (parsed) allParsedItems.set(parsed.id, parsed);
        }
      } catch (err) {
        // Best effort: one bad blob must not abort the scrape, but it should
        // not disappear silently either.
        console.warn(`[解析略過] 內嵌 JSON 區塊處理失敗: ${err?.message ?? err}`);
      }
    }

    const itemsArray = Array.from(allParsedItems.values());
    const mainThread = itemsArray.find((t) => t.code === postCodeFromUrl);
    if (!mainThread) {
      throw new Error("無法定位主貼文。這通常是因為 Auth 阻擋或頁面未完全渲染。");
    }

    const authorName = mainThread.username;

    // `id` may come from `post.pk`, so coerce before using string methods.
    // NOTE(review): this is substring matching on ids and could in principle
    // match an unrelated id; tighten once the id format is confirmed.
    const mainId = String(mainThread.id);
    const replies = itemsArray
      .filter((t) =>
        t.code !== postCodeFromUrl &&
        t.parent_post_id != null &&
        mainId.includes(String(t.parent_post_id))
      )
      .sort((a, b) => a.published_on - b.published_on);

    console.log(`[解析成功] 找到主貼文,作者: ${authorName},回覆數量: ${replies.length}`);
    return { thread: mainThread, replies };
  } finally {
    await browser.close();
  }
}