import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { v4 as uuidv4 } from 'uuid';

// Name of the Qdrant collection that stores the embedded records.
const COLLECTION_DATA = 'data-v2';

/**
 * Drops a Qdrant collection and re-creates it empty.
 *
 * Both Qdrant responses are logged; neither is checked for success, so a
 * failed delete (e.g. the collection did not exist yet) does not stop the
 * re-create.
 *
 * @param {string} collection - Name of the collection to reset.
 * @returns {Promise<void>}
 */
async function clearCollection(collection) {
  const collectionUrl = `http://localhost:6333/collections/${collection}`;

  const deleteResponse = await fetch(collectionUrl, { method: 'DELETE' });
  const deleteResult = await deleteResponse.json();
  console.log(deleteResult);

  const createResponse = await fetch(collectionUrl, {
    method: 'PUT',
    // The body is JSON; without this header fetch labels a string body as
    // text/plain. Every other Qdrant request in this module sets it too.
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      // 1536 is the vector size requested here; it has to match the length
      // of the vectors produced by textToEmbeddings().
      vectors: { size: 1536, distance: "Cosine" },
      // NOTE(review): `payload` does not look like a documented
      // collection-creation parameter — confirm Qdrant accepts/ignores it.
      payload: { text: "string" },
    }),
  });
  const createResult = await createResponse.json();
  console.log(createResult);
}

/**
 * Reads a JSON file, embeds every entry of its `output` array and stores the
 * resulting vectors in the COLLECTION_DATA Qdrant collection.
 *
 * Items are processed concurrently. The returned promise settles only after
 * every item has been written, and rejects with the first failure.
 *
 * @param {string} filepath - Path of the JSON file to read. Its top level
 *   must contain an `output` array.
 * @param {boolean} [clear=false] - When true, the collection is dropped and
 *   re-created before anything is written.
 * @returns {Promise<void>}
 * @throws If the file cannot be read or parsed, or if embedding/storing any
 *   item fails.
 */
export async function processData(filepath, clear = false) {
  if (clear) {
    await clearCollection(COLLECTION_DATA);
  }

  console.log("Processing file for embeddings:", filepath);

  // Load the records from the local file.
  const fileContents = readFileSync(filepath, 'utf8');
  const json = JSON.parse(fileContents);

  // `forEach(async ...)` would discard the per-item promises, letting this
  // function return before any write finished and leaving rejections
  // unhandled. Collect and await them instead.
  await Promise.all(
    json.output.map(async (item) => {
      const text = jsonToText(item);
      const embeddings = await textToEmbeddings(text);
      const uuid = uuidv4();
      await writeToQdrant(
        uuid,
        embeddings,
        {
          summry: item.summry,
          keywords: item.keywords,
          number: item.number,
          total: item.total,
          // Stored as a JSON string rather than as a nested object.
          metadata: JSON.stringify(item.metadata),
        },
        COLLECTION_DATA,
      );
    }),
  );
}

/**
 * Requests an embedding vector for a piece of text from the OpenAI
 * embeddings endpoint, using the `text-embedding-3-small` model and the API
 * key in `process.env.OPENAI_API_KEY`.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|undefined>} The first embedding in the
 *   response, or undefined when the response contains no entries.
 * @throws {Error} If the API answers with a non-2xx status.
 */
async function textToEmbeddings(text) {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`
    },
    body: JSON.stringify({
      model: "text-embedding-3-small",
      input: text
    })
  });
  const result = await response.json();

  // Error responses carry no `data` array; fail with the API's own message
  // instead of an opaque TypeError on `result.data[0]`.
  if (!response.ok) {
    const reason = result?.error?.message ?? JSON.stringify(result);
    throw new Error(`Embedding request failed (${response.status}): ${reason}`);
  }

  return result.data[0]?.embedding;
}
/**
 * Flattens one record into the single-line text that is sent for embedding.
 *
 * @param {object} item - Record to describe. Reads `summry`, `keywords`,
 *   `number`, `total`, `user` and `content`.
 * @returns {string} The labelled fields, each followed by one space. Line
 *   breaks inside `content` are collapsed to single spaces.
 */
function jsonToText(item) {
  // A record without keywords or content should still be embeddable, so
  // treat a missing value as empty instead of throwing.
  const keywords = (item.keywords ?? []).join(", ");
  const content = String(item.content ?? "").replace(/[\r\n]+/g, ' ');

  return [
    `Summary: ${item.summry} `,
    `Keywords: ${keywords} `,
    `Order: ${item.number}/${item.total} `,
    `User: ${item.user} `,
    `Content: ${content} `,
  ].join("");
}

/**
 * Upserts a single point into a Qdrant collection and waits for the write to
 * be applied (`wait=true`).
 *
 * @param {string|number} id - Point id.
 * @param {number[]} embeddings - Vector to store.
 * @param {object} payload - Payload stored alongside the vector.
 * @param {string} collection - Target collection name.
 * @returns {Promise<object>} The parsed Qdrant response body, returned for
 *   both successful and failed requests.
 */
async function writeToQdrant(id, embeddings, payload, collection) {
  const res = await fetch(`http://localhost:6333/collections/${collection}/points?wait=true`, {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      points: [{
        id: id,
        vector: embeddings,
        payload: payload
      }]
    })
  });
  const data = await res.json();

  // Surface rejected writes the same way the search helpers report failures;
  // the body is still returned so existing callers see no difference.
  if (!res.ok) {
    console.error("Write failed:", data);
  } else {
    console.log(data);
  }
  return data;
}

/**
 * Embeds a free-text query and returns the closest stored points.
 *
 * @param {string} query - Text to search for.
 * @returns {Promise<object[]|null>} Search hits, or null if the search
 *   request failed.
 */
export async function searchByText(query) {
  const queryEmbeddings = await textToEmbeddings(query);
  return searchQdrant(queryEmbeddings);
}

/**
 * Looks up a stored theme point and searches the data collection with that
 * point's vector.
 *
 * @param {string|number} themeId - Id of the point in the theme collection.
 * @returns {Promise<object[]|null>} Search hits, or null if either the theme
 *   lookup or the search request failed.
 */
export async function searchByTheme(themeId) {
  // NOTE(review): COLLECTION_THEME is not declared in the part of the file
  // visible here — confirm it is defined, otherwise this throws a
  // ReferenceError on first call.
  // The id is encoded so characters such as "/" cannot alter the URL path.
  const themeUrl = `http://localhost:6333/collections/${COLLECTION_THEME}/points/${encodeURIComponent(themeId)}`;
  const res = await fetch(themeUrl, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    }
  });

  if (!res.ok) {
    const errorData = await res.json();
    console.error("Search failed:", errorData);
    return null;
  }

  const data = await res.json();
  console.log(data);
  return searchQdrant(data.result.vector);
}

/**
 * Runs a vector similarity search against the COLLECTION_DATA collection.
 *
 * @param {number[]} query_embeddings - Query vector.
 * @returns {Promise<object[]|null>} Up to 50 hits, each including payload
 *   and vector, or null if Qdrant answered with a non-2xx status.
 */
async function searchQdrant(query_embeddings) {
  const res = await fetch(`http://localhost:6333/collections/${COLLECTION_DATA}/points/search`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      vector: query_embeddings,
      limit: 50,
      with_payload: true,
      with_vector: true
    })
  });

  if (!res.ok) {
    const errorData = await res.json();
    console.error("Search failed:", errorData);
    return null;
  }

  const data = await res.json();
  console.log("Search Result:", data);
  return data.result;
}