You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

178 lines
4.6 KiB

import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { v4 as uuidv4 } from 'uuid';
// Name of the Qdrant collection that processData writes points to and that searchQdrant queries.
const COLLECTION_DATA = 'data-v2';
/**
 * Drops a Qdrant collection on the local instance and re-creates it empty.
 *
 * The delete step is best-effort: its response is only logged. The create
 * step must succeed, otherwise later writes would target a collection that
 * does not exist.
 *
 * @param {string} collection - Name of the collection to reset.
 * @returns {Promise<void>}
 * @throws {Error} When Qdrant answers the create request with a non-2xx status.
 */
async function clearCollection(collection) {
  const collectionUrl = `http://localhost:6333/collections/${collection}`;

  // Remove the existing collection (if any) and log whatever Qdrant reports.
  const deleteRes = await fetch(collectionUrl, { method: 'DELETE' });
  const deleteData = await deleteRes.json();
  console.log(deleteData);

  const createRes = await fetch(collectionUrl, {
    method: 'PUT',
    // Declare the JSON body explicitly, like the other Qdrant requests in this file.
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      vectors: {
        // presumably the embedding length produced by textToEmbeddings — TODO confirm
        size: 1536,
        distance: "Cosine",
      },
      // NOTE(review): kept from the original request; confirm Qdrant's
      // create-collection schema actually accepts a `payload` field.
      payload: {
        text: "string",
      },
    }),
  });
  const createData = await createRes.json();
  if (!createRes.ok) {
    console.error("Collection creation failed:", createData);
    throw new Error(
      `Failed to create Qdrant collection "${collection}" (HTTP ${createRes.status})`,
    );
  }
  console.log(createData);
}
// function generateUUID() { // Public Domain/MIT
// var d = new Date().getTime();//Timestamp
// var d2 = (performance && performance.now && (performance.now()*1000)) || 0;
// return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
// var r = Math.random() * 16;
// return r | 0;
// });
// }
/**
 * Reads a JSON file, embeds every entry of its `output` array and stores the
 * resulting vectors in the COLLECTION_DATA Qdrant collection.
 *
 * Entries are processed concurrently, and the returned promise settles only
 * after every entry has been embedded and written. The first failure rejects
 * the promise instead of becoming an unhandled rejection.
 *
 * @param {string} filepath - Path of the JSON file to ingest; it must contain an `output` array.
 * @param {boolean} [clear=false] - When true, the collection is reset before ingesting.
 * @returns {Promise<void>}
 */
export async function processData(filepath, clear = false) {
  if (clear) await clearCollection(COLLECTION_DATA);
  console.log("Processing file for embeddings:", filepath);

  // Load and parse the input file from disk.
  const json = JSON.parse(readFileSync(filepath, 'utf8'));

  // map + Promise.all (not forEach) so the async work is actually awaited.
  await Promise.all(json.output.map(async (item) => {
    const text = jsonToText(item);
    const embeddings = await textToEmbeddings(text);
    const uuid = uuidv4();
    await writeToQdrant(uuid, embeddings, {
      summry: item.summry,
      keywords: item.keywords,
      number: item.number,
      total: item.total,
      // Stored as a JSON string rather than as a nested object.
      metadata: JSON.stringify(item.metadata),
    }, COLLECTION_DATA);
  }));
}
/**
 * Requests an embedding vector for a text from the OpenAI embeddings API.
 *
 * The API key is read from the OPENAI_API_KEY environment variable.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|undefined>} The embedding of the first result, or
 *   undefined when the API returns an empty `data` array.
 * @throws {Error} When the API answers with a non-2xx status or a body without a `data` array.
 */
async function textToEmbeddings(text) {
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "text-embedding-3-small",
      input: text,
    }),
  });
  const result = await response.json();

  // Error responses carry no `data` array; fail with the API's own message
  // rather than a TypeError from indexing into undefined.
  if (!response.ok || !Array.isArray(result?.data)) {
    const detail = result?.error?.message ?? JSON.stringify(result);
    throw new Error(`Embedding request failed (HTTP ${response.status}): ${detail}`);
  }
  return result.data[0]?.embedding;
}
/**
 * Flattens one item into the single-line text that gets embedded.
 *
 * Each labelled field is followed by one space, so the result ends with a
 * trailing space. Runs of CR/LF characters in `content` collapse to one space.
 *
 * @param {object} item - Entry with `summry`, `keywords` (array), `number`,
 *   `total`, `user` and `content` (string) properties.
 * @returns {string} The labelled, space-separated text.
 */
function jsonToText(item) {
  const segments = [
    `Summary: ${item.summry}`,
    `Keywords: ${item.keywords.join(", ")}`,
    `Order: ${item.number}/${item.total}`,
    `User: ${item.user}`,
    `Content: ${item.content.replace(/[\r\n]+/g, ' ')}`,
  ];
  // Every segment carries its own trailing space.
  return segments.map((segment) => `${segment} `).join("");
}
/**
 * Upserts a single point into a Qdrant collection on the local instance.
 *
 * The request uses `wait=true`. The parsed response body is logged and
 * returned as-is, whatever the HTTP status.
 *
 * @param {string|number} id - Point identifier.
 * @param {number[]} embeddings - Vector to store for the point.
 * @param {object} payload - Payload stored alongside the vector.
 * @param {string} collection - Target collection name.
 * @returns {Promise<object>} Parsed JSON body of Qdrant's response.
 */
async function writeToQdrant(id, embeddings, payload, collection) {
  const endpoint = `http://localhost:6333/collections/${collection}/points?wait=true`;
  const point = { id, vector: embeddings, payload };
  const requestInit = {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ points: [point] }),
  };

  const response = await fetch(endpoint, requestInit);
  const parsed = await response.json();
  console.log(parsed);
  return parsed;
}
/**
 * Embeds a free-text query and runs a vector search with the result.
 *
 * @param {string} query - Text to search for.
 * @returns {Promise<*>} Whatever searchQdrant resolves to for the query's embedding.
 */
export async function searchByText(query) {
  const vector = await textToEmbeddings(query);
  const matches = await searchQdrant(vector);
  return matches;
}
/**
 * Looks up a stored theme point by id and searches with its vector.
 *
 * NOTE(review): COLLECTION_THEME is not declared in the part of the file
 * reviewed here — confirm it is defined elsewhere in the module.
 *
 * @param {string|number} themeId - Id of the point in the theme collection.
 * @returns {Promise<*|null>} The result of searchQdrant for the theme's
 *   vector, or null when the point lookup returns a non-2xx status.
 */
export async function searchByTheme(themeId) {
  const pointUrl = `http://localhost:6333/collections/${COLLECTION_THEME}/points/${themeId}`;
  const response = await fetch(pointUrl, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
  });

  if (response.ok) {
    const point = await response.json();
    console.log(point);
    // Reuse the theme's stored vector as the search query.
    const matches = await searchQdrant(point.result.vector);
    return matches;
  }

  const failure = await response.json();
  console.error("Search failed:", failure);
  return null;
}
/**
 * Runs a vector search against the COLLECTION_DATA collection.
 *
 * Requests up to 50 matches, with payloads and vectors included.
 *
 * @param {number[]} query_embeddings - Query vector.
 * @returns {Promise<*|null>} The `result` field of Qdrant's response, or null
 *   when Qdrant answers with a non-2xx status (the error body is logged).
 */
async function searchQdrant(query_embeddings) {
  const endpoint = `http://localhost:6333/collections/${COLLECTION_DATA}/points/search`;
  const searchRequest = {
    vector: query_embeddings,
    limit: 50,
    with_payload: true,
    with_vector: true,
  };

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(searchRequest),
  });
  const parsed = await response.json();

  if (response.ok) {
    console.log("Search Result:", parsed);
    return parsed.result;
  }

  console.error("Search failed:", parsed);
  return null;
}