You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
184 lines
4.8 KiB
184 lines
4.8 KiB
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
import { v4 as uuidv4 } from 'uuid';
|
|
|
|
// Default Qdrant collection name: processData writes to it unless another
// collection is passed, and searchQdrant always queries it.
const COLLECTION_DATA = 'data-v2';
|
|
|
|
|
|
/**
 * Drops a Qdrant collection and re-creates it empty.
 *
 * Issues a DELETE followed by a PUT against the local Qdrant instance and
 * logs both JSON responses. Response status codes are not inspected here.
 *
 * @param {string} collection - Name of the collection to reset.
 * @returns {Promise<void>}
 */
async function clearCollection(collection){
  const collectionUrl = `http://localhost:6333/collections/${collection}`;

  // Remove the existing collection (and every point stored in it).
  const res_delete = await fetch(collectionUrl, { method: 'DELETE' });
  const data_delete = await res_delete.json();
  console.log(data_delete);

  // Re-create it. The body is JSON, so declare it as such: without an explicit
  // header, fetch labels a string body as text/plain.
  const res = await fetch(collectionUrl, {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      vectors: {
        // presumably matches the embedding model's output size — confirm
        size: 1536,
        distance: "Cosine",
      },
      // NOTE(review): kept from the original request; confirm Qdrant actually
      // uses a `payload` field when creating a collection.
      payload: {
        text: "string",
      },
    }),
  });
  const data = await res.json();
  console.log(data);
}
|
|
|
|
// function generateUUID() { // Public Domain/MIT
|
|
// var d = new Date().getTime();//Timestamp
|
|
// var d2 = (performance && performance.now && (performance.now()*1000)) || 0;
|
|
// return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
|
|
// var r = Math.random() * 16;
|
|
// return r | 0;
|
|
// });
|
|
// }
|
|
|
|
/**
 * Reads a JSON file, embeds every entry of its `output` array and stores the
 * resulting vectors in Qdrant.
 *
 * The returned promise settles only after every item has been embedded and
 * written (or skipped), so callers can reliably await completion and see
 * failures as a rejection.
 *
 * @param {string} filepath - Path of the JSON file to ingest.
 * @param {boolean} [clear=false] - When true, reset the collection first.
 * @param {string} [collection=COLLECTION_DATA] - Target Qdrant collection.
 * @returns {Promise<void>}
 * @throws {TypeError} If the parsed file has no `output` array.
 */
export async function processData(filepath, clear=false, collection=COLLECTION_DATA){
  if (clear) await clearCollection(collection);

  console.log("Processing file for embeddings:", filepath);

  const json = JSON.parse(readFileSync(filepath, 'utf8'));
  if (!Array.isArray(json?.output)) {
    throw new TypeError(`Expected an "output" array in ${filepath}`);
  }

  // map + Promise.all instead of forEach(async ...): forEach discards the
  // promises, so the function would return before anything was written.
  await Promise.all(json.output.map(async (item) => {
    const text = jsonToText(item);
    const embeddings = await textToEmbeddings(text);

    // textToEmbeddings reports failure with null; a point without a vector
    // cannot be stored, so skip it rather than send an invalid upsert.
    if (embeddings == null) {
      console.error("Skipping item without embeddings:", item.number);
      return;
    }

    await writeToQdrant(uuidv4(), embeddings, {
      // `summry` is the key spelling used by the input data (see jsonToText).
      summry: item.summry,
      keywords: item.keywords,
      number: item.number,
      total: item.total,
      teaser: item.teaser,
      metadata: JSON.stringify(item.metadata),
    }, collection);
  }));
}
|
|
|
|
|
|
/**
 * Requests an embedding vector for `text` from the OpenAI embeddings endpoint.
 *
 * The API key is read from `process.env.OPENAI_API_KEY`. A network-level
 * failure of `fetch` itself still rejects, as before.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]|null>} The first embedding of the response, or
 *   null when the API answers with an error status, the body cannot be parsed,
 *   or no embedding is present.
 */
async function textToEmbeddings(text){
  const response = await fetch('https://api.openai.com/v1/embeddings', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "text-embedding-3-small",
      input: text,
    }),
  });

  // Report API errors (bad key, rate limit, ...) with their status and body
  // instead of relying on a TypeError from a missing `data` field.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    console.error("Error fetching embeddings:", response.status, detail);
    return null;
  }

  try {
    const result = await response.json();
    return result.data?.[0]?.embedding ?? null;
  } catch (err) {
    console.error("Error fetching embeddings:", err);
    return null;
  }
}
|
|
|
|
/**
 * Flattens one input item into the single-line text that gets embedded.
 *
 * Reads `summry`, `keywords`, `number`, `total`, `user` and `content` from the
 * item. Runs of CR/LF characters in `content` are collapsed to one space, and
 * every labelled segment is followed by a single space (including the last).
 *
 * @param {object} item - Entry with the fields listed above; `keywords` must
 *   provide `join` and `content` must provide `replace`.
 * @returns {string} The concatenated, labelled text.
 */
function jsonToText(item){
  const segments = [
    `Summary: ${item.summry}`,
    `Keywords: ${item.keywords.join(", ")}`,
    `Order: ${item.number}/${item.total}`,
    `User: ${item.user}`,
    `Content: ${item.content.replace(/[\r\n]+/g, ' ')}`,
  ];

  // Each segment carries its own trailing space, so the result ends with one.
  return segments.map((segment) => `${segment} `).join("");
}
|
|
|
|
|
|
|
|
/**
 * Upserts a single point into a Qdrant collection.
 *
 * The request uses `?wait=true`, and the parsed JSON response is logged and
 * returned as-is; the HTTP status is not inspected here.
 *
 * @param {string} id - Point id.
 * @param {number[]} embeddings - Vector stored for the point.
 * @param {object} payload - Payload stored alongside the vector.
 * @param {string} collection - Target collection name.
 * @returns {Promise<object>} Parsed JSON body of the Qdrant response.
 */
async function writeToQdrant(id,embeddings, payload, collection){
  const point = { id, vector: embeddings, payload };

  const res = await fetch(`http://localhost:6333/collections/${collection}/points?wait=true`, {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ points: [point] }),
  });

  const data = await res.json();
  console.log(data);

  return data;
}
|
|
|
|
/**
 * Embeds a free-text query and searches Qdrant with the resulting vector.
 *
 * @param {string} query - Text to search for.
 * @returns {Promise<*>} Whatever searchQdrant resolves to for that vector.
 */
export async function searchByText(query){
  const queryEmbeddings = await textToEmbeddings(query);
  return searchQdrant(queryEmbeddings);
}
|
|
|
|
/**
 * Looks up a stored theme point and searches the data collection with its
 * vector.
 *
 * NOTE(review): `COLLECTION_THEME` is not declared in the visible part of this
 * file — confirm it is defined, otherwise this function throws a
 * ReferenceError when called.
 *
 * @param {string|number} themeId - Id of the theme point to fetch.
 * @returns {Promise<*>} The result of searchQdrant, or null when the point
 *   cannot be fetched or carries no vector.
 */
export async function searchByTheme(themeId){
  const res = await fetch(`http://localhost:6333/collections/${COLLECTION_THEME}/points/${themeId}`, {
    method: 'GET',
    headers: {
      'Content-Type': 'application/json',
    },
  });

  if (!res.ok) {
    const errorData = await res.json();
    console.error("Search failed:", errorData);
    return null;
  }

  const data = await res.json();
  console.log(data);

  // Without a vector there is nothing to search with; report it here rather
  // than fail on a property access or send an invalid search.
  const themeVector = data?.result?.vector;
  if (themeVector == null) {
    console.error("Search failed:", "theme point has no vector", themeId);
    return null;
  }

  return searchQdrant(themeVector);
}
|
|
|
|
/**
 * Runs a vector search against the COLLECTION_DATA collection.
 *
 * Requests up to 50 hits, each with its payload and vector.
 *
 * @param {number[]} query_embeddings - Query vector.
 * @returns {Promise<Array|null>} The `result` field of the Qdrant response, or
 *   null when no query vector was supplied or Qdrant answers with an error
 *   status.
 */
async function searchQdrant(query_embeddings){
  // textToEmbeddings signals failure with null; a search without a vector can
  // only be rejected by Qdrant, so skip the round-trip.
  if (query_embeddings == null) {
    console.error("Search failed:", "no query vector");
    return null;
  }

  const res = await fetch(`http://localhost:6333/collections/${COLLECTION_DATA}/points/search`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      vector: query_embeddings,
      limit: 50,
      with_payload: true,
      with_vector: true,
    }),
  });

  if (!res.ok) {
    const errorData = await res.json();
    console.error("Search failed:", errorData);
    return null;
  }

  const data = await res.json();
  console.log("Search Result:", data);

  return data.result;
}
|
|
|