Let's query this Vector Store stored in the Managed Tensor Database using the REST API. The steps are:
Define the authentication tokens and search terms
Embed the search term using OpenAI
Reformat the embedding to an embedding_search string that can be passed to the REST API request.
Create the query string using Deep Lake TQL. The dataset_path and embedding_search are a part of the query string.
Submit the request and print the response data
import requests
import openai
import os
# Script: embed a search term with OpenAI, then run a TQL vector search
# against a Deep Lake dataset through the Activeloop REST query endpoint.

# Tokens should be set in environment variables.
ACTIVELOOP_TOKEN = os.environ['ACTIVELOOP_TOKEN']
DATASET_PATH = 'hub://activeloop/twitter-algorithm'
ENDPOINT_URL = 'https://app.activeloop.ai/api/query/v1'
SEARCH_TERM = 'What do the trust and safety models do?'
# The OpenAI token should also exist in the environment as os.environ['OPENAI_API_KEY'].

# The headers contain the user token.
headers = {
    "Authorization": f"Bearer {ACTIVELOOP_TOKEN}",
}

# Embed the search term.
embedding = openai.Embedding.create(input=SEARCH_TERM, model="text-embedding-ada-002")["data"][0]["embedding"]

# Format the embedding list as a comma-separated string so it can be inlined
# into the TQL ARRAY[...] literal of the REST API request.
embedding_string = ",".join([str(item) for item in embedding])

# Create the query using TQL. The dataset path comes from the DATASET_PATH
# constant defined above (the name is case-sensitive; a lowercase
# `dataset_path` is not defined in this script and raises NameError).
# cosine_similarity is higher for closer vectors, hence "order by score desc".
query = f"select * from (select text, cosine_similarity(embedding, ARRAY[{embedding_string}]) as score from \"{DATASET_PATH}\") order by score desc limit 5"

# Submit the request and print the decoded JSON body.
response = requests.post(ENDPOINT_URL, json={"query": query}, headers=headers)
data = response.json()
print(data)
const axios = require('axios');
// Credentials are read from environment variables; both must be set before running.
// Declared with `const` so they are real bindings rather than implicit globals
// (a bare assignment throws a ReferenceError in strict mode / ES modules).
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const ACTIVELOOP_TOKEN = process.env.ACTIVELOOP_TOKEN;

// Search term to embed, dataset to search, and the REST query endpoint.
const QUERY = 'What do the trust and safety models do?';
const DATASET_PATH = 'hub://activeloop/twitter-algorithm';
const ENDPOINT_URL = 'https://app.activeloop.ai/api/query/v1';
/**
 * Request an embedding for a piece of text from the OpenAI embeddings endpoint.
 *
 * Sends a POST with the text and the "text-embedding-ada-002" model name,
 * authenticated with a Bearer token taken from OPENAI_API_KEY.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<*>} The body of the HTTP response (`response.data`), unmodified.
 */
async function getEmbedding(text) {
  const payload = {
    input: text,
    model: "text-embedding-ada-002",
  };
  const requestConfig = {
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${OPENAI_API_KEY}`,
    },
  };

  const { data } = await axios.post('https://api.openai.com/v1/embeddings', payload, requestConfig);
  return data;
}
/**
 * Run a query against the Activeloop REST query endpoint.
 *
 * POSTs `{ query }` as JSON to ENDPOINT_URL, authenticated with a Bearer
 * token taken from ACTIVELOOP_TOKEN.
 *
 * @param {string} query - Query string to execute (the caller passes TQL).
 * @returns {Promise<*>} The body of the HTTP response (`response.data`), unmodified.
 */
async function searchDataset(query) {
  // ENDPOINT_URL is passed directly: `${...}` interpolation is only valid
  // inside a backtick template literal and is a syntax error on its own.
  const response = await axios.post(ENDPOINT_URL, {
    query: query,
  }, {
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${ACTIVELOOP_TOKEN}`
    }
  });
  return response.data;
}
/**
 * Search a dataset for the texts most similar to a query term.
 *
 * Embeds `query` via getEmbedding, inlines the embedding into a TQL statement
 * that scores every row by L2 distance to it, runs the statement via
 * searchDataset, logs the response and returns it.
 *
 * @param {string} query - Search term to embed.
 * @param {string} dataset_path - Dataset path placed in the TQL FROM clause.
 * @returns {Promise<*>} The response returned by searchDataset.
 */
async function searchSimilarTexts(query, dataset_path) {
  // Get the embedding of the query term and flatten it to "v1,v2,..." so it
  // can be inlined into the TQL ARRAY[...] literal.
  const embedding = await getEmbedding(query);
  const embedding_search = embedding.data[0].embedding.join(',');

  // Construct the search query. The score is an L2 *distance*, so smaller
  // means more similar: sort ascending to get the 5 nearest rows
  // (descending order would return the 5 least similar ones).
  const TQL = `SELECT * FROM (
    SELECT text, l2_norm(embedding - ARRAY[${embedding_search}]) AS score
    from "${dataset_path}"
  ) ORDER BY score ASC LIMIT 5`;

  // Search the dataset using the constructed query
  const response = await searchDataset(TQL);

  // Log the search results
  console.log(response);
  return response;
}
// Entry point: run the search. The promise is handled explicitly so that a
// failed HTTP request produces a short message and a non-zero exit code
// instead of an unhandled promise rejection.
searchSimilarTexts(QUERY, DATASET_PATH).catch((error) => {
  console.error('Vector search failed:', error.message);
  process.exitCode = 1;
});
Congrats! You performed a vector search using the Deep Lake Managed Database! 🎉