Let's query this Vector Store stored in the Managed Tensor Database using the REST API. The steps are:
Define the authentication tokens and search terms
Embed the search term using OpenAI
Reformat the embedding to an embedding_search string that can be passed to the REST API request.
Create the query string using Deep Lake TQL. The dataset_path and embedding_search are a part of the query string.
Submit the request and print the response data
import os

import openai
import requests

# Tokens should be set in environmental variables.
ACTIVELOOP_TOKEN = os.environ['ACTIVELOOP_TOKEN']
DATASET_PATH = 'hub://activeloop/twitter-algorithm'
ENDPOINT_URL = 'https://app.activeloop.ai/api/query/v1'
SEARCH_TERM = 'What do the trust and safety models do?'
# os.environ['OPENAI_API_KEY'] OPEN AI TOKEN should also exist in env variables

# The headers contain the user token.
headers = {
    "Authorization": f"Bearer {ACTIVELOOP_TOKEN}",
}

# Embed the search term.
# NOTE(review): `openai.Embedding.create` appears to be the pre-1.0 `openai`
# package interface -- confirm the installed version before running.
embedding = openai.Embedding.create(
    input=SEARCH_TERM,
    model="text-embedding-ada-002",
)["data"][0]["embedding"]

# Format the embedding array or list as a string, so it can be passed in the
# REST API request.
embedding_string = ",".join(str(item) for item in embedding)

# Create the query using TQL. The dataset path must come from DATASET_PATH
# (the only name defined above); a lowercase `dataset_path` would raise
# NameError when the f-string is evaluated.
query = (
    f"select * from (select text, "
    f"cosine_similarity(embedding, ARRAY[{embedding_string}]) as score "
    f"from \"{DATASET_PATH}\") order by score desc limit 5"
)

# Submit the request and print the decoded JSON response.
response = requests.post(ENDPOINT_URL, json={"query": query}, headers=headers)
data = response.json()
print(data)
const axios = require('axios');

// Tokens are read from environment variables.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const ACTIVELOOP_TOKEN = process.env.ACTIVELOOP_TOKEN;

const QUERY = 'What do the trust and safety models do?';
const DATASET_PATH = 'hub://activeloop/twitter-algorithm';
const ENDPOINT_URL = 'https://app.activeloop.ai/api/query/v1';

/**
 * Get the embeddings of a text from the OpenAI API.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<object>} The response body of the embeddings request.
 */
async function getEmbedding(text) {
  const response = await axios.post(
    'https://api.openai.com/v1/embeddings',
    {
      input: text,
      model: 'text-embedding-ada-002'
    },
    {
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${OPENAI_API_KEY}`
      }
    }
  );
  return response.data;
}

/**
 * Search the dataset using the given query on Activeloop.
 *
 * @param {string} query - TQL query string to submit.
 * @returns {Promise<object>} The response body of the query request.
 */
async function searchDataset(query) {
  // ENDPOINT_URL is passed directly; `${...}` is only valid inside a
  // backtick template literal.
  const response = await axios.post(
    ENDPOINT_URL,
    {
      query: query
    },
    {
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${ACTIVELOOP_TOKEN}`
      }
    }
  );
  return response.data;
}

/**
 * Search for similar texts in the dataset based on the query term and log
 * the result.
 *
 * @param {string} query - Natural-language search term to embed.
 * @param {string} dataset_path - Path of the dataset to query.
 * @returns {Promise<void>}
 */
async function searchSimilarTexts(query, dataset_path) {
  // Get the embedding of the query term.
  const embedding = await getEmbedding(query);
  const embedding_search = embedding.data[0].embedding.join(',');

  // Construct the search query. l2_norm of the difference is a distance, so
  // the closest matches have the smallest score: sort ascending.
  const TQL = `SELECT * FROM ( SELECT text, l2_norm(embedding - ARRAY[${embedding_search}]) AS score from "${dataset_path}" ) ORDER BY score ASC LIMIT 5`;

  // Search the dataset using the constructed query.
  const response = await searchDataset(TQL);

  // Log the search results.
  console.log(response);
}

searchSimilarTexts(QUERY, DATASET_PATH).catch((error) => {
  // Surface request failures instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
Congrats! You performed a vector search using the Deep Lake Managed Database! 🎉