All downloads are free. Search and download functionality uses the official Maven repository.

com.hw.langchain.examples.vectorstores.PineconeExample Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hw.langchain.examples.vectorstores;

import com.hw.langchain.embeddings.openai.OpenAIEmbeddings;
import com.hw.langchain.schema.Document;
import com.hw.langchain.vectorstores.pinecone.Pinecone;
import com.hw.pinecone.PineconeClient;
import com.hw.pinecone.entity.index.CreateIndexRequest;
import com.hw.pinecone.entity.index.IndexDescription;
import com.hw.pinecone.entity.vector.DescribeIndexStatsRequest;

import org.awaitility.Awaitility;

import java.time.Duration;
import java.util.List;

/**
 * Shared helper for the vector store examples: prepares the Pinecone index named
 * {@link #INDEX_NAME} and returns a {@link Pinecone} store bound to it.
 *
 * <p>Environment expected by the examples:
 * <pre>
 * export PINECONE_API_KEY=xxx
 * export PINECONE_ENV=xxx
 * </pre>
 *
 * @author HamaWhite
 */
public final class PineconeExample {

    /** Name of the Pinecone index used by the examples. */
    public static final String INDEX_NAME = "langchain-demo";

    /** The text-embedding-ada-002 model has an output dimension of 1536. */
    private static final int EMBEDDING_DIMENSION = 1536;

    /** Upper bound on how long to wait for a freshly created index to report ready. */
    private static final Duration INDEX_READY_TIMEOUT = Duration.ofSeconds(120);

    /** Delay between two consecutive readiness checks. */
    private static final Duration INDEX_READY_POLL_INTERVAL = Duration.ofSeconds(5);

    private PineconeExample() {
        // static helpers only, not meant to be instantiated
    }

    /**
     * Initializes the Pinecone client, creates the index if necessary, and uploads the given
     * documents when the index statistics do not list the requested namespace yet.
     *
     * <p>If the namespace is already present in the index statistics, {@code docs} is not used
     * and nothing is uploaded.
     *
     * @param namespace the namespace the returned store operates on
     * @param docs      the documents to embed and store when the namespace is not present yet
     * @return the initialized Pinecone instance
     */
    public static Pinecone initializePineconeIndex(String namespace, List<Document> docs) {
        var client = PineconeClient.builder().requestTimeout(30).build().init();
        createPineconeIndex(client);

        var embeddings = OpenAIEmbeddings.builder().requestTimeout(60).build().init();
        var pinecone = Pinecone.builder()
                .client(client)
                .indexName(INDEX_NAME)
                .namespace(namespace)
                .embeddingFunction(embeddings::embedQuery)
                .build().init();

        // Only ingest when the namespace is missing from the stats, so repeated runs of the
        // examples do not upload the same documents again.
        var request = new DescribeIndexStatsRequest();
        var response = pinecone.getIndex().describeIndexStats(request);
        if (!response.getNamespaces().containsKey(namespace)) {
            pinecone.fromDocuments(docs, embeddings);
        }
        return pinecone;
    }

    /**
     * If the index does not exist, it creates a new index with the specified name and dimension.
     * It also waits until the index is ready before returning. An existing index is left as is.
     *
     * @param client the PineconeClient instance
     */
    public static void createPineconeIndex(PineconeClient client) {
        if (client.listIndexes().contains(INDEX_NAME)) {
            return;
        }
        var request = CreateIndexRequest.builder()
                .name(INDEX_NAME)
                .dimension(EMBEDDING_DIMENSION)
                .build();
        client.createIndex(request);
        awaitIndexReady(client);
    }

    /**
     * Polls the description of {@link #INDEX_NAME} until it reports ready, giving up after
     * {@link #INDEX_READY_TIMEOUT}.
     *
     * @param client the PineconeClient instance used to describe the index
     */
    private static void awaitIndexReady(PineconeClient client) {
        Awaitility.await()
                .atMost(INDEX_READY_TIMEOUT)
                .pollInterval(INDEX_READY_POLL_INTERVAL)
                .until(() -> {
                    IndexDescription indexDescription = client.describeIndex(INDEX_NAME);
                    if (indexDescription == null) {
                        return false;
                    }
                    // A description without a status is treated as "not ready yet" so that the
                    // wait keeps polling instead of aborting with a NullPointerException.
                    var status = indexDescription.getStatus();
                    return status != null && status.isReady();
                });
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy