WeaviateStore

WeaviateStore is the implementation of the Weaviate vector database in VectorConnector.

Tutorial on how to create a Weaviate instance

WeaviateStore inherits from VectorStoreBase and implements similar_search(), vector_name_exists(), and load_document().

class WeaviateStore(VectorStoreBase):
    """Vector store implementation backed by a Weaviate instance."""

    def __init__(self, ctx: dict) -> None:
        """Create a Weaviate client for the store described by ``ctx``.

        Args:
            ctx: Store configuration. ``ctx["vector_store_name"]`` is required
                and is used as the Weaviate class name; ``ctx["embeddings"]``
                is optional and defaults to ``None``.

        Raises:
            ValueError: If the ``weaviate`` package cannot be imported.
            KeyError: If ``ctx`` has no ``"vector_store_name"`` entry.
        """
        # Imported lazily so the module can be loaded without the optional
        # weaviate-client dependency installed.
        try:
            import weaviate
        except ImportError as exc:
            # Keep ValueError for existing callers, but chain the original
            # ImportError so the real cause shows up in the traceback.
            raise ValueError(
                "Could not import weaviate python package. "
                "Please install it with `pip install weaviate-client`."
            ) from exc

        self.ctx = ctx
        self.weaviate_url = CFG.WEAVIATE_URL
        self.embedding = ctx.get("embeddings", None)
        self.vector_name = ctx["vector_store_name"]
        # Path derived from the upload root and the store name; it is only
        # computed here, not created on disk.
        self.persist_dir = os.path.join(
            KNOWLEDGE_UPLOAD_ROOT_PATH, self.vector_name + ".vectordb"
        )

        self.vector_store_client = weaviate.Client(self.weaviate_url)

similar_search()

   def similar_search(self, text: str, topk: int) -> list:
        """Fetch up to ``topk`` stored objects from this store's Weaviate class.

        NOTE(review): the near-vector clause is currently disabled, so ``text``
        is not used and the results are not ranked by similarity. To enable
        vector search, add
        ``.with_near_vector({"vector": self.embedding.embed_query(text)})``
        to the query chain (requires ``self.embedding`` to be set).

        Args:
            text: Query text (unused while the near-vector clause is disabled).
            topk: Maximum number of objects to return.

        Returns:
            list: The objects listed under the first class key of the GraphQL
            ``Get`` response, each carrying the requested ``metadata`` and
            ``page_content`` properties; an empty list if ``Get`` is empty.
        """
        logger.info("Weaviate similar search")
        response = (
            self.vector_store_client.query.get(
                self.vector_name, ["metadata", "page_content"]
            )
            .with_limit(topk)
            .do()
        )
        # The response is keyed by the class name as Weaviate reports it, which
        # may not match ``self.vector_name`` exactly, so take the first entry
        # instead of indexing by name.
        objects_by_class = response["data"]["Get"]
        return next(iter(objects_by_class.values()), [])

vector_name_exists()

  def vector_name_exists(self) -> bool:
        """Check whether the Weaviate schema has a class named ``self.vector_name``.

        Returns:
            bool: True if the schema lookup returns a non-empty class
            definition, False if it is empty or Weaviate reports the class
            as missing (HTTP 404).

        Raises:
            weaviate.exceptions.UnexpectedStatusCodeException: For any
                non-404 error status returned by the schema lookup.
        """
        # Imported locally, like ``weaviate`` in ``__init__``, so the module
        # can still be loaded without the optional dependency installed.
        from weaviate.exceptions import UnexpectedStatusCodeException

        try:
            class_schema = self.vector_store_client.schema.get(self.vector_name)
        except UnexpectedStatusCodeException as exc:
            # The client raises on a non-OK status rather than returning an
            # empty value, so a missing class has to be detected here. Any
            # other status is a real failure and is propagated unchanged.
            if getattr(exc, "status_code", None) == 404:
                return False
            raise
        return bool(class_schema)

load_document()

    def load_document(self, documents: list) -> None:
        """Import ``documents`` into this store's Weaviate class in batches.

        Each document is stored as an object with two properties:
        ``metadata`` (the document's ``metadata["source"]`` value) and
        ``page_content`` (the document text).

        Args:
            documents: Objects exposing ``page_content`` and a ``metadata``
                mapping that contains a ``"source"`` key.

        Raises:
            KeyError: If a document's metadata has no ``"source"`` entry.
        """
        logger.info("Weaviate load document")

        with self.vector_store_client.batch as batch:
            batch.batch_size = 100

            for doc in documents:
                properties = {
                    "metadata": doc.metadata["source"],
                    "page_content": doc.page_content,
                }
                batch.add_data_object(
                    data_object=properties, class_name=self.vector_name
                )
            # Push any objects still queued below the batch size.
            batch.flush()