# -*- coding: utf-8 -*-
"""The knowledge base abstraction for retrieval-augmented generation (RAG)."""
from abc import abstractmethod
from typing import Any

from ._reader import Document
from ..embedding import EmbeddingModelBase
from ._store import VDBStoreBase
from ..message import TextBlock
from ..tool import ToolResponse
class KnowledgeBase:
    """The knowledge base abstraction for retrieval-augmented generation (RAG).

    The ``retrieve`` and ``add_documents`` methods need to be implemented in
    the subclasses. We also provide a quick method ``retrieve_knowledge`` that
    enables the agent to retrieve knowledge easily.
    """

    # NOTE(review): this class does not derive from ``abc.ABC``, so the
    # ``@abstractmethod`` markers below are not enforced at instantiation
    # time. Switching the base class would break callers that instantiate
    # ``KnowledgeBase`` (or partially implemented subclasses) directly, so it
    # is intentionally left unchanged here.

    embedding_store: VDBStoreBase
    """The embedding store for the knowledge base."""

    embedding_model: EmbeddingModelBase
    """The embedding model for the knowledge base."""

    def __init__(
        self,
        embedding_store: VDBStoreBase,
        embedding_model: EmbeddingModelBase,
    ) -> None:
        """Initialize the knowledge base.

        Args:
            embedding_store (`VDBStoreBase`):
                The embedding store, kept on ``self.embedding_store``.
            embedding_model (`EmbeddingModelBase`):
                The embedding model, kept on ``self.embedding_model``.
        """
        self.embedding_store = embedding_store
        self.embedding_model = embedding_model

    @abstractmethod
    async def retrieve(
        self,
        query: str,
        limit: int = 5,
        score_threshold: float | None = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Retrieve relevant documents by the given query.

        Args:
            query (`str`):
                The query string to retrieve relevant documents.
            limit (`int`, defaults to 5):
                The number of relevant documents to retrieve.
            score_threshold (`float | None`, defaults to `None`):
                The score threshold to filter the retrieved documents. If
                provided, only documents with a score higher than the
                threshold will be returned.
            **kwargs (`Any`):
                Other keyword arguments for the vector database search API.

        Returns:
            `list[Document]`:
                The retrieved documents.
        """

    @abstractmethod
    async def add_documents(
        self,
        documents: list[Document],
        **kwargs: Any,
    ) -> None:
        """Add documents to the knowledge base, which will embed the
        documents and store them in the embedding store.

        Args:
            documents (`list[Document]`):
                A list of documents to add.
            **kwargs (`Any`):
                Other keyword arguments accepted by the implementation.
        """

    # A quick method that enables the agent to retrieve knowledge.
    # Developers can wrap the `retrieve` method by themselves to support
    # more flexible usage.
    async def retrieve_knowledge(
        self,
        query: str,
        limit: int = 5,
        score_threshold: float | None = None,
        **kwargs: Any,
    ) -> ToolResponse:
        """Retrieve relevant documents from the knowledge base. Note the
        `query` parameter is directly related to the retrieval quality, and
        for the same question, you can try many different queries to get the
        best results. Adjust the `limit` and `score_threshold` parameters to
        get more or fewer results.

        Args:
            query (`str`):
                The query string, which should be specific and concise. For
                example, you should provide the specific name instead of
                "you", "my", "he", "she", etc.
            limit (`int`, defaults to 5):
                The number of relevant documents to retrieve.
            score_threshold (`float | None`, defaults to `None`):
                A threshold in [0, 1] and only the relevance score above
                this threshold will be returned. Reduce this value to get
                more results. The value is forwarded unchanged to
                ``retrieve``.
            **kwargs (`Any`):
                Other keyword arguments forwarded unchanged to ``retrieve``.

        Returns:
            `ToolResponse`:
                One text block per retrieved document, holding its score and
                text content, or a single text block with a hint when no
                document was retrieved.
        """
        docs = await self.retrieve(
            query=query,
            limit=limit,
            score_threshold=score_threshold,
            **kwargs,
        )

        # Guard clause: give the caller an actionable hint instead of an
        # empty response when nothing was retrieved.
        if not docs:
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text="No relevant documents found. TRY to reduce the "
                        "`score_threshold` parameter to get "
                        "more results.",
                    ),
                ],
            )

        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Score: {doc.score}, "
                    f"Content: {doc.metadata.content['text']}",
                )
                for doc in docs
            ],
        )