Source code for agentscope.rag._knowledge_base

# -*- coding: utf-8 -*-
"""The knowledge base abstraction for retrieval-augmented generation (RAG)."""
from abc import abstractmethod
from typing import Any

from ._reader import Document
from ..embedding import EmbeddingModelBase
from ._store import VDBStoreBase
from ..message import TextBlock
from ..tool import ToolResponse


class KnowledgeBase:
    """The knowledge base abstraction for retrieval-augmented generation
    (RAG).

    The ``retrieve`` and ``add_documents`` methods need to be implemented in
    the subclasses. We also provide a quick method ``retrieve_knowledge`` that
    enables the agent to retrieve knowledge easily.
    """

    # NOTE(review): `retrieve` and `add_documents` are decorated with
    # `@abstractmethod`, but this class does not derive from `abc.ABC`, so
    # Python does not enforce the override at instantiation time. Confirm
    # whether enforcement is desired before changing the base class, since it
    # would make direct instantiation raise `TypeError`.

    embedding_store: VDBStoreBase
    """The embedding store for the knowledge base."""

    embedding_model: EmbeddingModelBase
    """The embedding model for the knowledge base."""

    def __init__(
        self,
        embedding_store: VDBStoreBase,
        embedding_model: EmbeddingModelBase,
    ) -> None:
        """Initialize the knowledge base.

        Args:
            embedding_store (`VDBStoreBase`):
                The embedding store kept on the instance as
                ``embedding_store``.
            embedding_model (`EmbeddingModelBase`):
                The embedding model kept on the instance as
                ``embedding_model``.
        """
        self.embedding_store = embedding_store
        self.embedding_model = embedding_model

    @abstractmethod
    async def retrieve(
        self,
        query: str,
        limit: int = 5,
        score_threshold: float | None = None,
        **kwargs: Any,
    ) -> list[Document]:
        """Retrieve relevant documents by the given query.

        Args:
            query (`str`):
                The query string to retrieve relevant documents.
            limit (`int`, defaults to 5):
                The number of relevant documents to retrieve.
            score_threshold (`float | None`, defaults to `None`):
                The score threshold to filter the retrieved documents. If
                provided, only documents with a score higher than the
                threshold will be returned.
            **kwargs (`Any`):
                Other keyword arguments for the vector database search API.

        Returns:
            `list[Document]`:
                The documents considered relevant to the query.
        """

    @abstractmethod
    async def add_documents(
        self,
        documents: list[Document],
        **kwargs: Any,
    ) -> None:
        """Add documents to the knowledge base, which will embed the
        documents and store them in the embedding store.

        Args:
            documents (`list[Document]`):
                A list of documents to add.
            **kwargs (`Any`):
                Other keyword arguments; their meaning is defined by the
                implementing subclass.
        """

    # A quick method that enables the agent to retrieve knowledge.
    # Developers can wrap the `retrieve` method by themselves to support
    # more flexible usage.
    async def retrieve_knowledge(
        self,
        query: str,
        limit: int = 5,
        score_threshold: float | None = None,
        **kwargs: Any,
    ) -> ToolResponse:
        """Retrieve relevant documents from the knowledge base. Note the
        `query` parameter is directly related to the retrieval quality, and
        for the same question, you can try many different queries to get the
        best results. Adjust the `limit` and `score_threshold` parameters to
        get more or fewer results.

        Args:
            query (`str`):
                The query string, which should be specific and concise. For
                example, you should provide the specific name instead of
                "you", "my", "he", "she", etc.
            limit (`int`, defaults to 5):
                The number of relevant documents to retrieve.
            score_threshold (`float | None`, defaults to `None`):
                A threshold in [0, 1] and only the relevance score above this
                threshold will be returned. Reduce this value to get more
                results. The value is forwarded unchanged to ``retrieve``,
                including when it is `None`.
            **kwargs (`Any`):
                Other keyword arguments forwarded unchanged to ``retrieve``.

        Returns:
            `ToolResponse`:
                One text block per retrieved document, containing its score
                and text content, or a single text block with a hint when no
                document was retrieved.
        """
        docs = await self.retrieve(
            query=query,
            limit=limit,
            score_threshold=score_threshold,
            **kwargs,
        )

        # Guard clause: nothing retrieved, so tell the caller how to widen
        # the search instead of returning an empty response.
        if not docs:
            return ToolResponse(
                content=[
                    TextBlock(
                        type="text",
                        text="No relevant documents found. TRY to reduce the "
                        "`score_threshold` parameter to get "
                        "more results.",
                    ),
                ],
            )

        return ToolResponse(
            content=[
                TextBlock(
                    type="text",
                    text=f"Score: {doc.score}, "
                    f"Content: {doc.metadata.content['text']}",
                )
                for doc in docs
            ],
        )