Source code for agentscope.rag._reader._image_reader

# -*- coding: utf-8 -*-
"""The Image reader modules"""
import hashlib

from .. import DocMetadata
from ...message import ImageBlock, URLSource
from .._reader import ReaderBase, Document


class ImageReader(ReaderBase):
    """A simple image reader that wraps the image into a Document object.

    This class is only a simple implementation to support multimodal RAG.
    """

    async def __call__(self, image_url: str | list[str]) -> list[Document]:
        """Wrap one or more image URLs/paths into Document objects.

        The image itself is not opened or downloaded here: each URL/path
        string is placed in a URL-sourced ``ImageBlock`` and stored as the
        content of a single-chunk document.

        Args:
            image_url (`str | list[str]`):
                The image URL(s) or path(s). A single string is treated as a
                one-element list.

        Returns:
            `list[Document]`:
                One Document per input URL/path, in input order.
        """
        # Normalise the single-string form so the rest handles one shape only.
        urls = [image_url] if isinstance(image_url, str) else image_url

        return [
            Document(
                metadata=DocMetadata(
                    content=ImageBlock(
                        type="image",
                        source=URLSource(
                            type="url",
                            url=url,
                        ),
                    ),
                    doc_id=self.get_doc_id(url),
                    # Every image is stored as one indivisible chunk.
                    chunk_id=0,
                    total_chunks=1,
                ),
            )
            for url in urls
        ]

    def get_doc_id(self, image_path: str) -> str:
        """Generate a document ID based on the image path.

        The ID is the hexadecimal MD5 digest of the UTF-8 encoded path
        string, so the same path/URL always yields the same ID.

        Args:
            image_path (`str`):
                The image path or URL.

        Returns:
            `str`:
                The generated document ID.
        """
        # MD5 is used purely as an identifier, not for security. Saying so
        # explicitly keeps this working on FIPS-restricted Python builds,
        # where a plain ``hashlib.md5()`` call raises ``ValueError``. The
        # resulting digest is unchanged.
        return hashlib.md5(
            image_path.encode("utf-8"),
            usedforsecurity=False,
        ).hexdigest()