file

Base classes for retrieval.

Classes:

FilePathRet –

File Retrieval class.
PDFPathRet –

PDF retrieval class: extracts the text of a PDF source and splits it into chunks.

FilePathRet

FilePathRet(
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: RetrievalBase

File Retrieval class.

Methods:

get –

Get the data from the source.

Source code in src/rago/retrieval/base.py

def __init__(
    self,
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Store the source and splitter, then run validation and setup.

    Parameters
    ----------
    source
        Object the retrieval reads from; stored as ``self.source``.
    splitter
        Text splitter stored as ``self.splitter``.
    api_key
        Forwarded unchanged to the parent initializer.
    cache
        Forwarded unchanged to the parent initializer.
    logs
        Forwarded to the parent initializer. When the ``DEFAULT_LOGS``
        object itself is passed, a new empty dict is forwarded instead.
    """
    # NOTE(review): the default ``splitter`` expression is evaluated once,
    # when this function is defined, so every call that omits ``splitter``
    # receives the same splitter object -- confirm sharing is intended.

    # ``DEFAULT_LOGS`` is compared by identity and acts as a "not provided"
    # marker: the shared default object is never handed to the parent.
    own_logs: dict[str, Any] = {} if logs is DEFAULT_LOGS else logs
    super().__init__(api_key=api_key, cache=cache, logs=own_logs)

    self.source = source
    self.splitter = splitter

    # Hooks run last, after both attributes above have been assigned.
    self._validate()
    self._setup()

get `abstractmethod`

get(query: str = '') -> Iterable[str]

Get the data from the source.

Source code in src/rago/retrieval/base.py

@abstractmethod
def get(self, query: str = '') -> Iterable[str]:
    """Get the data from the source.

    Parameters
    ----------
    query
        Not used by this base implementation.

    Returns
    -------
    Iterable[str]
        A new empty list on every call.
    """
    no_chunks: list[str] = []
    return no_chunks

PDFPathRet

PDFPathRet(
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: FilePathRet

PDF retrieval class: extracts the text of a PDF source and splits it into chunks.

Methods:

get –

Get the data from the source.

Source code in src/rago/retrieval/base.py

def __init__(
    self,
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Store the source and splitter, then run validation and setup.

    Parameters
    ----------
    source
        Object the retrieval reads from; stored as ``self.source``.
    splitter
        Text splitter stored as ``self.splitter``.
    api_key
        Forwarded unchanged to the parent initializer.
    cache
        Forwarded unchanged to the parent initializer.
    logs
        Forwarded to the parent initializer. When the ``DEFAULT_LOGS``
        object itself is passed, a new empty dict is forwarded instead.
    """
    # NOTE(review): the default ``splitter`` expression is evaluated once,
    # when this function is defined, so every call that omits ``splitter``
    # receives the same splitter object -- confirm sharing is intended.

    # ``DEFAULT_LOGS`` is compared by identity and acts as a "not provided"
    # marker: the shared default object is never handed to the parent.
    own_logs: dict[str, Any] = {} if logs is DEFAULT_LOGS else logs
    super().__init__(api_key=api_key, cache=cache, logs=own_logs)

    self.source = source
    self.splitter = splitter

    # Hooks run last, after both attributes above have been assigned.
    self._validate()
    self._setup()

get

get(query: str = '') -> Iterable[str]

Get the data from the source.

Source code in src/rago/retrieval/file.py

def get(self, query: str = '') -> Iterable[str]:
    """Return the split text of the PDF referenced by ``self.source``.

    A previously cached result for the same source is returned as-is.
    Otherwise the PDF text is extracted, recorded under ``self.logs['text']``,
    split with ``self.splitter`` and saved to the cache before returning.

    Parameters
    ----------
    query
        Not used by this implementation.

    Returns
    -------
    Iterable[str]
        The cached value if one exists, else the output of
        ``self.splitter.split``.
    """
    # NOTE(review): the cache key is the source alone, so it does not vary
    # with the splitter in use -- confirm that is intended.
    key = self.source
    hit = self._get_cache(key)

    if hit is None:
        # Cache miss: extract, log the raw text, split, then store.
        pdf_text = extract_text_from_pdf(self.source)
        self.logs['text'] = pdf_text
        chunks = self.splitter.split(pdf_text)
        self._save_cache(key, chunks)
        return chunks

    return cast(Iterable[str], hit)

file

FilePathRet

get abstractmethod

PDFPathRet

get

get `abstractmethod`