Skip to content

file

Base classes for retrieval.

Classes:

FilePathRet

FilePathRet(
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: RetrievalBase

Base retrieval class for sources given as a file path.

Methods:

  • get

    Get the data from the source.

Source code in src/rago/retrieval/base.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """
    Store the source and splitter, then validate and set up the instance.

    Parameters
    ----------
    source : Any
        Object kept as ``self.source``.
    splitter : TextSplitterBase
        Text splitter kept as ``self.splitter``.
    api_key : str
        Forwarded unchanged to the parent initializer.
    cache : Optional[Cache]
        Forwarded unchanged to the parent initializer.
    logs : dict[str, Any]
        Forwarded to the parent initializer; when the caller leaves it at
        ``DEFAULT_LOGS`` a new empty dict is passed instead.
    """
    # NOTE(review): the default ``splitter`` expression is evaluated once,
    # when this function is defined, so every instance relying on the
    # default shares the same splitter object -- confirm this is intended.

    # ``DEFAULT_LOGS`` only marks "not provided"; give this instance its own
    # dict rather than passing the shared default object along.
    effective_logs: dict[str, Any] = {} if logs is DEFAULT_LOGS else logs
    super().__init__(api_key=api_key, cache=cache, logs=effective_logs)

    self.source = source
    self.splitter = splitter

    # Run the validation hook before the setup hook, now that the
    # attributes above are in place.
    self._validate()
    self._setup()

get abstractmethod

get(query: str = '') -> Iterable[str]

Get the data from the source.

Source code in src/rago/retrieval/base.py
56
57
58
59
@abstractmethod
def get(self, query: str = '') -> Iterable[str]:
    """
    Return the data retrieved from the source.

    Declared abstract, so concrete subclasses are expected to override it.
    This base body ignores ``query`` and yields no data.

    Parameters
    ----------
    query : str
        Accepted for interface compatibility; not read here.

    Returns
    -------
    Iterable[str]
        A new empty list.
    """
    nothing: list[str] = []
    return nothing

PDFPathRet

PDFPathRet(
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: FilePathRet

Retrieval class that extracts the text of a PDF file and splits it into chunks.

Methods:

  • get

    Get the data from the source.

Source code in src/rago/retrieval/base.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    source: Any,
    splitter: TextSplitterBase = LangChainTextSplitter(
        'RecursiveCharacterTextSplitter'
    ),
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """
    Store the source and splitter, then validate and set up the instance.

    Parameters
    ----------
    source : Any
        Object kept as ``self.source``.
    splitter : TextSplitterBase
        Text splitter kept as ``self.splitter``.
    api_key : str
        Forwarded unchanged to the parent initializer.
    cache : Optional[Cache]
        Forwarded unchanged to the parent initializer.
    logs : dict[str, Any]
        Forwarded to the parent initializer; when the caller leaves it at
        ``DEFAULT_LOGS`` a new empty dict is passed instead.
    """
    # NOTE(review): the default ``splitter`` expression is evaluated once,
    # when this function is defined, so every instance relying on the
    # default shares the same splitter object -- confirm this is intended.

    # ``DEFAULT_LOGS`` only marks "not provided"; give this instance its own
    # dict rather than passing the shared default object along.
    effective_logs: dict[str, Any] = {} if logs is DEFAULT_LOGS else logs
    super().__init__(api_key=api_key, cache=cache, logs=effective_logs)

    self.source = source
    self.splitter = splitter

    # Run the validation hook before the setup hook, now that the
    # attributes above are in place.
    self._validate()
    self._setup()

get

get(query: str = '') -> Iterable[str]

Get the data from the source.

Source code in src/rago/retrieval/file.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def get(self, query: str = '') -> Iterable[str]:
    """
    Return the split text of the PDF at ``self.source``, using the cache.

    The cache is looked up with ``self.source`` as the key. On a hit the
    cached value is returned as-is. On a miss the PDF text is extracted,
    recorded under ``self.logs['text']``, split with ``self.splitter`` and
    the split result is saved to the cache before being returned.

    Parameters
    ----------
    query : str
        Not read by this implementation.

    Returns
    -------
    Iterable[str]
        The cached value, or the output of ``self.splitter.split``.
    """
    key = self.source
    hit = self._get_cache(key)

    if hit is None:
        # Cache miss: do the extraction and splitting, then remember it.
        pdf_text = extract_text_from_pdf(self.source)
        self.logs['text'] = pdf_text

        chunks = self.splitter.split(pdf_text)
        self._save_cache(key, chunks)
        return chunks

    # Cache hit: ``cast`` only informs the type checker, the object is
    # returned unchanged.
    return cast(Iterable[str], hit)