Skip to content

retrieval

Composable retrieval APIs for Rago.

Modules:

  • base

    Base classes for retrieval steps.

  • dummy

    In-memory retrieval implementations.

  • file

    File-based retrieval implementations.

  • text_splitter

    Package of text splitter classes.

  • tools

    Tools that support the retrieval classes.

Classes:

Retrieval

Retrieval(
    source: Any = None,
    backend: str = 'string',
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    splitter: Any = None,
    cache: Any = None,
    logs: dict[str, Any] | None = None,
)

Bases: StepBase

Public retrieval wrapper that resolves a concrete backend lazily.

Methods:

  • apply

    Apply declarative configuration to the retrieval wrapper.

  • get

    Backward-compatible alias for retrieve.

  • process

    Process the current pipeline source with retrieval.

  • retrieve

    Resolve the concrete retriever and fetch content.

Source code in src/rago/retrieval/__init__.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def __init__(
    self,
    source: Any = None,
    backend: str = 'string',
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    splitter: Any = None,
    cache: Any = None,
    logs: dict[str, Any] | None = None,
) -> None:
    """Record the wrapper configuration.

    The constructor only stores settings: the backend name is kept
    lower-cased, and ``source``, ``api_key`` and ``api_params`` are
    bundled into a ``RetrievalParameters`` object.
    """
    super().__init__()

    # Keep the backend name in a single, lower-cased form.
    self.backend = backend.lower()

    # A missing (or empty) mapping is replaced by a fresh dict.
    connection_options = api_params if api_params else {}
    self.params = RetrievalParameters(
        source=source,
        api_key=api_key,
        api_params=connection_options,
    )

    # Fall back to the default splitter when none (or a falsy one) is given.
    if not splitter:
        splitter = LangChainTextSplitter('RecursiveCharacterTextSplitter')
    self.splitter = splitter

    self.cache = cache

    # Only ``None`` triggers a new dict, so a caller-supplied (even empty)
    # mapping is kept as the very same object.
    if logs is None:
        logs = {}
    self.logs = logs

apply

apply(parameters: Any) -> None

Apply declarative configuration to the retrieval wrapper.

Source code in src/rago/retrieval/__init__.py
63
64
65
66
67
68
69
70
71
72
def apply(self, parameters: Any) -> None:
    """Apply declarative configuration to the retrieval wrapper.

    The parent class handles ``parameters`` first. Each entry of
    ``config_to_dict(parameters)`` is then routed: ``splitter`` replaces
    the splitter, a string ``backend`` is stored lower-cased, and every
    other entry (including a non-string ``backend``) is written into the
    retrieval parameter mapping.
    """
    super().apply(parameters)

    overrides = config_to_dict(parameters)
    for name, setting in overrides.items():
        if name == 'splitter':
            self.splitter = setting
        elif name == 'backend' and isinstance(setting, str):
            self.backend = setting.lower()
        else:
            self.params.params[name] = setting

get

get(query: str = '', source: Any = None) -> list[str]

Backward-compatible alias for retrieve.

Source code in src/rago/retrieval/__init__.py
 99
100
101
def get(self, query: str = '', source: Any = None) -> list[str]:
    """Delegate to `retrieve`; kept as a backward-compatible alias."""
    retrieved = self.retrieve(query=query, source=source)
    return retrieved

process

process(inp: Input) -> Output

Process the current pipeline source with retrieval.

Source code in src/rago/retrieval/__init__.py
103
104
105
106
107
108
109
110
111
112
113
def process(self, inp: Input) -> Output:
    """Process the current pipeline source with retrieval.

    The source is looked up in priority order, moving on whenever the
    current candidate is ``None``:

    1. the ``source`` entry of the configured retrieval parameters;
    2. the ``source`` entry of ``inp``;
    3. the ``content`` entry of ``inp``.

    The retrieved result is stored on both ``content`` and ``data`` of an
    ``Output`` derived from ``inp``.
    """
    source = self.params.params.get('source')
    if source is None:
        source = inp.get('source')
    # Checked separately (not as a ``get`` default) so that a ``source``
    # entry that is present but ``None`` still falls back to ``content``,
    # matching the fallback order used by ``RetrievalBase.process``.
    if source is None:
        source = inp.get('content')

    result = self.retrieve(query=inp.query, source=source)
    output = Output.from_input(inp)
    output.content = result
    output.data = result
    return output

retrieve

retrieve(query: str = '', source: Any = None) -> list[str]

Resolve the concrete retriever and fetch content.

Source code in src/rago/retrieval/__init__.py
94
95
96
97
def retrieve(self, query: str = '', source: Any = None) -> list[str]:
    """Fetch content through the concrete retriever.

    The retriever is obtained from ``self._resolve(source=source)`` and
    the call is forwarded to its own ``retrieve`` with the same arguments.
    """
    concrete = self._resolve(source=source)
    fetched = concrete.retrieve(query=query, source=source)
    return fetched

RetrievalBase

RetrievalBase(
    source: Any = None,
    splitter: TextSplitterBase | None = None,
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    cache: Cache | None = None,
    logs: dict[str, Any] | None = None,
)

Bases: StepBase

Base retrieval class.

Methods:

  • apply

    Apply attached configuration to the step.

  • get

    Backward-compatible alias for retrieve.

  • process

    Resolve the content for downstream steps.

  • retrieve

    Get the data from the source.

Source code in src/rago/retrieval/base.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def __init__(
    self,
    source: Any = None,
    splitter: TextSplitterBase | None = None,
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    cache: Cache | None = None,
    logs: dict[str, Any] | None = None,
) -> None:
    """Store the retrieval settings, then validate and set up the step.

    Missing values get defaults: a ``LangChainTextSplitter`` for the
    splitter and fresh dicts for ``api_params`` and ``logs``. Once all
    attributes are assigned, ``_validate`` and ``_setup`` are called, in
    that order.
    """
    super().__init__()
    self.source = source

    # Fall back to the default splitter when none (or a falsy one) is given.
    if not splitter:
        splitter = LangChainTextSplitter('RecursiveCharacterTextSplitter')
    self.splitter = splitter

    self.api_key = api_key
    self.api_params = api_params if api_params else {}
    self.cache = cache

    # Only ``None`` triggers a new dict, so a caller-supplied (even empty)
    # mapping is kept as the very same object.
    if logs is None:
        logs = {}
    self.logs = logs

    # Hooks run last so they can see every attribute assigned above.
    self._validate()
    self._setup()

apply

apply(parameters: Any) -> None

Apply attached configuration to the step.

Source code in src/rago/base.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def apply(self, parameters: Any) -> None:
    """Attach a component or a configuration mapping to the step.

    ``None`` is ignored. A cache backend, vector database or text
    splitter is attached whole as ``cache``, ``db`` or ``splitter``.
    Anything else is converted with ``config_to_dict`` and each entry
    becomes an attribute of the same name; a ``None`` value for ``logs``
    is stored as an empty dict.
    """
    if parameters is None:
        return

    # The component checks run in a fixed order; the first match wins.
    if _is_cache_backend(parameters):
        self.cache = parameters
    elif _is_vector_db(parameters):
        setattr(self, 'db', parameters)
    elif _is_text_splitter(parameters):
        setattr(self, 'splitter', parameters)
    else:
        for name, setting in config_to_dict(parameters).items():
            # ``logs`` is never stored as None; use a fresh dict instead.
            if name == 'logs' and setting is None:
                setting = {}
            setattr(self, name, setting)

get

get(query: str = '', source: Any = None) -> list[str]

Backward-compatible alias for retrieve.

Source code in src/rago/retrieval/base.py
100
101
102
def get(self, query: str = '', source: Any = None) -> list[str]:
    """Delegate to `retrieve`; kept as a backward-compatible alias."""
    retrieved = self.retrieve(query=query, source=source)
    return retrieved

process

process(inp: Input) -> Output

Resolve the content for downstream steps.

Source code in src/rago/retrieval/base.py
104
105
106
107
108
109
110
111
112
113
114
def process(self, inp: Input) -> Output:
    """Retrieve content and expose it to downstream steps.

    The source is the first non-``None`` value among ``self.source``,
    ``inp.get('source')`` and ``inp.get('content')``. The retrieved
    result is passed through ``ensure_list`` and stored on both
    ``content`` and ``data`` of an ``Output`` derived from ``inp``.
    """
    source = self.source
    if source is None:
        source = inp.get('source')
    if source is None:
        source = inp.get('content')

    retrieved = self.retrieve(query=inp.query, source=source)
    result = ensure_list(retrieved)

    output = Output.from_input(inp)
    output.content = result
    output.data = result
    return output

retrieve abstractmethod

retrieve(query: str = '', source: Any = None) -> list[str]

Get the data from the source.

Source code in src/rago/retrieval/base.py
96
97
98
@abstractmethod
def retrieve(self, query: str = '', source: Any = None) -> list[str]:
    """Get the data from the source.

    Abstract method with no body here; ``get`` and ``process`` in this
    class both call it with ``query`` and ``source`` as keyword arguments.

    Parameters
    ----------
    query : str
        Query text for the retrieval; defaults to an empty string.
    source : Any
        Source to read from; defaults to ``None``.

    Returns
    -------
    list[str]
        The retrieved content.
    """

RetrievalParameters

RetrievalParameters(**kwargs: Any)

Bases: ParametersBase

Parameters for configuring retrieval steps.

Methods:

  • apply

    Merge additional configuration into this object.

  • process

    Return the input unchanged for configuration-only objects.

Attributes:

  • params (dict[str, Any]) –

    Expose the underlying parameter mapping.

Source code in src/rago/base.py
62
63
def __init__(self, **kwargs: Any) -> None:
    """Collect the keyword arguments and pass them on as a single dict."""
    # The parent initializer receives one positional mapping, not **kwargs.
    super().__init__(kwargs)

params property

params: dict[str, Any]

Expose the underlying parameter mapping.

apply

apply(parameters: Any) -> None

Merge additional configuration into this object.

Source code in src/rago/base.py
86
87
88
def apply(self, parameters: Any) -> None:
    """Merge additional configuration into this object.

    ``parameters`` is converted with ``config_to_dict`` and the result is
    merged into ``self.data`` through its ``update`` method.
    """
    incoming = config_to_dict(parameters)
    self.data.update(incoming)

process

process(inp: Input) -> Output

Return the input unchanged for configuration-only objects.

Source code in src/rago/base.py
90
91
92
def process(self, inp: Input) -> Output:
    """Pass the input through untouched, converted via ``inp.to_output()``.

    Configuration-only objects do no processing of their own.
    """
    passthrough = inp.to_output()
    return passthrough