Skip to content

together

Classes for augmentation with Together embeddings.

Classes:

  • TogetherAug

    Class for augmentation with Together embeddings.

TogetherAug

TogetherAug(
    model_name: Optional[str] = None,
    db: DBBase | None = None,
    top_k: Optional[int] = None,
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    cache: Cache | None = None,
    logs: dict[str, Any] | None = None,
)

Bases: AugmentedBase

Class for augmentation with Together embeddings.

Methods:

  • apply

    Apply attached configuration to the step.

  • get_embedding

    Retrieve the embedding for given texts using Together API.

  • process

    Run augmentation against the current pipeline content.

  • search

    Search an encoded query into vector database.

Source code in src/rago/augmented/base.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def __init__(
    self,
    model_name: Optional[str] = None,
    db: DBBase | None = None,
    top_k: Optional[int] = None,
    api_key: str = '',
    api_params: dict[str, Any] | None = None,
    cache: Cache | None = None,
    logs: dict[str, Any] | None = None,
) -> None:
    """Initialize the augmentation step with its configuration.

    Any argument left unset falls back to a class-level default
    (`default_model_name`, `default_top_k`) or, for the vector
    database, a fresh `FaissDB` instance.
    """
    super().__init__()

    # Plain configuration values.
    self.api_key = api_key
    self.api_params = api_params or {}
    self.cache = cache
    self.logs = {} if logs is None else logs

    # Defaulted collaborators; `None` selects the class default.
    self.db = db or FaissDB()
    self.top_k = self.default_top_k if top_k is None else top_k
    self.model_name = (
        self.default_model_name if model_name is None else model_name
    )
    # The concrete client is created lazily by `_setup()`.
    self.model = None

    self._validate()
    self._load_optional_modules()
    self._setup()

apply

apply(parameters: Any) -> None

Apply attached configuration to the step.

Source code in src/rago/base.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
def apply(self, parameters: Any) -> None:
    """Apply attached configuration to the step."""
    if parameters is None:
        return

    # Dedicated backends are assigned wholesale rather than unpacked.
    if _is_cache_backend(parameters):
        self.cache = parameters
        return

    if _is_vector_db(parameters):
        self.db = parameters
        return

    if _is_text_splitter(parameters):
        self.splitter = parameters
        return

    # Generic configuration object: copy each entry onto the step,
    # normalizing `logs=None` to an empty dict.
    for name, val in config_to_dict(parameters).items():
        if name == 'cache':
            self.cache = val
        elif name == 'logs':
            self.logs = {} if val is None else val
        else:
            setattr(self, name, val)

get_embedding

get_embedding(content: list[str]) -> EmbeddingType

Retrieve the embedding for given texts using Together API.

Source code in src/rago/augmented/together.py
42
43
44
45
46
47
48
49
50
51
52
53
def get_embedding(self, content: list[str]) -> EmbeddingType:
    """Retrieve the embedding for given texts using Together API.

    Sends one batched request for all texts instead of one request
    per text — the Together embeddings endpoint accepts a list of
    inputs and returns one data item per input, in order.

    Parameters
    ----------
    content : list[str]
        Texts to embed.

    Returns
    -------
    EmbeddingType
        Float32 array with one embedding row per input text.
    """
    if not content:
        # Skip the API call entirely for empty input.
        return np.array([], dtype=np.float32)

    client = cast('Together', self.model)
    response = client.embeddings.create(
        model=self.model_name, input=content
    )
    return np.array(
        [item.embedding for item in response.data], dtype=np.float32
    )

process

process(inp: Input) -> Output

Run augmentation against the current pipeline content.

Source code in src/rago/augmented/base.py
154
155
156
157
158
159
160
161
162
def process(self, inp: Input) -> Output:
    """Run augmentation against the current pipeline content."""
    # Prefer 'content', then 'data', then 'source' as the document field.
    docs = inp.get('content', inp.get('data', inp.get('source')))
    retrieved = self.search(
        str(inp.query), ensure_list(docs), top_k=self.top_k
    )
    out = Output.from_input(inp)
    out.content = retrieved
    out.data = retrieved
    return out

search

search(
    query: str, documents: list[str], top_k: int = 0
) -> list[str]

Search an encoded query into vector database.

Source code in src/rago/augmented/together.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def search(
    self, query: str, documents: list[str], top_k: int = 0
) -> list[str]:
    """Search an encoded query into vector database.

    Parameters
    ----------
    query : str
        Natural-language query to encode and search with.
    documents : list[str]
        Candidate documents to embed and index.
    top_k : int
        Number of results to return; 0 falls back to the configured
        `top_k`, then `default_top_k`, then 1.

    Returns
    -------
    list[str]
        The retrieved documents, best matches first.

    Raises
    ------
    RuntimeError
        If no vector database is attached to this step.
    """
    if not getattr(self, 'db', None):
        # RuntimeError is still caught by callers using `except Exception`.
        raise RuntimeError('Vector database (db) is not initialized.')

    document_encoded = self.get_embedding(documents)
    query_encoded = self.get_embedding([query])
    # Never search with top_k == 0; walk the fallback chain instead.
    top_k = top_k or self.top_k or self.default_top_k or 1

    self.db.embed(document_encoded)
    scores, indices = self.db.search(query_encoded, top_k=top_k)

    # Record search telemetry for later inspection/debugging.
    self.logs['indices'] = indices
    self.logs['scores'] = scores
    self.logs['search_params'] = {'top_k': top_k}

    return self._resolve_retrieved_docs(documents, indices)