Skip to content

generation

RAG Generation package.

Modules:

  • base

    Base classes for generation.

  • gemini

    GeminiGen class for text generation using Google's Gemini model.

  • hugging_face

    Hugging Face classes for text generation.

  • llama

    Llama generation module.

  • openai

    OpenAI Generation Model class for flexible GPT-based text generation.

Classes:

GeminiGen

GeminiGen(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: GenerationBase

Gemini generation model for text generation.

Methods:

  • generate

    Generate text using Gemini model support.

Source code in src/rago/generation/base.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class."""
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ['cpu', 'cuda', 'auto']:
        raise Exception(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate

generate(query: str, context: list[str]) -> str | BaseModel

Generate text using Gemini model support.

Source code in src/rago/generation/gemini.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def generate(self, query: str, context: list[str]) -> str | BaseModel:
    """Generate text using Gemini model support."""
    input_text = self.prompt_template.format(
        query=query, context=' '.join(context)
    )

    if not self.structured_output:
        models_params_gen = {'contents': input_text}
        response = self.model.generate_content(**models_params_gen)
        self.logs['model_params'] = models_params_gen
        return cast(str, response.text.strip())

    api_params = (
        self.api_params if self.api_params else self.default_api_params
    )

    messages = []
    if self.system_message:
        messages.append({'role': 'system', 'content': self.system_message})
    messages.append({'role': 'user', 'content': input_text})

    model_params = {
        'messages': messages,
        'response_model': self.structured_output,
        **api_params,
    }

    response = self.model.create(
        **model_params,
    )

    self.logs['model_params'] = model_params

    return cast(BaseModel, response)

GenerationBase

GenerationBase(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: RagoBase

Generic Generation class.

Methods:

  • generate

    Generate text with optional language parameter.

Source code in src/rago/generation/base.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class."""
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ['cpu', 'cuda', 'auto']:
        raise Exception(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate abstractmethod

generate(query: str, context: list[str]) -> str | BaseModel

Generate text with optional language parameter.

Parameters:

  • query (str) –

    The input query or prompt.

  • context (list[str]) –

    Additional context information for the generation.

Returns:

  • str

    Generated text based on query and context.

Source code in src/rago/generation/base.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
@abstractmethod
def generate(
    self,
    query: str,
    context: list[str],
) -> str | BaseModel:
    """Generate text with optional language parameter.

    Parameters
    ----------
    query : str
        The input query or prompt.
    context : list[str]
        Additional context information for the generation.

    Returns
    -------
    str
        Generated text based on query and context.
    """
    ...

HuggingFaceGen

HuggingFaceGen(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: GenerationBase

HuggingFaceGen.

Methods:

  • generate

    Generate the text from the query and augmented context.

Source code in src/rago/generation/base.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class."""
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ['cpu', 'cuda', 'auto']:
        raise Exception(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate

generate(query: str, context: list[str]) -> str

Generate the text from the query and augmented context.

Source code in src/rago/generation/hugging_face.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def generate(self, query: str, context: list[str]) -> str:
    """Generate the text from the query and augmented context."""
    with torch.no_grad():
        input_text = self.prompt_template.format(
            query=query, context=' '.join(context)
        )
        input_ids = self.tokenizer.encode(
            input_text,
            return_tensors='pt',
            truncation=True,
            max_length=512,
        ).to(self.device_name)

        api_params = (
            self.api_params if self.api_params else self.default_api_params
        )

        model_params = dict(
            inputs=input_ids,
            max_length=self.output_max_length,
            pad_token_id=self.tokenizer.eos_token_id,
            **api_params,
        )

        outputs = self.model.generate(**model_params)

        self.logs['model_params'] = model_params

        response = self.tokenizer.decode(
            outputs[0], skip_special_tokens=True
        )

    if self.device_name == 'cuda':
        torch.cuda.empty_cache()

    return str(response)

LlamaGen

LlamaGen(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: GenerationBase

Llama Generation class.

Methods:

  • generate

    Generate text using Llama model with language support.

Source code in src/rago/generation/base.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class."""
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ['cpu', 'cuda', 'auto']:
        raise Exception(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate

generate(query: str, context: list[str]) -> str

Generate text using Llama model with language support.

Source code in src/rago/generation/llama.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def generate(self, query: str, context: list[str]) -> str:
    """Generate text using Llama model with language support."""
    input_text = self.prompt_template.format(
        query=query, context=' '.join(context)
    )

    # Detect and set the language code for multilingual models (optional)
    language = str(detect(query)) or 'en'
    self.tokenizer.lang_code = language

    api_params = (
        self.api_params if self.api_params else self.default_api_params
    )

    # Generate the response with adjusted parameters

    model_params = dict(
        text_inputs=input_text,
        max_new_tokens=self.output_max_length,
        do_sample=True,
        temperature=self.temperature,
        eos_token_id=self.tokenizer.eos_token_id,
        **api_params,
    )
    response = self.generator(**model_params)

    self.logs['model_params'] = model_params

    # Extract and return the answer only
    answer = str(response[0].get('generated_text', ''))
    # Strip off any redundant text after the answer itself
    return answer.split('Answer:')[-1].strip()

OpenAIGen

OpenAIGen(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: GenerationBase

OpenAI generation model for text generation.

Methods:

  • generate

    Generate text using OpenAI's API with dynamic model support.

Source code in src/rago/generation/base.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class."""
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ['cpu', 'cuda', 'auto']:
        raise Exception(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate

generate(query: str, context: list[str]) -> str | BaseModel

Generate text using OpenAI's API with dynamic model support.

Source code in src/rago/generation/openai.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def generate(
    self,
    query: str,
    context: list[str],
) -> str | BaseModel:
    """Generate text using OpenAI's API with dynamic model support."""
    input_text = self.prompt_template.format(
        query=query, context=' '.join(context)
    )

    if not self.model:
        raise Exception('The model was not created.')

    api_params = (
        self.api_params if self.api_params else self.default_api_params
    )

    messages = []
    if self.system_message:
        messages.append({'role': 'system', 'content': self.system_message})
    messages.append({'role': 'user', 'content': input_text})

    model_params = dict(
        model=self.model_name,
        messages=messages,
        max_tokens=self.output_max_length,
        temperature=self.temperature,
        **api_params,
    )

    if self.structured_output:
        model_params['response_model'] = self.structured_output

    response = self.model.chat.completions.create(**model_params)

    self.logs['model_params'] = model_params

    has_choices = hasattr(response, 'choices')

    if has_choices and isinstance(response.choices, list):
        return cast(str, response.choices[0].message.content.strip())
    return cast(BaseModel, response)