hugging_face

Hugging Face classes for text generation.

Classes:

HuggingFaceGen –

Text generation class backed by a Hugging Face model and tokenizer.

HuggingFaceGen

HuggingFaceGen(
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
)

Bases: GenerationBase

Text generation class backed by a Hugging Face model and tokenizer.

Methods:

generate –

Generate the text from the query and augmented context.

Source code in src/rago/generation/base.py

def __init__(
    self,
    model_name: Optional[str] = None,
    temperature: Optional[float] = None,
    prompt_template: str = '',
    output_max_length: int = 500,
    device: str = 'auto',
    structured_output: Optional[Type[BaseModel]] = None,
    system_message: str = '',
    api_params: dict[str, Any] = DEFAULT_API_PARAMS,
    api_key: str = '',
    cache: Optional[Cache] = None,
    logs: dict[str, Any] = DEFAULT_LOGS,
) -> None:
    """Initialize Generation class.

    Parameters
    ----------
    model_name
        Model identifier; ``self.default_model_name`` is used when None.
    temperature
        Sampling temperature; ``self.default_temperature`` is used when
        None.
    prompt_template
        Prompt template; ``self.default_prompt_template`` is used when
        empty.
    output_max_length
        Maximum output length; a falsy value (e.g. ``0``) falls back to
        ``self.default_output_max_length``.
    device
        One of ``'cpu'``, ``'cuda'`` or ``'auto'``.
    structured_output
        Optional model class stored on the instance as-is.
    system_message
        System message stored on the instance as-is.
    api_params
        Extra parameters for the backend; when left at the module default
        a deep copy of ``self.default_api_params`` (or ``{}``) is used.
    api_key
        Forwarded to the parent initializer.
    cache
        Forwarded to the parent initializer.
    logs
        Forwarded to the parent initializer; a fresh dict is created when
        left at the module default.

    Raises
    ------
    ValueError
        If ``device`` is not ``'cpu'``, ``'cuda'`` or ``'auto'``.
    """
    # Identity check against the module-level default object: give this
    # instance its own dict instead of reusing the shared default.
    if logs is DEFAULT_LOGS:
        logs = {}
    super().__init__(api_key=api_key, cache=cache, logs=logs)

    self.model_name: str = (
        model_name if model_name is not None else self.default_model_name
    )
    self.output_max_length: int = (
        output_max_length or self.default_output_max_length
    )
    # ``is not None`` (rather than ``or``) so a temperature of 0.0 is kept.
    self.temperature: float = (
        temperature
        if temperature is not None
        else self.default_temperature
    )

    self.prompt_template: str = (
        prompt_template or self.default_prompt_template
    )
    self.structured_output: Optional[Type[BaseModel]] = structured_output
    # Same sentinel pattern as ``logs``: copy the defaults so later changes
    # to this instance's params do not touch ``self.default_api_params``.
    if api_params is DEFAULT_API_PARAMS:
        api_params = deepcopy(self.default_api_params or {})

    self.system_message = system_message
    self.api_params = api_params

    if device not in ('cpu', 'cuda', 'auto'):
        # ValueError is a subclass of Exception, so callers that caught
        # the previous bare Exception keep working.
        raise ValueError(
            f'Device {device} not supported. Options: cpu, cuda, auto.'
        )

    # 'auto' picks CUDA when available; an explicit 'cuda' request also
    # falls back to CPU when CUDA is not available.
    cuda_available = torch.cuda.is_available()
    self.device_name: str = (
        'cpu' if device == 'cpu' or not cuda_available else 'cuda'
    )
    self.device = torch.device(self.device_name)

    self._validate()
    self._setup()

generate

generate(query: str, context: list[str]) -> str

Generate the text from the query and augmented context.

Source code in src/rago/generation/hugging_face.py

def generate(self, query: str, context: list[str]) -> str:
    """Generate the text from the query and augmented context.

    The context items are joined with a single space and substituted,
    together with ``query``, into ``self.prompt_template``. The prompt is
    tokenized (truncated to 512 tokens), passed to ``self.model.generate``
    and the first returned sequence is decoded.

    Parameters
    ----------
    query
        The user query inserted into the prompt template.
    context
        Context passages inserted into the prompt template.

    Returns
    -------
    str
        The decoded text of the first generated sequence.

    Notes
    -----
    Keys in ``self.api_params`` (or ``self.default_api_params``) take
    precedence over the built-in ``inputs``, ``max_length`` and
    ``pad_token_id`` values. The parameters actually used are stored in
    ``self.logs['model_params']``.
    """
    with torch.no_grad():
        prompt = self.prompt_template.format(
            query=query, context=' '.join(context)
        )
        input_ids = self.tokenizer.encode(
            prompt,
            return_tensors='pt',
            truncation=True,
            max_length=512,
        ).to(self.device_name)

        # Fall back to an empty mapping so ``**`` never unpacks None.
        api_params = self.api_params or self.default_api_params or {}

        # A dict literal (instead of ``dict(..., **api_params)``) lets
        # caller-supplied keys such as ``max_length`` override the values
        # below rather than raising "multiple values for keyword argument".
        model_params = {
            'inputs': input_ids,
            'max_length': self.output_max_length,
            'pad_token_id': self.tokenizer.eos_token_id,
            **api_params,
        }

        outputs = self.model.generate(**model_params)

        self.logs['model_params'] = model_params

        response = self.tokenizer.decode(
            outputs[0], skip_special_tokens=True
        )

    if self.device_name == 'cuda':
        torch.cuda.empty_cache()

    return str(response)