LLMs

BaseRagasLLM dataclass

BaseRagasLLM(run_config: RunConfig = RunConfig(), multiple_completion_supported: bool = False, cache: Optional[CacheInterface] = None)

Bases: ABC

get_temperature

get_temperature(n: int) -> float

Return the temperature to use for completion based on n.

Source code in src/ragas/llms/base.py
def get_temperature(self, n: int) -> float:
    """Return the temperature to use for completion based on n."""
    return 0.3 if n > 1 else 0.01

is_finished abstractmethod

is_finished(response: LLMResult) -> bool

Check if the LLM response is finished/complete.

Source code in src/ragas/llms/base.py
@abstractmethod
def is_finished(self, response: LLMResult) -> bool:
    """Check if the LLM response is finished/complete."""
    ...

generate async

generate(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Callbacks = None) -> LLMResult

Generate text using the given event loop.

Source code in src/ragas/llms/base.py
async def generate(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: Callbacks = None,
) -> LLMResult:
    """Generate text using the given event loop."""

    if temperature is None:
        temperature = self.get_temperature(n)

    agenerate_text_with_retry = add_async_retry(
        self.agenerate_text, self.run_config
    )
    result = await agenerate_text_with_retry(
        prompt=prompt,
        n=n,
        temperature=temperature,
        stop=stop,
        callbacks=callbacks,
    )

    # check there are no max_token issues
    if not self.is_finished(result):
        raise LLMDidNotFinishException()
    return result
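
Example: a minimal sketch of calling generate() on a concrete BaseRagasLLM implementation. It assumes an already-constructed wrapper instance named ragas_llm and uses langchain_core's StringPromptValue as the PromptValue; the prompt text is illustrative.

import asyncio
from langchain_core.prompt_values import StringPromptValue

async def main(ragas_llm):
    prompt = StringPromptValue(text="Summarize the retrieved context in one sentence.")
    # Passing temperature=None lets get_temperature() pick 0.01 (n == 1) or 0.3 (n > 1)
    result = await ragas_llm.generate(prompt, n=1, temperature=None)
    print(result.generations[0][0].text)

# asyncio.run(main(ragas_llm))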

InstructorBaseRagasLLM

Bases: ABC

Base class for LLMs using the Instructor library pattern.

generate abstractmethod

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
@abstractmethod
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a response using the configured LLM.

    For async clients, this will run the async method in the appropriate event loop.
    """

agenerate abstractmethod async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
@abstractmethod
async def agenerate(
    self,
    prompt: str,
    response_model: t.Type[InstructorTypeVar],
) -> InstructorTypeVar:
    """Asynchronously generate a response using the configured LLM."""

InstructorLLM

InstructorLLM(client: Any, model: str, provider: str, model_args: Optional[InstructorModelArgs] = None, **kwargs)

Bases: InstructorBaseRagasLLM

LLM wrapper using the Instructor library for structured outputs.

Source code in src/ragas/llms/base.py
def __init__(
    self,
    client: t.Any,
    model: str,
    provider: str,
    model_args: t.Optional[InstructorModelArgs] = None,
    **kwargs,
):
    self.client = client
    self.model = model
    self.provider = provider

    # Use deterministic defaults if no model_args provided
    if model_args is None:
        model_args = InstructorModelArgs()

    # Convert to dict and merge with any additional kwargs
    self.model_args = {**model_args.model_dump(), **kwargs}

    # Check if client is async-capable at initialization
    self.is_async = self._check_client_async()

generate

generate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Generate a response using the configured LLM.

For async clients, this will run the async method in the appropriate event loop.

Source code in src/ragas/llms/base.py
def generate(
    self, prompt: str, response_model: t.Type[InstructorTypeVar]
) -> InstructorTypeVar:
    """Generate a response using the configured LLM.

    For async clients, this will run the async method in the appropriate event loop.
    """
    messages = [{"role": "user", "content": prompt}]

    # If client is async, use the appropriate method to run it
    if self.is_async:
        result = self._run_async_in_current_loop(
            self.agenerate(prompt, response_model)
        )
    else:
        if self.provider.lower() == "google":
            google_kwargs = {}
            generation_config_keys = {"temperature", "max_tokens", "top_p", "top_k"}
            generation_config = {}

            for key, value in self.model_args.items():
                if key in generation_config_keys:
                    if key == "max_tokens":
                        generation_config["max_output_tokens"] = value
                    else:
                        generation_config[key] = value
                else:
                    google_kwargs[key] = value

            if generation_config:
                google_kwargs["generation_config"] = generation_config

            result = self.client.create(
                messages=messages,
                response_model=response_model,
                **google_kwargs,
            )
        else:
            result = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                response_model=response_model,
                **self.model_args,
            )

    # Track the usage
    track(
        LLMUsageEvent(
            provider=self.provider,
            model=self.model,
            llm_type="instructor",
            num_requests=1,
            is_async=self.is_async,
        )
    )
    return result
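
Example: a hedged sketch of structured generation with generate(); the Pydantic response model below is invented for illustration and assumes an InstructorLLM built around a synchronous client.

from pydantic import BaseModel

class Verdict(BaseModel):
    verdict: bool
    reason: str

answer = llm.generate("Is the retrieved context relevant to the question?", Verdict)
print(answer.verdict, answer.reason)  # response parsed into the Pydantic model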

agenerate async

agenerate(prompt: str, response_model: Type[InstructorTypeVar]) -> InstructorTypeVar

Asynchronously generate a response using the configured LLM.

Source code in src/ragas/llms/base.py
async def agenerate(
    self,
    prompt: str,
    response_model: t.Type[InstructorTypeVar],
) -> InstructorTypeVar:
    """Asynchronously generate a response using the configured LLM."""
    messages = [{"role": "user", "content": prompt}]

    # If client is not async, raise a helpful error
    if not self.is_async:
        raise TypeError(
            "Cannot use agenerate() with a synchronous client. Use generate() instead."
        )

    if self.provider.lower() == "google":
        google_kwargs = {}
        generation_config_keys = {"temperature", "max_tokens", "top_p", "top_k"}
        generation_config = {}

        for key, value in self.model_args.items():
            if key in generation_config_keys:
                if key == "max_tokens":
                    generation_config["max_output_tokens"] = value
                else:
                    generation_config[key] = value
            else:
                google_kwargs[key] = value

        if generation_config:
            google_kwargs["generation_config"] = generation_config

        result = await self.client.create(
            messages=messages,
            response_model=response_model,
            **google_kwargs,
        )
    else:
        result = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_model=response_model,
            **self.model_args,
        )

    # Track the usage
    track(
        LLMUsageEvent(
            provider=self.provider,
            model=self.model,
            llm_type="instructor",
            num_requests=1,
            is_async=True,
        )
    )
    return result
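
Example: the asynchronous counterpart, assuming the same illustrative Verdict model as above and an InstructorLLM wrapping an async client (a sync client raises the TypeError shown in the source).

import asyncio

async def judge():
    return await llm.agenerate("Is the answer faithful to the context?", Verdict)

# result = asyncio.run(judge())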

HaystackLLMWrapper

HaystackLLMWrapper(haystack_generator: Any, run_config: Optional[RunConfig] = None, cache: Optional[CacheInterface] = None)

Bases: BaseRagasLLM

A wrapper class for using Haystack LLM generators within the Ragas framework.

This class integrates Haystack's LLM components (e.g., OpenAIGenerator, HuggingFaceAPIGenerator) into Ragas, enabling both synchronous and asynchronous text generation.

Parameters:

    haystack_generator (AzureOpenAIGenerator | HuggingFaceAPIGenerator | HuggingFaceLocalGenerator | OpenAIGenerator):
        An instance of a Haystack generator. Required.
    run_config (RunConfig, optional):
        Configuration object to manage LLM execution settings, by default None.
    cache (CacheInterface, optional):
        A cache instance for storing results, by default None.
Source code in src/ragas/llms/haystack_wrapper.py
def __init__(
    self,
    haystack_generator: t.Any,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[CacheInterface] = None,
):
    super().__init__(cache=cache)

    # Lazy Import of required Haystack components
    try:
        from haystack import AsyncPipeline
        from haystack.components.generators.azure import AzureOpenAIGenerator
        from haystack.components.generators.hugging_face_api import (
            HuggingFaceAPIGenerator,
        )
        from haystack.components.generators.hugging_face_local import (
            HuggingFaceLocalGenerator,
        )
        from haystack.components.generators.openai import OpenAIGenerator
    except ImportError as exc:
        raise ImportError(
            "Haystack is not installed. Please install it using `pip install haystack-ai`."
        ) from exc

    # Validate haystack_generator type
    if not isinstance(
        haystack_generator,
        (
            AzureOpenAIGenerator,
            HuggingFaceAPIGenerator,
            HuggingFaceLocalGenerator,
            OpenAIGenerator,
        ),
    ):
        raise TypeError(
            "Expected 'haystack_generator' to be one of: "
            "AzureOpenAIGenerator, HuggingFaceAPIGenerator, "
            "HuggingFaceLocalGenerator, or OpenAIGenerator, but received "
            f"{type(haystack_generator).__name__}."
        )

    # Set up Haystack pipeline and generator
    self.generator = haystack_generator
    self.async_pipeline = AsyncPipeline()
    self.async_pipeline.add_component("llm", self.generator)

    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)

OCIGenAIWrapper

OCIGenAIWrapper(model_id: str, compartment_id: str, config: Optional[Dict[str, Any]] = None, endpoint_id: Optional[str] = None, run_config: Optional[RunConfig] = None, cache: Optional[Any] = None, default_system_prompt: Optional[str] = None, client: Optional[Any] = None)

Bases: BaseRagasLLM

OCI Gen AI LLM wrapper for Ragas.

This wrapper provides direct integration with Oracle Cloud Infrastructure Generative AI services without requiring LangChain or LlamaIndex.

Args:
    model_id: The OCI model ID to use for generation.
    compartment_id: The OCI compartment ID.
    config: OCI configuration dictionary (optional, uses default if not provided).
    endpoint_id: Optional endpoint ID for the model.
    run_config: Ragas run configuration.
    cache: Optional cache backend.
    default_system_prompt: Optional default system prompt.
    client: Optional pre-initialized OCI client.

Source code in src/ragas/llms/oci_genai_wrapper.py
def __init__(
    self,
    model_id: str,
    compartment_id: str,
    config: t.Optional[t.Dict[str, t.Any]] = None,
    endpoint_id: t.Optional[str] = None,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[t.Any] = None,
    default_system_prompt: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
):
    """
    Initialize OCI Gen AI wrapper.

    Args:
        model_id: The OCI model ID to use for generation
        compartment_id: The OCI compartment ID
        config: OCI configuration dictionary (optional, uses default if not provided)
        endpoint_id: Optional endpoint ID for the model
        run_config: Ragas run configuration
        cache: Optional cache backend
    """
    super().__init__(cache=cache)

    self.model_id = model_id
    self.compartment_id = compartment_id
    self.endpoint_id = endpoint_id
    self.default_system_prompt = default_system_prompt

    # Store client/config; perform lazy initialization to keep import-optional
    self.client = client
    self._oci_config = config
    # If no client and SDK not available and no endpoint fallback, raise early
    if (
        self.client is None
        and GenerativeAiClient is None
        and self.endpoint_id is None
    ):  # type: ignore
        raise ImportError(
            "OCI SDK not found. Please install it with: pip install oci"
        )

    # Set run config
    if run_config is None:
        run_config = RunConfig()
    self.set_run_config(run_config)

    # Track initialization
    track(
        LLMUsageEvent(
            provider="oci_genai",
            model=model_id,
            llm_type="oci_wrapper",
            num_requests=1,
            is_async=False,
        )
    )

generate_text

generate_text(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Optional[Any] = None) -> LLMResult

Generate text using OCI Gen AI.

Source code in src/ragas/llms/oci_genai_wrapper.py
def generate_text(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: t.Optional[t.Any] = None,
) -> LLMResult:
    """Generate text using OCI Gen AI."""
    if temperature is None:
        temperature = self.get_temperature(n)

    messages = self._convert_prompt_to_messages(prompt)
    generations = []

    try:
        for _ in range(n):
            request = self._create_generation_request(
                messages, temperature, stop=stop
            )

            response = self._get_client().generate_text(**request)

            # Extract text from response
            if hasattr(response.data, "choices") and response.data.choices:
                text = response.data.choices[0].message.content
            elif hasattr(response.data, "text"):
                text = response.data.text
            else:
                text = str(response.data)

            generation = Generation(text=text)
            generations.append([generation])

        # Track usage
        track(
            LLMUsageEvent(
                provider="oci_genai",
                model=self.model_id,
                llm_type="oci_wrapper",
                num_requests=n,
                is_async=False,
            )
        )

        return LLMResult(generations=generations)

    except Exception as e:
        logger.error(f"Error generating text with OCI Gen AI: {e}")
        raise

agenerate_text async

agenerate_text(prompt: PromptValue, n: int = 1, temperature: Optional[float] = 0.01, stop: Optional[List[str]] = None, callbacks: Optional[Any] = None) -> LLMResult

Generate text asynchronously using OCI Gen AI.

Source code in src/ragas/llms/oci_genai_wrapper.py
async def agenerate_text(
    self,
    prompt: PromptValue,
    n: int = 1,
    temperature: t.Optional[float] = 0.01,
    stop: t.Optional[t.List[str]] = None,
    callbacks: t.Optional[t.Any] = None,
) -> LLMResult:
    """Generate text asynchronously using OCI Gen AI."""
    if temperature is None:
        temperature = self.get_temperature(n)

    messages = self._convert_prompt_to_messages(prompt)
    generations = []

    try:
        # Run synchronous calls in thread pool for async compatibility
        loop = asyncio.get_event_loop()

        for _ in range(n):
            request = self._create_generation_request(
                messages, temperature, stop=stop
            )

            response = await loop.run_in_executor(
                None, lambda: self._get_client().generate_text(**request)
            )

            # Extract text from response
            if hasattr(response.data, "choices") and response.data.choices:
                text = response.data.choices[0].message.content
            elif hasattr(response.data, "text"):
                text = response.data.text
            else:
                text = str(response.data)

            generation = Generation(text=text)
            generations.append([generation])

        # Track usage
        track(
            LLMUsageEvent(
                provider="oci_genai",
                model=self.model_id,
                llm_type="oci_wrapper",
                num_requests=n,
                is_async=True,
            )
        )

        return LLMResult(generations=generations)

    except Exception as e:
        logger.error(f"Error generating text with OCI Gen AI: {e}")
        raise

is_finished

is_finished(response: LLMResult) -> bool

Check if the LLM response is finished/complete.

Source code in src/ragas/llms/oci_genai_wrapper.py
def is_finished(self, response: LLMResult) -> bool:
    """Check if the LLM response is finished/complete."""
    # For OCI Gen AI, we assume the response is always finished
    # unless there's an explicit error or truncation
    try:
        for generation_list in response.generations:
            for generation in generation_list:
                if not generation.text or generation.text.strip() == "":
                    return False
        return True
    except Exception:
        return False

llm_factory

llm_factory(model: str, provider: str = 'openai', client: Optional[Any] = None, **kwargs: Any) -> InstructorBaseRagasLLM

Create an LLM instance for structured output generation using Instructor.

Supports multiple LLM providers through a unified interface for both sync and async operations. Returns instances with .generate() and .agenerate() methods that accept Pydantic models for structured outputs.

Args:
    model: Model name (e.g., "gpt-4o", "gpt-4o-mini", "claude-3-sonnet").
    provider: LLM provider. Default: "openai". Supported: openai, anthropic, google, litellm.
    client: Pre-initialized client instance (required). For OpenAI, can be OpenAI(...) or AsyncOpenAI(...).
    **kwargs: Additional model arguments (temperature, max_tokens, top_p, etc.).

Returns:
    InstructorBaseRagasLLM: Instance with generate() and agenerate() methods.

Raises:
    ValueError: If client is missing, provider is unsupported, or model is invalid.

Examples:
    from openai import OpenAI

    client = OpenAI(api_key="...")
    llm = llm_factory("gpt-4o", client=client)
    response = llm.generate(prompt, ResponseModel)

    # Async
    from openai import AsyncOpenAI
    client = AsyncOpenAI(api_key="...")
    llm = llm_factory("gpt-4o", client=client)
    response = await llm.agenerate(prompt, ResponseModel)
Source code in src/ragas/llms/base.py
def llm_factory(
    model: str,
    provider: str = "openai",
    client: t.Optional[t.Any] = None,
    **kwargs: t.Any,
) -> InstructorBaseRagasLLM:
    """
    Create an LLM instance for structured output generation using Instructor.

    Supports multiple LLM providers with unified interface for both sync and async
    operations. Returns instances with .generate() and .agenerate() methods that
    accept Pydantic models for structured outputs.

    Args:
        model: Model name (e.g., "gpt-4o", "gpt-4o-mini", "claude-3-sonnet").
        provider: LLM provider. Default: "openai".
                 Supported: openai, anthropic, google, litellm.
        client: Pre-initialized client instance (required). For OpenAI, can be
               OpenAI(...) or AsyncOpenAI(...).
        **kwargs: Additional model arguments (temperature, max_tokens, top_p, etc).

    Returns:
        InstructorBaseRagasLLM: Instance with generate() and agenerate() methods.

    Raises:
        ValueError: If client is missing, provider is unsupported, or model is invalid.

    Examples:
        from openai import OpenAI

        client = OpenAI(api_key="...")
        llm = llm_factory("gpt-4o", client=client)
        response = llm.generate(prompt, ResponseModel)

        # Async
        from openai import AsyncOpenAI
        client = AsyncOpenAI(api_key="...")
        llm = llm_factory("gpt-4o", client=client)
        response = await llm.agenerate(prompt, ResponseModel)
    """
    if client is None:
        raise ValueError(
            "llm_factory() requires a client instance. "
            "Text-only mode has been removed.\n\n"
            "To migrate:\n"
            "  from openai import OpenAI\n"
            "  client = OpenAI(api_key='...')\n"
            "  llm = llm_factory('gpt-4o-mini', client=client)\n\n"
            "For more details: https://docs.ragas.io/en/latest/llm-factory"
        )

    if not model:
        raise ValueError("model parameter is required")

    provider_lower = provider.lower()

    instructor_funcs = {
        "openai": lambda c: instructor.from_openai(c),
        "anthropic": lambda c: instructor.from_anthropic(c),
        "google": lambda c: instructor.from_gemini(c),
        "litellm": lambda c: instructor.from_litellm(c),
    }

    if provider_lower not in instructor_funcs:
        raise ValueError(
            f"Unsupported provider: '{provider}'. "
            f"Supported: {', '.join(instructor_funcs.keys())}"
        )

    try:
        patched_client = instructor_funcs[provider_lower](client)
    except Exception as e:
        raise ValueError(
            f"Failed to initialize {provider} client with instructor. "
            f"Ensure you've created a valid {provider} client.\n"
            f"Error: {str(e)}"
        )

    track(
        LLMUsageEvent(
            provider=provider,
            model=model,
            llm_type="llm_factory",
            num_requests=1,
            is_async=False,
        )
    )

    return InstructorLLM(
        client=patched_client,
        model=model,
        provider=provider,
        model_args=InstructorModelArgs(),
        **kwargs,
    )
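
Example: a hedged sketch of llm_factory with a non-OpenAI provider; it assumes the anthropic package is installed with a valid API key, reuses the claude-3-sonnet model name from the docstring, and invents a small response model for illustration.

from anthropic import Anthropic
from pydantic import BaseModel

class Summary(BaseModel):
    text: str

client = Anthropic(api_key="...")
llm = llm_factory("claude-3-sonnet", provider="anthropic", client=client)
# summary = llm.generate("Summarize the retrieved context.", Summary)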

oci_genai_factory

oci_genai_factory(model_id: str, compartment_id: str, config: Optional[Dict[str, Any]] = None, endpoint_id: Optional[str] = None, run_config: Optional[RunConfig] = None, cache: Optional[Any] = None, default_system_prompt: Optional[str] = None, client: Optional[Any] = None) -> OCIGenAIWrapper

Factory function to create an OCI Gen AI LLM instance.

Args:
    model_id: The OCI model ID to use for generation.
    compartment_id: The OCI compartment ID.
    config: OCI configuration dictionary (optional).
    endpoint_id: Optional endpoint ID for the model.
    run_config: Ragas run configuration.
    cache: Optional cache backend.
    default_system_prompt: Optional default system prompt.
    client: Optional pre-initialized OCI client.

Returns:
    OCIGenAIWrapper: An instance of the OCI Gen AI LLM wrapper.

Examples:
    # Basic usage with default config
    llm = oci_genai_factory(
        model_id="cohere.command",
        compartment_id="ocid1.compartment.oc1..example"
    )

    # With custom config
    llm = oci_genai_factory(
        model_id="cohere.command",
        compartment_id="ocid1.compartment.oc1..example",
        config={"user": "user_ocid", "key_file": "~/.oci/private_key.pem"}
    )
Source code in src/ragas/llms/oci_genai_wrapper.py
def oci_genai_factory(
    model_id: str,
    compartment_id: str,
    config: t.Optional[t.Dict[str, t.Any]] = None,
    endpoint_id: t.Optional[str] = None,
    run_config: t.Optional[RunConfig] = None,
    cache: t.Optional[t.Any] = None,
    default_system_prompt: t.Optional[str] = None,
    client: t.Optional[t.Any] = None,
) -> OCIGenAIWrapper:
    """
    Factory function to create an OCI Gen AI LLM instance.

    Args:
        model_id: The OCI model ID to use for generation
        compartment_id: The OCI compartment ID
        config: OCI configuration dictionary (optional)
        endpoint_id: Optional endpoint ID for the model
        run_config: Ragas run configuration
        **kwargs: Additional arguments passed to OCIGenAIWrapper

    Returns:
        OCIGenAIWrapper: An instance of the OCI Gen AI LLM wrapper

    Examples:
        # Basic usage with default config
        llm = oci_genai_factory(
            model_id="cohere.command",
            compartment_id="ocid1.compartment.oc1..example"
        )

        # With custom config
        llm = oci_genai_factory(
            model_id="cohere.command",
            compartment_id="ocid1.compartment.oc1..example",
            config={"user": "user_ocid", "key_file": "~/.oci/private_key.pem"}
        )
    """
    return OCIGenAIWrapper(
        model_id=model_id,
        compartment_id=compartment_id,
        config=config,
        endpoint_id=endpoint_id,
        run_config=run_config,
        cache=cache,
        default_system_prompt=default_system_prompt,
        client=client,
    )