Arcana integrates with LLMs for question answering via `Arcana.ask/2` and the Agent pipeline. Use model strings, functions, or custom modules.
## Quick Start

```elixir
# Using req_llm with model strings
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# Using a function
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: fn _prompt -> {:ok, "Generated answer"} end
)

# Using a custom module
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)
```
## Using req_llm (Recommended)

`req_llm` provides a unified interface to 45+ LLM providers.
### Setup

#### Add Dependency

```elixir
defp deps do
  [
    {:arcana, "~> 1.0"},
    {:req_llm, "~> 1.2"}
  ]
end
```
#### Configure API Keys

```elixir
# OpenAI
config :req_llm, :openai,
  api_key: System.get_env("OPENAI_API_KEY")

# Anthropic
config :req_llm, :anthropic,
  api_key: System.get_env("ANTHROPIC_API_KEY")

# Google
config :req_llm, :google,
  api_key: System.get_env("GOOGLE_API_KEY")
```
#### Set Environment Variables

```bash
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=AIza...
```
### Model Strings

Pass model strings directly to `ask/2` or Agent functions:
#### OpenAI

```elixir
# GPT-4o mini (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# GPT-4o
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o"
)

# GPT-4 Turbo
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4-turbo"
)
```

Cost: $0.15-15.00 per 1M input tokens

#### Anthropic

```elixir
# Claude Sonnet 4 (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-sonnet-4-20250514"
)

# Claude Opus 4
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-opus-4-20250514"
)

# Claude 3.5 Haiku
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-3-5-haiku-20241022"
)
```

Cost: $1.00-15.00 per 1M input tokens

#### Google

```elixir
# Gemini 2.0 Flash (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "google:gemini-2.0-flash-exp"
)

# Gemini Pro
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "google:gemini-pro"
)
```

Cost: Free tier available, $0.075-7.00 per 1M tokens

#### Other Providers

```elixir
# Groq (fast inference)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "groq:llama-3.1-70b-versatile"
)

# Together.ai
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
)

# OpenRouter (access to 200+ models)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openrouter:anthropic/claude-3.5-sonnet"
)
```
### Model String Options

Pass options as a tuple:

```elixir
# Custom API key
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", api_key: "sk-..."}
)

# Temperature and max tokens
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "anthropic:claude-sonnet-4-20250514",
    temperature: 0.7,
    max_tokens: 1024
  }
)

# Provider-specific options
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "zai:glm-4.7",
    provider_options: %{thinking: true} # Z.ai thinking mode
  }
)
```
### Global Configuration

Set a default LLM in your config:

```elixir
# config/config.exs
config :arcana, llm: "openai:gpt-4o-mini"

# With options
config :arcana, llm: {
  "anthropic:claude-sonnet-4-20250514",
  temperature: 0.7
}

# Function
config :arcana, llm: fn prompt ->
  {:ok, MyApp.LLM.complete(prompt)}
end

# Module
config :arcana, llm: MyApp.CustomLLM
```

Then use without specifying `:llm`:

```elixir
{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo)
```
## Function-Based LLM

Provide a function for custom LLM logic:

### 1-Arity Function

Signature: `fn prompt -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt ->
  # Simple completion
  case HTTPoison.post(
         "https://api.openai.com/v1/completions",
         Jason.encode!(%{prompt: prompt, model: "gpt-4o-mini"}),
         headers()
       ) do
    {:ok, %{body: body}} ->
      %{"choices" => [%{"text" => text}]} = Jason.decode!(body)
      {:ok, text}

    {:error, reason} ->
      {:error, reason}
  end
end

{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo, llm: llm)
```

### 2-Arity Function

Signature: `fn prompt, context -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt, context ->
  # Access retrieved chunks
  context_text = Enum.map_join(context, "\n\n", & &1.text)

  full_prompt = """
  Context:
  #{context_text}

  Question: #{prompt}

  Answer:
  """

  MyApp.LLM.complete(full_prompt)
end

{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo, llm: llm)
```

### 3-Arity Function

Signature: `fn prompt, context, opts -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt, context, opts ->
  # Full control over prompting
  system_prompt = opts[:system_prompt] || default_prompt(context)
  temperature = opts[:temperature] || 0.7

  ReqLLM.generate_text(
    "openai:gpt-4o-mini",
    ReqLLM.Context.new([
      ReqLLM.Context.system(system_prompt),
      ReqLLM.Context.user(prompt)
    ]),
    temperature: temperature
  )
  |> case do
    {:ok, response} -> {:ok, ReqLLM.Response.text(response)}
    {:error, reason} -> {:error, reason}
  end
end

{:ok, answer} =
  Arcana.ask(
    "question",
    repo: MyApp.Repo,
    llm: llm,
    temperature: 0.5
  )
```
## Custom LLM Module

Implement custom LLM logic in a module:

```elixir
defmodule MyApp.CustomLLM do
  @behaviour Arcana.LLM

  @impl true
  def complete(prompt, context, opts) do
    # Build system prompt with context
    system_prompt = build_system_prompt(context)

    # Extract options
    model = opts[:model] || "openai:gpt-4o-mini"
    temperature = opts[:temperature] || 0.7

    # Call LLM
    case ReqLLM.generate_text(
           model,
           ReqLLM.Context.new([
             ReqLLM.Context.system(system_prompt),
             ReqLLM.Context.user(prompt)
           ]),
           temperature: temperature
         ) do
      {:ok, response} ->
        {:ok, ReqLLM.Response.text(response)}

      {:error, reason} ->
        {:error, reason}
    end
  end

  defp build_system_prompt([]) do
    "You are a helpful assistant."
  end

  defp build_system_prompt(context) do
    context_text =
      Enum.map_join(context, "\n\n---\n\n", fn
        %{text: text} -> text
        text when is_binary(text) -> text
      end)

    """
    You are a helpful assistant with access to the following reference material.
    Answer questions directly and naturally, using this information to inform your responses.
    Don't mention or reference the material explicitly in your answers.

    Reference material:
    #{context_text}
    """
  end
end
```

Configuration:

```elixir
# config/config.exs
config :arcana, llm: MyApp.CustomLLM

# Or use directly
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)
```
## Agentic RAG

Use LLMs with the Agent pipeline for complex workflows:

```elixir
alias Arcana.Agent

llm = "openai:gpt-4o-mini"

ctx =
  Agent.new("Compare Elixir and Erlang features", repo: MyApp.Repo, llm: llm)
  |> Agent.gate()                                    # Skip retrieval if not needed
  |> Agent.rewrite()                                 # Clean up conversational input
  |> Agent.select(collections: ["elixir", "erlang"]) # Choose collections
  |> Agent.expand()                                  # Add synonyms
  |> Agent.decompose()                               # Split complex questions
  |> Agent.search()                                  # Execute search
  |> Agent.reason()                                  # Multi-hop reasoning
  |> Agent.rerank(threshold: 7)                      # Filter low-quality chunks
  |> Agent.answer()                                  # Generate answer

ctx.answer
# => "Generated answer based on retrieved context..."
```
### Pipeline Steps with LLM

Each Agent step uses the LLM:

| Step | LLM Purpose |
|------|-------------|
| `gate/2` | Decide if retrieval is needed |
| `rewrite/2` | Clean up conversational queries |
| `select/2` | Choose relevant collections |
| `expand/2` | Add synonyms and related terms |
| `decompose/2` | Split into sub-questions |
| `reason/2` | Evaluate if more search is needed |
| `rerank/2` | Score chunk relevance (0-10) |
| `answer/2` | Generate the final answer |
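Not every workflow needs every step. As a minimal sketch, assuming the steps compose independently (the `ask_with_agent/2` example later on this page also uses only a subset), a lightweight pipeline can skip the query-transformation stages entirely:

```elixir
alias Arcana.Agent

ctx =
  Agent.new("What is OTP?", repo: MyApp.Repo, llm: "openai:gpt-4o-mini")
  |> Agent.search()  # Retrieve chunks with no query rewriting
  |> Agent.answer()  # Generate an answer from whatever was found

ctx.answer
```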
### Custom Prompts

Override default prompts for any step:

```elixir
ctx
|> Agent.expand(
  prompt: fn question ->
    "Generate 5 synonyms for key terms in: #{question}"
  end
)
|> Agent.decompose(
  prompt: fn question ->
    "Break this into 2-3 focused sub-questions: #{question}"
  end
)
|> Agent.answer(
  prompt: fn question, chunks ->
    context = Enum.map_join(chunks, "\n\n", & &1.text)

    """
    Context: #{context}

    Question: #{question}

    Provide a detailed answer with examples.
    """
  end
)
```
## Streaming Responses

Stream LLM responses for better UX in LiveView:

```elixir
defmodule MyAppWeb.ChatLive do
  use MyAppWeb, :live_view

  def handle_event("ask", %{"question" => question}, socket) do
    # Get context from Arcana
    {:ok, context} = Arcana.search(question, repo: MyApp.Repo, limit: 5)

    # Stream response
    send(self(), {:stream_answer, question, context})

    {:noreply, assign(socket, streaming: true, answer: "")}
  end

  def handle_info({:stream_answer, question, context}, socket) do
    live_view_pid = self()

    Task.start(fn ->
      context_text = Enum.map_join(context, "\n\n", & &1.text)

      llm_context =
        ReqLLM.Context.new([
          ReqLLM.Context.system("""
          Answer based on this context:
          #{context_text}
          """),
          ReqLLM.Context.user(question)
        ])

      {:ok, response} = ReqLLM.stream_text("openai:gpt-4o-mini", llm_context)

      response
      |> ReqLLM.StreamResponse.tokens()
      |> Stream.each(fn chunk ->
        send(live_view_pid, {:chunk, chunk})
      end)
      |> Stream.run()

      send(live_view_pid, :stream_done)
    end)

    {:noreply, socket}
  end

  def handle_info({:chunk, content}, socket) do
    {:noreply, update(socket, :answer, &(&1 <> content))}
  end

  def handle_info(:stream_done, socket) do
    {:noreply, assign(socket, streaming: false)}
  end
end
```
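The module above only manages state; the matching template is not shown. A minimal, hypothetical render that streams tokens into the `@answer` assign might look like this (the form markup and field names are assumptions):

```elixir
# Hypothetical render for the LiveView above; markup and field names are assumed.
def render(assigns) do
  ~H"""
  <form phx-submit="ask">
    <input type="text" name="question" placeholder="Ask a question" />
    <button type="submit" disabled={@streaming}>Ask</button>
  </form>

  <p><%= @answer %></p>
  """
end
```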
## Custom RAG Module

Wrap Arcana for app-specific RAG:

```elixir
defmodule MyApp.RAG do
  @default_model "openai:gpt-4o-mini"
  @default_limit 5

  def ask(question, opts \\ []) do
    repo = Keyword.get(opts, :repo, MyApp.Repo)
    model = Keyword.get(opts, :model, @default_model)
    limit = Keyword.get(opts, :limit, @default_limit)
    collection = Keyword.get(opts, :collection)

    search_opts = [
      repo: repo,
      llm: model,
      limit: limit,
      mode: :hybrid
    ]

    search_opts =
      if collection,
        do: Keyword.put(search_opts, :collection, collection),
        else: search_opts

    Arcana.ask(question, search_opts)
  end

  def ask_with_agent(question, opts \\ []) do
    repo = Keyword.get(opts, :repo, MyApp.Repo)
    model = Keyword.get(opts, :model, @default_model)
    collections = Keyword.get(opts, :collections)

    ctx =
      Arcana.Agent.new(question, repo: repo, llm: model)
      |> maybe_select(collections)
      |> Arcana.Agent.expand()
      |> Arcana.Agent.search()
      |> Arcana.Agent.rerank(threshold: 7)
      |> Arcana.Agent.answer()

    {:ok, ctx.answer}
  end

  defp maybe_select(ctx, nil), do: ctx

  defp maybe_select(ctx, collections) do
    Arcana.Agent.select(ctx, collections: collections)
  end
end
```
Usage:

```elixir
# Simple ask
{:ok, answer} = MyApp.RAG.ask("What is Elixir?")

# With options
{:ok, answer} = MyApp.RAG.ask(
  "What is Elixir?",
  collection: "docs",
  model: "anthropic:claude-sonnet-4-20250514"
)

# Agent pipeline
{:ok, answer} = MyApp.RAG.ask_with_agent(
  "Compare Elixir and Erlang",
  collections: ["elixir", "erlang"]
)
```
## Cost Tracking

Monitor LLM costs via telemetry:

```elixir
defmodule MyApp.LLMLogger do
  require Logger

  def setup do
    # Track Arcana LLM calls
    :telemetry.attach(
      "arcana-llm-logger",
      [:arcana, :llm, :complete, :stop],
      &handle_llm/4,
      nil
    )

    # Track req_llm token usage
    :telemetry.attach(
      "req-llm-cost-logger",
      [:req_llm, :token_usage],
      &handle_cost/4,
      nil
    )
  end

  def handle_llm([:arcana, :llm, :complete, :stop], measurements, metadata, _) do
    duration_ms = System.convert_time_unit(measurements.duration, :native, :millisecond)

    Logger.info("""
    LLM Call:
      Model: #{metadata.model}
      Prompt: #{String.slice(metadata.prompt, 0..100)}...
      Duration: #{duration_ms}ms
      Success: #{metadata.success}
    """)
  end

  def handle_cost([:req_llm, :token_usage], measurements, metadata, _) do
    Logger.info("""
    Token Usage:
      Model: #{metadata.model}
      Input: #{measurements.input_tokens} tokens
      Output: #{measurements.output_tokens} tokens
      Cost: $#{Float.round(measurements.total_cost, 4)}
    """)
  end
end

# In application.ex
MyApp.LLMLogger.setup()
```
## Best Practices

- **Use `gpt-4o-mini` for development** - Fast and cheap ($0.15/1M tokens)
- **Upgrade to Claude Sonnet 4 for production** - Better quality, longer context
- **Set `max_tokens`** - Prevent runaway costs
- **Use `temperature: 0.7`** - Good balance of creativity and consistency
- **Stream responses** - Better UX for chat interfaces
- **Monitor costs** - Attach telemetry handlers
- **Cache common queries** - LLM calls are expensive; see the sketch after this list
- **Use hybrid search** - Better context = better answers
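A minimal caching sketch for the point above, assuming an ETS table created elsewhere in your supervision tree and treating the question string as the cache key (the table name and wrapper module are hypothetical):

```elixir
defmodule MyApp.CachedRAG do
  # Hypothetical table; create it at startup with
  # :ets.new(:rag_cache, [:named_table, :public])
  @table :rag_cache

  def ask(question, opts \\ []) do
    case :ets.lookup(@table, question) do
      [{^question, answer}] ->
        # Serve repeated questions without another LLM call
        {:ok, answer}

      [] ->
        with {:ok, answer} <- Arcana.ask(question, opts) do
          :ets.insert(@table, {question, answer})
          {:ok, answer}
        end
    end
  end
end
```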
## Model Selection Guide

| Use Case | Recommended Model | Reason |
|----------|-------------------|--------|
| Development | `gpt-4o-mini` | Fast, cheap, good quality |
| Production | `claude-sonnet-4-20250514` | Best quality, 200K context |
| High Volume | `gemini-2.0-flash-exp` | Free tier, fast |
| Complex Reasoning | `gpt-4o` or `claude-opus-4` | Best reasoning capabilities |
| Low Latency | `groq:llama-3.1-*` | Ultra-fast inference |
| Budget | `gemini-flash` or `gpt-4o-mini` | Low cost |
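One way to apply this guide, assuming the standard per-environment config files of a Mix project, is to pick the model in config rather than at call sites, using the `config :arcana, llm:` key shown earlier:

```elixir
# config/dev.exs - cheap and fast while iterating
config :arcana, llm: "openai:gpt-4o-mini"

# config/prod.exs - higher quality for production traffic
config :arcana, llm: "anthropic:claude-sonnet-4-20250514"
```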
## Troubleshooting

If API keys are missing, set the environment variables:

```bash
export OPENAI_API_KEY=sk-...
export ANTHROPIC_API_KEY=sk-ant-...
```

Check the config:

```elixir
System.get_env("OPENAI_API_KEY") # Should not be nil
```

For rate-limit errors, implement retry logic:

```elixir
defp call_with_retry(llm, prompt, context, retries \\ 3) do
  case Arcana.LLM.complete(llm, prompt, context, []) do
    {:ok, response} ->
      {:ok, response}

    {:error, :rate_limit} when retries > 0 ->
      Process.sleep(1000)
      call_with_retry(llm, prompt, context, retries - 1)

    {:error, reason} ->
      {:error, reason}
  end
end
```

For slow providers, increase the timeout:

```elixir
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", timeout: 60_000}
)
```
## Next Steps

- **Agentic RAG Guide** - Build sophisticated RAG pipelines
- **Embeddings** - Configure embedding providers