Arcana integrates with LLMs for question answering via `Arcana.ask/2` and the Agent pipeline. Use model strings, functions, or custom modules.
## Quick Start

```elixir
# Using req_llm with model strings
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# Using a function
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: fn _prompt -> {:ok, "Generated answer"} end
)

# Using a custom module
{:ok, answer} = Arcana.ask("What is Elixir?",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)
```
## Using req_llm (Recommended)

`req_llm` provides a unified interface to 45+ LLM providers.
### Setup

#### Add Dependency

```elixir
defp deps do
  [
    {:arcana, "~> 1.0"},
    {:req_llm, "~> 1.2"}
  ]
end
```
#### Configure API Keys

```elixir
# OpenAI
config :req_llm, :openai,
  api_key: System.get_env("OPENAI_API_KEY")

# Anthropic
config :req_llm, :anthropic,
  api_key: System.get_env("ANTHROPIC_API_KEY")

# Google
config :req_llm, :google,
  api_key: System.get_env("GOOGLE_API_KEY")
```
#### Set Environment Variables

```bash
OPENAI_API_KEY=sk-...
ANTHROPIC_API_KEY=sk-ant-...
GOOGLE_API_KEY=AIza...
```
### Model Strings

Pass model strings directly to `ask/2` or Agent functions:
#### OpenAI

```elixir
# GPT-4o mini (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o-mini"
)

# GPT-4o
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4o"
)

# GPT-4 Turbo
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openai:gpt-4-turbo"
)
```

Cost: $0.15-15.00 per 1M input tokens

#### Anthropic

```elixir
# Claude Sonnet 4 (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-sonnet-4-20250514"
)

# Claude Opus 4
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-opus-4-20250514"
)

# Claude 3.5 Haiku
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "anthropic:claude-3-5-haiku-20241022"
)
```

Cost: $1.00-15.00 per 1M input tokens

#### Google

```elixir
# Gemini 2.0 Flash (recommended)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "google:gemini-2.0-flash-exp"
)

# Gemini Pro
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "google:gemini-pro"
)
```

Cost: Free tier available, $0.075-7.00 per 1M tokens

#### Other Providers

```elixir
# Groq (fast inference)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "groq:llama-3.1-70b-versatile"
)

# Together.ai
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
)

# OpenRouter (access to 200+ models)
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: "openrouter:anthropic/claude-3.5-sonnet"
)
```
### Model String Options

Pass options as a tuple:

```elixir
# Custom API key
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", api_key: "sk-..."}
)

# Temperature and max tokens
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "anthropic:claude-sonnet-4-20250514",
    temperature: 0.7,
    max_tokens: 1024
  }
)

# Provider-specific options
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {
    "zai:glm-4.7",
    provider_options: %{thinking: true} # Z.ai thinking mode
  }
)
```
### Global Configuration

Set a default LLM in your config:

```elixir
# config/config.exs
config :arcana, llm: "openai:gpt-4o-mini"

# With options
config :arcana, llm: {
  "anthropic:claude-sonnet-4-20250514",
  temperature: 0.7
}

# Function
config :arcana, llm: fn prompt ->
  {:ok, MyApp.LLM.complete(prompt)}
end

# Module
config :arcana, llm: MyApp.CustomLLM
```

Then use without specifying `:llm`:

```elixir
{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo)
```
## Function-Based LLM

Provide a function for custom LLM logic:

### 1-Arity Function

Signature: `fn prompt -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt ->
  # Simple completion
  case HTTPoison.post(
         "https://api.openai.com/v1/completions",
         Jason.encode!(%{prompt: prompt, model: "gpt-4o-mini"}),
         headers()
       ) do
    {:ok, %{body: body}} ->
      %{"choices" => [%{"text" => text}]} = Jason.decode!(body)
      {:ok, text}

    {:error, reason} ->
      {:error, reason}
  end
end

{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo, llm: llm)
```

### 2-Arity Function

Signature: `fn prompt, context -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt, context ->
  # Access retrieved chunks
  context_text = Enum.map_join(context, "\n\n", & &1.text)

  full_prompt = """
  Context:
  #{context_text}

  Question: #{prompt}

  Answer:
  """

  MyApp.LLM.complete(full_prompt)
end

{:ok, answer} = Arcana.ask("question", repo: MyApp.Repo, llm: llm)
```

### 3-Arity Function

Signature: `fn prompt, context, opts -> {:ok, response} | {:error, reason}`

```elixir
llm = fn prompt, context, opts ->
  # Full control over prompting
  system_prompt = opts[:system_prompt] || default_prompt(context)
  temperature = opts[:temperature] || 0.7

  ReqLLM.generate_text(
    "openai:gpt-4o-mini",
    ReqLLM.Context.new([
      ReqLLM.Context.system(system_prompt),
      ReqLLM.Context.user(prompt)
    ]),
    temperature: temperature
  )
  |> case do
    {:ok, response} -> {:ok, ReqLLM.Response.text(response)}
    {:error, reason} -> {:error, reason}
  end
end

{:ok, answer} =
  Arcana.ask(
    "question",
    repo: MyApp.Repo,
    llm: llm,
    temperature: 0.5
  )
```
## Custom LLM Module

Implement custom LLM logic in a module:

```elixir
defmodule MyApp.CustomLLM do
  @behaviour Arcana.LLM

  @impl true
  def complete(prompt, context, opts) do
    # Build system prompt with context
    system_prompt = build_system_prompt(context)

    # Extract options
    model = opts[:model] || "openai:gpt-4o-mini"
    temperature = opts[:temperature] || 0.7

    # Call LLM
    case ReqLLM.generate_text(
           model,
           ReqLLM.Context.new([
             ReqLLM.Context.system(system_prompt),
             ReqLLM.Context.user(prompt)
           ]),
           temperature: temperature
         ) do
      {:ok, response} ->
        {:ok, ReqLLM.Response.text(response)}

      {:error, reason} ->
        {:error, reason}
    end
  end

  defp build_system_prompt([]) do
    "You are a helpful assistant."
  end

  defp build_system_prompt(context) do
    context_text =
      Enum.map_join(context, "\n\n---\n\n", fn
        %{text: text} -> text
        text when is_binary(text) -> text
      end)

    """
    You are a helpful assistant with access to the following reference material.
    Answer questions directly and naturally, using this information to inform your responses.
    Don't mention or reference the material explicitly in your answers.

    Reference material:
    #{context_text}
    """
  end
end
```

Configuration:

```elixir
# config/config.exs
config :arcana, llm: MyApp.CustomLLM

# Or use directly
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: MyApp.CustomLLM
)
```
## Agentic RAG

Use LLMs with the Agent pipeline for complex workflows:

```elixir
alias Arcana.Agent

llm = "openai:gpt-4o-mini"

ctx =
  Agent.new("Compare Elixir and Erlang features", repo: MyApp.Repo, llm: llm)
  |> Agent.gate()                                    # Skip retrieval if not needed
  |> Agent.rewrite()                                 # Clean up conversational input
  |> Agent.select(collections: ["elixir", "erlang"]) # Choose collections
  |> Agent.expand()                                  # Add synonyms
  |> Agent.decompose()                               # Split complex questions
  |> Agent.search()                                  # Execute search
  |> Agent.reason()                                  # Multi-hop reasoning
  |> Agent.rerank(threshold: 7)                      # Filter low-quality chunks
  |> Agent.answer()                                  # Generate answer

ctx.answer
# => "Generated answer based on retrieved context..."
```
### Pipeline Steps with LLM

Each Agent step uses the LLM:

| Step | LLM Purpose |
|------|-------------|
| `gate/2` | Decide if retrieval is needed |
| `rewrite/2` | Clean up conversational queries |
| `select/2` | Choose relevant collections |
| `expand/2` | Add synonyms and related terms |
| `decompose/2` | Split into sub-questions |
| `reason/2` | Evaluate if more search is needed |
| `rerank/2` | Score chunk relevance (0-10) |
| `answer/2` | Generate the final answer |
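Not every workflow needs every step. As a minimal sketch, assuming the steps compose independently (the `ask_with_agent/2` example later on this page also uses only a subset), a lightweight pipeline can skip the query-transformation stages entirely:

```elixir
alias Arcana.Agent

ctx =
  Agent.new("What is OTP?", repo: MyApp.Repo, llm: "openai:gpt-4o-mini")
  |> Agent.search()  # Retrieve chunks with no query rewriting
  |> Agent.answer()  # Generate an answer from whatever was found

ctx.answer
```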
### Custom Prompts

Override default prompts for any step:

```elixir
ctx
|> Agent.expand(
  prompt: fn question ->
    "Generate 5 synonyms for key terms in: #{question}"
  end
)
|> Agent.decompose(
  prompt: fn question ->
    "Break this into 2-3 focused sub-questions: #{question}"
  end
)
|> Agent.answer(
  prompt: fn question, chunks ->
    context = Enum.map_join(chunks, "\n\n", & &1.text)

    """
    Context: #{context}

    Question: #{question}

    Provide a detailed answer with examples.
    """
  end
)
```
## Streaming Responses

Stream LLM responses for better UX in LiveView:

```elixir
defmodule MyAppWeb.ChatLive do
  use MyAppWeb, :live_view

  def handle_event("ask", %{"question" => question}, socket) do
    # Get context from Arcana
    {:ok, context} = Arcana.search(question, repo: MyApp.Repo, limit: 5)

    # Stream response
    send(self(), {:stream_answer, question, context})

    {:noreply, assign(socket, streaming: true, answer: "")}
  end

  def handle_info({:stream_answer, question, context}, socket) do
    live_view_pid = self()

    Task.start(fn ->
      context_text = Enum.map_join(context, "\n\n", & &1.text)

      llm_context =
        ReqLLM.Context.new([
          ReqLLM.Context.system("""
          Answer based on this context:
          #{context_text}
          """),
          ReqLLM.Context.user(question)
        ])

      {:ok, response} = ReqLLM.stream_text("openai:gpt-4o-mini", llm_context)

      response
      |> ReqLLM.StreamResponse.tokens()
      |> Stream.each(fn chunk ->
        send(live_view_pid, {:chunk, chunk})
      end)
      |> Stream.run()

      send(live_view_pid, :stream_done)
    end)

    {:noreply, socket}
  end

  def handle_info({:chunk, content}, socket) do
    {:noreply, update(socket, :answer, &(&1 <> content))}
  end

  def handle_info(:stream_done, socket) do
    {:noreply, assign(socket, streaming: false)}
  end
end
```
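The module above only manages state; the matching template is not shown. A minimal, hypothetical render that streams tokens into the `@answer` assign might look like this (the form markup and field names are assumptions):

```elixir
# Hypothetical render for the LiveView above; markup and field names are assumed.
def render(assigns) do
  ~H"""
  <form phx-submit="ask">
    <input type="text" name="question" placeholder="Ask a question" />
    <button type="submit" disabled={@streaming}>Ask</button>
  </form>

  <p><%= @answer %></p>
  """
end
```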
## Custom RAG Module

Wrap Arcana for app-specific RAG:

```elixir
defmodule MyApp.RAG do
  @default_model "openai:gpt-4o-mini"
  @default_limit 5

  def ask(question, opts \\ []) do
    repo = Keyword.get(opts, :repo, MyApp.Repo)
    model = Keyword.get(opts, :model, @default_model)
    limit = Keyword.get(opts, :limit, @default_limit)
    collection = Keyword.get(opts, :collection)

    search_opts = [
      repo: repo,
      llm: model,
      limit: limit,
      mode: :hybrid
    ]

    search_opts =
      if collection,
        do: Keyword.put(search_opts, :collection, collection),
        else: search_opts

    Arcana.ask(question, search_opts)
  end

  def ask_with_agent(question, opts \\ []) do
    repo = Keyword.get(opts, :repo, MyApp.Repo)
    model = Keyword.get(opts, :model, @default_model)
    collections = Keyword.get(opts, :collections)

    ctx =
      Arcana.Agent.new(question, repo: repo, llm: model)
      |> maybe_select(collections)
      |> Arcana.Agent.expand()
      |> Arcana.Agent.search()
      |> Arcana.Agent.rerank(threshold: 7)
      |> Arcana.Agent.answer()

    {:ok, ctx.answer}
  end

  defp maybe_select(ctx, nil), do: ctx

  defp maybe_select(ctx, collections) do
    Arcana.Agent.select(ctx, collections: collections)
  end
end
```
Usage:

```elixir
# Simple ask
{:ok, answer} = MyApp.RAG.ask("What is Elixir?")

# With options
{:ok, answer} = MyApp.RAG.ask(
  "What is Elixir?",
  collection: "docs",
  model: "anthropic:claude-sonnet-4-20250514"
)

# Agent pipeline
{:ok, answer} = MyApp.RAG.ask_with_agent(
  "Compare Elixir and Erlang",
  collections: ["elixir", "erlang"]
)
```
## Cost Tracking

Monitor LLM costs via telemetry:

```elixir
defmodule MyApp.LLMLogger do
  require Logger

  def setup do
    # Track Arcana LLM calls
    :telemetry.attach(
      "arcana-llm-logger",
      [:arcana, :llm, :complete, :stop],
      &handle_llm/4,
      nil
    )

    # Track req_llm token usage
    :telemetry.attach(
      "req-llm-cost-logger",
      [:req_llm, :token_usage],
      &handle_cost/4,
      nil
    )
  end

  def handle_llm([:arcana, :llm, :complete, :stop], measurements, metadata, _) do
    duration_ms = System.convert_time_unit(measurements.duration, :native, :millisecond)

    Logger.info("""
    LLM Call:
      Model: #{metadata.model}
      Prompt: #{String.slice(metadata.prompt, 0..100)}...
      Duration: #{duration_ms}ms
      Success: #{metadata.success}
    """)
  end

  def handle_cost([:req_llm, :token_usage], measurements, metadata, _) do
    Logger.info("""
    Token Usage:
      Model: #{metadata.model}
      Input: #{measurements.input_tokens} tokens
      Output: #{measurements.output_tokens} tokens
      Cost: $#{Float.round(measurements.total_cost, 4)}
    """)
  end
end

# In application.ex
MyApp.LLMLogger.setup()
```
## Best Practices

- **Use `gpt-4o-mini` for development** - Fast and cheap ($0.15/1M tokens)
- **Upgrade to Claude Sonnet 4 for production** - Better quality, longer context
- **Set `max_tokens`** - Prevent runaway costs
- **Use `temperature: 0.7`** - Good balance of creativity and consistency
- **Stream responses** - Better UX for chat interfaces
- **Monitor costs** - Attach telemetry handlers
- **Cache common queries** - LLM calls are expensive; see the sketch after this list
- **Use hybrid search** - Better context = better answers
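A minimal caching sketch for the point above, assuming an ETS table created elsewhere in your supervision tree and treating the question string as the cache key (the table name and wrapper module are hypothetical):

```elixir
defmodule MyApp.CachedRAG do
  # Hypothetical table; create it at startup with
  # :ets.new(:rag_cache, [:named_table, :public])
  @table :rag_cache

  def ask(question, opts \\ []) do
    case :ets.lookup(@table, question) do
      [{^question, answer}] ->
        # Serve repeated questions without another LLM call
        {:ok, answer}

      [] ->
        with {:ok, answer} <- Arcana.ask(question, opts) do
          :ets.insert(@table, {question, answer})
          {:ok, answer}
        end
    end
  end
end
```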
## Model Selection Guide

| Use Case | Recommended Model | Reason |
|----------|-------------------|--------|
| Development | `gpt-4o-mini` | Fast, cheap, good quality |
| Production | `claude-sonnet-4-20250514` | Best quality, 200K context |
| High Volume | `gemini-2.0-flash-exp` | Free tier, fast |
| Complex Reasoning | `gpt-4o` or `claude-opus-4` | Best reasoning capabilities |
| Low Latency | `groq:llama-3.1-*` | Ultra-fast inference |
| Budget | `gemini-flash` or `gpt-4o-mini` | Low cost |
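One way to apply this guide, assuming the standard per-environment config files of a Mix project, is to pick the model in config rather than at call sites, using the `config :arcana, llm:` key shown earlier:

```elixir
# config/dev.exs - cheap and fast while iterating
config :arcana, llm: "openai:gpt-4o-mini"

# config/prod.exs - higher quality for production traffic
config :arcana, llm: "anthropic:claude-sonnet-4-20250514"
```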
## Troubleshooting

If API keys are missing, set the environment variables:

```bash
export OPENAI_API_KEY=sk-...
export ANTHROPIC_API_KEY=sk-ant-...
```

Check the config:

```elixir
System.get_env("OPENAI_API_KEY") # Should not be nil
```

For rate-limit errors, implement retry logic:

```elixir
defp call_with_retry(llm, prompt, context, retries \\ 3) do
  case Arcana.LLM.complete(llm, prompt, context, []) do
    {:ok, response} ->
      {:ok, response}

    {:error, :rate_limit} when retries > 0 ->
      Process.sleep(1000)
      call_with_retry(llm, prompt, context, retries - 1)

    {:error, reason} ->
      {:error, reason}
  end
end
```

For slow providers, increase the timeout:

```elixir
{:ok, answer} = Arcana.ask("question",
  repo: MyApp.Repo,
  llm: {"openai:gpt-4o-mini", timeout: 60_000}
)
```
## Next Steps

- **Agentic RAG Guide** - Build sophisticated RAG pipelines
- **Embeddings** - Configure embedding providers