
JustLLMs Documentation

Complete guide to building production-ready LLM applications with intelligent routing, enterprise analytics, and multi-provider management.

Installation

Get started with JustLLMs in seconds. Choose the installation option that fits your needs.

Basic Installation

```bash
pip install justllms
```

With PDF Export Support

```bash
pip install justllms[pdf]
```

Full Installation (All Features)

Includes PDF export, Redis caching, and advanced analytics:

```bash
pip install justllms[all]
```
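To confirm the install, import the package. Note that `__version__` is an assumption here; most packages expose it, but check the package itself if this raises an AttributeError:

```python
# Sanity check after installation.
# `justllms.__version__` is an assumption, not confirmed API.
import justllms
print(justllms.__version__)
```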

📦 Package Stats

Size: 1.1 MB
Lines of Code: ~11K
Dependencies: Minimal

Quick Start

Get your first LLM response in under 30 seconds with automatic provider routing.

Quick Start Example

```python
from justllms import JustLLM

# Initialize with your API keys
client = JustLLM({
    "providers": {
        "openai": {"api_key": "your-openai-key"},
        "google": {"api_key": "your-google-key"},
        "anthropic": {"api_key": "your-anthropic-key"}
    }
})

# Simple completion - automatically routes to the best provider
response = client.completion.create(
    messages=[{"role": "user", "content": "Explain quantum computing"}]
)

print(response.content)
print(f"Used provider: {response.provider}")
print(f"Cost: ${response.cost:.4f}")
```

✅ That's it!

JustLLMs automatically chose the best provider based on cost, availability, and performance. No manual provider switching required.
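If you do want to pin a single request to a specific provider or model, many multi-provider SDKs accept explicit arguments on the call. The `provider` and `model` keyword arguments below are assumptions for illustration, not confirmed JustLLMs API:

```python
# Hypothetical sketch: bypassing automatic routing for one request.
# `provider` and `model` are assumed parameter names; check the actual
# JustLLMs API reference for the supported arguments.
response = client.completion.create(
    messages=[{"role": "user", "content": "Explain quantum computing"}],
    provider="anthropic",       # assumed: force a specific provider
    model="claude-3-5-sonnet",  # assumed: force a specific model
)
```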

Multi-Provider Support

Connect to all major LLM providers with a single, consistent interface.

OpenAI (GPT-5, GPT-4, etc.)
Google (Gemini 2.0, Gemini 1.5)
Anthropic (Claude 3.5, Claude 3)
Azure OpenAI
xAI Grok
DeepSeek
Multi-Provider Configuration

```python
from justllms import JustLLM

client = JustLLM({
    "providers": {
        "openai": {
            "api_key": "your-openai-key",
        },
        "anthropic": {
            "api_key": "your-anthropic-key",
        },
        "google": {
            "api_key": "your-google-key",
        }
    },
    "default_provider": "openai",  # Fallback if routing fails
    "timeout": 30                  # Request timeout in seconds
})
```
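Hard-coding keys is fine for a quick test, but in production you would typically load them from the environment. A minimal sketch using the standard library (the variable names here are conventional choices, not ones mandated by JustLLMs):

```python
import os

from justllms import JustLLM

# Read provider credentials from environment variables instead of
# embedding them in source code.
client = JustLLM({
    "providers": {
        "openai": {"api_key": os.environ["OPENAI_API_KEY"]},
        "anthropic": {"api_key": os.environ["ANTHROPIC_API_KEY"]},
        "google": {"api_key": os.environ["GOOGLE_API_KEY"]},
    }
})
```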

RAG Integration

Enterprise-ready document search and knowledge retrieval with support for multiple vector databases.

Setup RAG with Pinecone

```python
from justllms.rag import RAGPipeline

rag = RAGPipeline({
    "vector_store": "pinecone",
    "pinecone_config": {
        "api_key": "your-pinecone-key",
        "environment": "us-east-1-aws",
        "index_name": "knowledge-base"
    },
    "embedding_model": "text-embedding-ada-002",
    "chunk_size": 1000,
    "chunk_overlap": 200
})
```
Index Documents

```python
# Index PDF documents
rag.index_documents([
    "./documents/company_handbook.pdf",
    "./documents/product_specs.pdf",
    "./documents/faq.pdf"
])

# Index raw text content
rag.index_text(
    content="Your company policies and procedures...",
    metadata={"source": "hr_policies", "date": "2024-01-15"}
)
```
RAG-Enhanced Completions

```python
# Ask questions with document context
response = client.completion.create(
    messages=[{
        "role": "user",
        "content": "What is our remote work policy?"
    }],
    rag_enabled=True,
    rag_config={
        "top_k": 5,  # Retrieve the top 5 relevant chunks
        "similarity_threshold": 0.7,
        "include_sources": True
    }
)

print(response.content)
print("Sources:")
for source in response.sources:
    print(f"- {source.filename} (page {source.page})")
```

Intelligent Routing

Automatically route requests to the optimal provider based on cost, speed, or quality preferences.

Cost-Optimized Routing

```python
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "cost",    # Route to the cheapest provider
        "fallback": True,      # Auto-fallback on failure
        "max_retries": 3,      # Retry failed requests
        "prefer_cached": True  # Use cached responses when available
    }
})
```

Speed-Optimized Routing

```python
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "speed",          # Route to the fastest provider
        "response_time_weight": 0.8,  # How much to weight response time
        "availability_weight": 0.2    # How much to weight uptime
    }
})
```

Quality-Based Routing

```python
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "quality",     # Route based on model capabilities
        "task_type": "reasoning",  # Options: reasoning, creative, coding
        "model_preferences": {
            "reasoning": ["gpt-4", "claude-3-5-sonnet"],
            "creative": ["gpt-4", "gemini-pro"],
            "coding": ["gpt-4", "claude-3-5-sonnet"]
        }
    }
})
```

💰 Cost Savings

Intelligent routing typically reduces LLM costs by 40-60% while maintaining quality. The system learns from usage patterns and automatically optimizes over time.
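To make the idea concrete, here is a toy illustration of cost-based selection with fallback ordering. It is not the JustLLMs implementation, just the general shape of the decision; the provider names and prices are made up:

```python
# Toy illustration of cost-based routing (not JustLLMs internals).
# Providers are ranked by estimated price per 1K tokens; the cheapest
# healthy provider is tried first, with the rest kept as fallbacks.
PRICES_PER_1K_TOKENS = {  # made-up example prices
    "provider_a": 0.0005,
    "provider_b": 0.0030,
    "provider_c": 0.0150,
}

def rank_providers(healthy: set) -> list:
    """Return healthy providers ordered from cheapest to most expensive."""
    candidates = [p for p in PRICES_PER_1K_TOKENS if p in healthy]
    return sorted(candidates, key=PRICES_PER_1K_TOKENS.__getitem__)

print(rank_providers({"provider_a", "provider_c"}))
# ['provider_a', 'provider_c']
```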

Real-time Streaming

Stream responses in real time for a more responsive user experience, with streaming handled automatically for each provider.

Basic Streaming

```python
# Stream a response
stream = client.completion.create(
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True
)

for chunk in stream:
    if chunk.content:
        print(chunk.content, end="", flush=True)
    if chunk.done:
        print(f"\nCompleted using {chunk.provider}")
        break
```
Advanced Streaming with Error Handling

```python
from justllms.exceptions import StreamingError

messages = [{"role": "user", "content": "Explain machine learning"}]

try:
    stream = client.completion.create(
        messages=messages,
        stream=True,
        max_tokens=500,
        temperature=0.7
    )
    collected_content = ""
    for chunk in stream:
        if chunk.content:
            collected_content += chunk.content
            print(chunk.content, end="", flush=True)
        # Access streaming metadata
        if chunk.usage:
            print(f"\nTokens used: {chunk.usage.total_tokens}")
            print(f"Cost so far: ${chunk.cost:.4f}")
except StreamingError as e:
    print(f"Streaming failed: {e}")
    # Fall back to a non-streaming request
    response = client.completion.create(messages=messages)
    print(response.content)
```

Conversation Management

Maintain context across multiple exchanges with automatic conversation history and token management.

Conversation Sessions

```python
# Create a conversation session
conversation = client.conversation.create(
    name="user_chat_001",
    max_history=10,  # Keep the last 10 exchanges
    max_tokens=4000  # Auto-trim when approaching limits
)

# Add messages to the conversation
conversation.add_user_message("What is machine learning?")
response1 = conversation.complete()
print(f"AI: {response1.content}")

# Continue the conversation - context is maintained
conversation.add_user_message("Give me a practical example")
response2 = conversation.complete()
print(f"AI: {response2.content}")

# View conversation history
print(f"Total exchanges: {len(conversation.history)}")
print(f"Total cost: ${conversation.total_cost:.4f}")
```
Conversation Persistence

```python
# Save the conversation to a file
conversation.save("./conversations/user_001.json")

# Load a conversation from a file
conversation = client.conversation.load("./conversations/user_001.json")

# Export the conversation history
history = conversation.export(format="markdown")
with open("chat_history.md", "w") as f:
    f.write(history)
```

Smart Caching

Reduce costs and improve response times with intelligent caching that understands semantic similarity.

Enable Caching

```python
client = JustLLM({
    "providers": {...},
    "caching": {
        "enabled": True,
        "backend": "memory",           # Options: memory, redis, file
        "ttl": 3600,                   # Cache for 1 hour
        "similarity_threshold": 0.85,  # How similar queries must be
        "max_cache_size": 1000         # Maximum cached responses
    }
})
```
Redis Caching (Production)

```python
client = JustLLM({
    "providers": {...},
    "caching": {
        "enabled": True,
        "backend": "redis",
        "redis_config": {
            "host": "localhost",
            "port": 6379,
            "password": "your_redis_password",
            "db": 0
        },
        "ttl": 86400,  # Cache for 24 hours
        "similarity_threshold": 0.90
    }
})
```

⚡ Performance Impact

Smart caching typically improves response times by 90%+ for similar queries while reducing API costs. The system uses semantic similarity to match related questions.
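The semantic-matching idea works roughly like this: instead of exact string matching, queries are embedded as vectors and compared by cosine similarity against the configured threshold. A toy sketch of that lookup follows; it is not the library's internals, and it assumes query vectors have already been produced by some embedding model:

```python
# Toy sketch of a semantic cache lookup (not JustLLMs internals).
import math

def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0

def lookup(query_vec, cache, threshold=0.85):
    """Return the cached response for the most similar past query, if any.

    `cache` is a list of (embedding_vector, cached_response) pairs.
    """
    best_score, best_response = 0.0, None
    for cached_vec, response in cache:
        score = cosine_similarity(query_vec, cached_vec)
        if score > best_score:
            best_score, best_response = score, response
    return best_response if best_score >= threshold else None
```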

Enterprise Analytics

Comprehensive usage tracking with detailed cost analysis, performance insights, and exportable reports.

Enable Analytics

```python
client = JustLLM({
    "providers": {...},
    "analytics": {
        "enabled": True,
        "track_costs": True,
        "track_performance": True,
        "track_usage": True,
        "export_format": "csv"  # Options: csv, json, pdf
    }
})
```
Generate Analytics Report

```python
# Generate a comprehensive usage report
report = client.analytics.generate_report(
    start_date="2024-01-01",
    end_date="2024-01-31",
    group_by="provider"  # Options: provider, model, user, date
)

print(f"Total requests: {report.total_requests}")
print(f"Total cost: ${report.total_cost:.2f}")
print(f"Average response time: {report.avg_response_time:.2f}s")

# Export to different formats
report.export("monthly_report.csv")
report.export("monthly_report.pdf")  # Requires justllms[pdf]

# Get top performers
top_models = report.get_top_models_by_performance()
cost_breakdown = report.get_cost_breakdown_by_provider()
```
Real-time Monitoring

```python
# Set up real-time monitoring
monitor = client.analytics.create_monitor(
    alerts={
        "high_cost": {"threshold": 100, "period": "daily"},
        "slow_response": {"threshold": 5.0, "period": "hourly"},
        "error_rate": {"threshold": 0.05, "period": "hourly"}
    },
    webhooks=["https://your-app.com/webhook"]
)

# Check current metrics
metrics = client.analytics.get_current_metrics()
print(f"Requests today: {metrics.requests_today}")
print(f"Cost today: ${metrics.cost_today:.2f}")
print(f"Average response time: {metrics.avg_response_time:.2f}s")
```

Business Rules & Validation

Implement content filtering, compliance rules, and response validation for enterprise safety.

Content Filtering

```python
client = JustLLM({
    "providers": {...},
    "validation": {
        "input_filters": [
            {"type": "profanity", "action": "reject"},
            {"type": "pii", "action": "mask"},
            {"type": "custom", "pattern": r"confidential", "action": "alert"}
        ],
        "output_filters": [
            {"type": "harmful_content", "action": "reject"},
            {"type": "off_topic", "threshold": 0.3, "action": "flag"}
        ]
    }
})
```
Custom Validation Rules

```python
from justllms.validation import ValidationRule

# Define a custom validation function
def financial_compliance_check(content):
    prohibited_terms = ["insider trading", "financial advice"]
    for term in prohibited_terms:
        if term.lower() in content.lower():
            return {"valid": False, "reason": f"Contains prohibited term: {term}"}
    return {"valid": True}

# Register the custom validator
client.add_validation_rule(
    ValidationRule(
        name="financial_compliance",
        validator=financial_compliance_check,
        apply_to="output"
    )
)

# Responses are automatically validated
response = client.completion.create(
    messages=[{"role": "user", "content": "Tell me about stocks"}]
)
print(f"Validation passed: {response.validation.passed}")
print(f"Validation flags: {response.validation.flags}")
```