Complete guide to building production-ready LLM applications with intelligent routing, enterprise analytics, and multi-provider management.
Get started with JustLLMs in seconds. Choose the installation option that fits your needs.
pip install justllms
pip install justllms[pdf]
Adds PDF report export
pip install justllms[all]
Includes PDF export, Redis caching, and advanced analytics
Get your first LLM response in under 30 seconds with automatic provider routing.
from justllms import JustLLM

# Initialize with your API keys
client = JustLLM({
    "providers": {
        "openai": {"api_key": "your-openai-key"},
        "google": {"api_key": "your-google-key"},
        "anthropic": {"api_key": "your-anthropic-key"}
    }
})

# Simple completion - automatically routes to the best provider
response = client.completion.create(
    messages=[{"role": "user", "content": "Explain quantum computing"}]
)

print(response.content)
print(f"Used provider: {response.provider}")
print(f"Cost: ${response.cost:.4f}")
JustLLMs automatically chose the best provider based on cost, availability, and performance. No manual provider switching required.
Connect to all major LLM providers with a single, consistent interface.
from justllms import JustLLM

client = JustLLM({
    "providers": {
        "openai": {
            "api_key": "your-openai-key",
        },
        "anthropic": {
            "api_key": "your-anthropic-key",
        },
        "google": {
            "api_key": "your-google-key",
        }
    },
    "default_provider": "openai",  # Fallback if routing fails
    "timeout": 30                  # Request timeout in seconds
})
Enterprise-ready document search and knowledge retrieval with support for multiple vector databases.
from justllms.rag import RAGPipeline

rag = RAGPipeline({
    "vector_store": "pinecone",
    "pinecone_config": {
        "api_key": "your-pinecone-key",
        "environment": "us-east-1-aws",
        "index_name": "knowledge-base"
    },
    "embedding_model": "text-embedding-ada-002",
    "chunk_size": 1000,
    "chunk_overlap": 200
})
# Index PDF documents
rag.index_documents([
    "./documents/company_handbook.pdf",
    "./documents/product_specs.pdf",
    "./documents/faq.pdf"
])

# Index text content
rag.index_text(
    content="Your company policies and procedures...",
    metadata={"source": "hr_policies", "date": "2024-01-15"}
)
# Ask questions with document context
response = client.completion.create(
    messages=[{
        "role": "user",
        "content": "What is our remote work policy?"
    }],
    rag_enabled=True,
    rag_config={
        "top_k": 5,                    # Retrieve top 5 relevant chunks
        "similarity_threshold": 0.7,
        "include_sources": True
    }
)

print(response.content)
print("Sources:")
for source in response.sources:
    print(f"- {source.filename} (page {source.page})")
Automatically route requests to the optimal provider based on cost, speed, or quality preferences.
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "cost",        # Route to cheapest provider
        "fallback": True,          # Auto-fallback on failure
        "max_retries": 3,          # Retry failed requests
        "prefer_cached": True      # Use cached responses when available
    }
})
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "speed",             # Route to fastest provider
        "response_time_weight": 0.8,     # How much to weight response time
        "availability_weight": 0.2       # How much to weight uptime
    }
})
client = JustLLM({
    "providers": {...},
    "routing": {
        "strategy": "quality",        # Route based on model capabilities
        "task_type": "reasoning",     # Options: reasoning, creative, coding
        "model_preferences": {
            "reasoning": ["gpt-4", "claude-3-5-sonnet"],
            "creative": ["gpt-4", "gemini-pro"],
            "coding": ["gpt-4", "claude-3-5-sonnet"]
        }
    }
})
Intelligent routing typically reduces LLM costs by 40-60% while maintaining quality. The system learns from usage patterns and automatically optimizes over time.
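To see what routing is doing for your own workload, you can run the same prompt under different strategies and compare the provider and cost fields shown in the quick-start example. The sketch below is illustrative, not part of the library: it assumes the same JustLLM constructor and response fields used above, and the make_client helper and provider keys are placeholders.

import time
from justllms import JustLLM

# Hypothetical helper: build a client that differs only in routing strategy.
def make_client(strategy):
    return JustLLM({
        "providers": {
            "openai": {"api_key": "your-openai-key"},
            "anthropic": {"api_key": "your-anthropic-key"},
        },
        "routing": {"strategy": strategy},
    })

prompt = [{"role": "user", "content": "Summarize the theory of relativity in two sentences."}]

# Compare which provider each strategy picks and what the request costs.
for strategy in ("cost", "speed", "quality"):
    response = make_client(strategy).completion.create(messages=prompt)
    print(f"{strategy:>7}: provider={response.provider}, cost=${response.cost:.4f}")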
Stream responses in real time for a better user experience; streaming is supported automatically across providers.
# Stream a response
stream = client.completion.create(
    messages=[{"role": "user", "content": "Write a story about AI"}],
    stream=True
)

for chunk in stream:
    if chunk.content:
        print(chunk.content, end="", flush=True)
    if chunk.done:
        print(f"\nCompleted using {chunk.provider}")
        break
from justllms.exceptions import StreamingError

messages = [{"role": "user", "content": "Explain machine learning"}]

try:
    stream = client.completion.create(
        messages=messages,
        stream=True,
        max_tokens=500,
        temperature=0.7
    )

    collected_content = ""
    for chunk in stream:
        if chunk.content:
            collected_content += chunk.content
            print(chunk.content, end="", flush=True)

        # Access streaming metadata
        if chunk.usage:
            print(f"\nTokens used: {chunk.usage.total_tokens}")
            print(f"Cost so far: ${chunk.cost:.4f}")

except StreamingError as e:
    print(f"Streaming failed: {e}")
    # Fall back to a non-streaming request
    response = client.completion.create(messages=messages)
    print(response.content)
Maintain context across multiple exchanges with automatic conversation history and token management.
# Create a conversation session
conversation = client.conversation.create(
    name="user_chat_001",
    max_history=10,    # Keep last 10 exchanges
    max_tokens=4000    # Auto-trim when approaching limits
)

# Add messages to the conversation
conversation.add_user_message("What is machine learning?")
response1 = conversation.complete()
print(f"AI: {response1.content}")

# Continue the conversation - context is maintained
conversation.add_user_message("Give me a practical example")
response2 = conversation.complete()
print(f"AI: {response2.content}")

# View conversation history
print(f"Total exchanges: {len(conversation.history)}")
print(f"Total cost: ${conversation.total_cost:.4f}")
# Save conversation to file
conversation.save("./conversations/user_001.json")

# Load conversation from file
conversation = client.conversation.load("./conversations/user_001.json")

# Export conversation history
history = conversation.export(format="markdown")
with open("chat_history.md", "w") as f:
    f.write(history)
Reduce costs and improve response times with intelligent caching that understands semantic similarity.
client = JustLLM({
    "providers": {...},
    "caching": {
        "enabled": True,
        "backend": "memory",             # Options: memory, redis, file
        "ttl": 3600,                     # Cache for 1 hour
        "similarity_threshold": 0.85,    # How similar queries must be
        "max_cache_size": 1000           # Maximum cached responses
    }
})
client = JustLLM({
    "providers": {...},
    "caching": {
        "enabled": True,
        "backend": "redis",
        "redis_config": {
            "host": "localhost",
            "port": 6379,
            "password": "your_redis_password",
            "db": 0
        },
        "ttl": 86400,                    # Cache for 24 hours
        "similarity_threshold": 0.90
    }
})
Smart caching typically improves response times by 90%+ for similar queries while reducing API costs. The system uses semantic similarity to match related questions.
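A simple way to check the cache is working is to time two differently worded but semantically similar requests; the second should return much faster once the first response is cached. This is a minimal sketch assuming the memory-backend client configured above; actual timings and the exact speedup will vary.

import time

def timed_completion(question):
    # Measure wall-clock latency for a single completion.
    start = time.perf_counter()
    response = client.completion.create(
        messages=[{"role": "user", "content": question}]
    )
    elapsed = time.perf_counter() - start
    print(f"{elapsed:.2f}s  ${response.cost:.4f}  {question}")
    return response

# First call hits the provider; the second should match via semantic similarity.
timed_completion("What is the capital of France?")
timed_completion("Tell me the capital city of France.")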
Comprehensive usage tracking with detailed cost analysis, performance insights, and exportable reports.
client = JustLLM({
    "providers": {...},
    "analytics": {
        "enabled": True,
        "track_costs": True,
        "track_performance": True,
        "track_usage": True,
        "export_format": "csv"    # Options: csv, json, pdf
    }
})
# Generate a comprehensive usage report
report = client.analytics.generate_report(
    start_date="2024-01-01",
    end_date="2024-01-31",
    group_by="provider"    # Options: provider, model, user, date
)

print(f"Total requests: {report.total_requests}")
print(f"Total cost: ${report.total_cost:.2f}")
print(f"Average response time: {report.avg_response_time:.2f}s")

# Export to different formats
report.export("monthly_report.csv")
report.export("monthly_report.pdf")    # Requires justllms[pdf]

# Get top performers
top_models = report.get_top_models_by_performance()
cost_breakdown = report.get_cost_breakdown_by_provider()
# Set up real-time monitoring
monitor = client.analytics.create_monitor(
    alerts={
        "high_cost": {"threshold": 100, "period": "daily"},
        "slow_response": {"threshold": 5.0, "period": "hourly"},
        "error_rate": {"threshold": 0.05, "period": "hourly"}
    },
    webhooks=["https://your-app.com/webhook"]
)

# Check current metrics
metrics = client.analytics.get_current_metrics()
print(f"Requests today: {metrics.requests_today}")
print(f"Cost today: ${metrics.cost_today:.2f}")
print(f"Average response time: {metrics.avg_response_time:.2f}s")
Implement content filtering, compliance rules, and response validation for enterprise safety.
client = JustLLM({
    "providers": {...},
    "validation": {
        "input_filters": [
            {"type": "profanity", "action": "reject"},
            {"type": "pii", "action": "mask"},
            {"type": "custom", "pattern": r"confidential", "action": "alert"}
        ],
        "output_filters": [
            {"type": "harmful_content", "action": "reject"},
            {"type": "off_topic", "threshold": 0.3, "action": "flag"}
        ]
    }
})
from justllms.validation import ValidationRule

# Define a custom validation check
def financial_compliance_check(content):
    prohibited_terms = ["insider trading", "financial advice"]
    for term in prohibited_terms:
        if term.lower() in content.lower():
            return {"valid": False, "reason": f"Contains prohibited term: {term}"}
    return {"valid": True}

# Register the custom validator
client.add_validation_rule(
    ValidationRule(
        name="financial_compliance",
        validator=financial_compliance_check,
        apply_to="output"
    )
)

# Responses are automatically validated
response = client.completion.create(
    messages=[{"role": "user", "content": "Tell me about stocks"}]
)
print(f"Validation passed: {response.validation.passed}")
print(f"Validation flags: {response.validation.flags}")