Briefcase AI SDK Documentation for LLM Context
Overview
Briefcase AI is a comprehensive observability and decision-tracking platform for AI applications. It provides two distribution paths:
- briefcase-ai (Python) - Pure Python SDK with OpenTelemetry instrumentation for lakeFS integration, compliance reporting, RAG pipelines, external data tracking, and validation
- briefcase-ai (Rust core) - High-performance Rust-powered SDK with PyO3 bindings for decision tracking, deterministic replay, storage backends, drift detection, cost analysis, and PII sanitization
Also available as:
briefcase-core (Rust crate), briefcase-wasm (npm package for JavaScript/TypeScript), briefcase-server (HTTP API)
Using llm.txt for LLM Context
This documentation is also available as a machine-readable llm.txt file optimized for LLM consumption at:
https://docs.briefcasebrain.com/llm.txt
How to Use llm.txt
For AI Assistants (Claude, ChatGPT, etc.)
When working with LLM assistants on Briefcase AI integration, reference the llm.txt file:
"Please help me integrate Briefcase AI with my OpenAI calls.
Use the context from https://docs.briefcasebrain.com/llm.txt"
For Development Tools
Many AI-powered development tools automatically discover llm.txt files:
- Cursor IDE: Automatically indexes llm.txt for better code completion
- GitHub Copilot: Can reference llm.txt context in suggestions
- Codeium: Uses llm.txt for more accurate API recommendations
For Custom LLM Applications
Fetch and use as context in your own LLM applications:
import requests
# Fetch LLM-optimized documentation
response = requests.get("https://docs.briefcasebrain.com/llm.txt")
llm_context = response.text
# Use as context for your LLM calls
system_prompt = f"""
You are a helpful assistant for Briefcase AI integration.
Context:
{llm_context}
Answer questions about integrating LLM providers with Briefcase AI.
"""
# Use with any LLM provider (OpenAI v1+ client shown)
client = openai.OpenAI()
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "How do I track OpenAI calls with Briefcase AI?"}
    ]
)
What's in llm.txt vs. This Page
- llm.txt: Optimized for LLM consumption, pattern-first approach, essential examples
- This page: Human-friendly browsing, comprehensive details, cross-references
Both contain the same accurate information, just optimized for different audiences.
Python SDK Installation
pip install briefcase-ai
# Optional extras
pip install briefcase-ai[lakefs] # lakeFS integration
pip install briefcase-ai[langchain] # LangChain callbacks
pip install briefcase-ai[vector] # Vector database integrations
Core API Reference
Basic Usage
import briefcase_ai
# Initialize the library
briefcase_ai.init()
# Create a decision snapshot
decision = briefcase_ai.DecisionSnapshot("ai_function_name")
# Add inputs
input_data = briefcase_ai.Input("user_query", "What is machine learning?", "string")
decision.add_input(input_data)
# Add model parameters
params = briefcase_ai.ModelParameters("gpt-4")
params.with_provider("openai")
params.with_parameter("temperature", 0.7)
params.with_parameter("max_tokens", 150)
decision.with_model_parameters(params)
# Add outputs with confidence
output_data = briefcase_ai.Output("ai_response", "Machine learning is...", "string")
output_data.with_confidence(0.95)
decision.add_output(output_data)
# Add metadata
decision.with_execution_time(125.5) # milliseconds
decision.with_module("chat_service")
decision.add_tag("environment", "production")
decision.add_tag("user_id", "user_123")
# Save to storage
storage = briefcase_ai.SqliteBackend.in_memory()
decision_id = storage.save_decision(decision)
Storage Backends
# In-memory storage (for testing)
storage = briefcase_ai.SqliteBackend.in_memory()
# Persistent SQLite
storage = briefcase_ai.SqliteBackend.new("decisions.db")
# lakeFS integration
from briefcase.integrations.lakefs import VersionedClient
lakefs_client = VersionedClient(
repository="my-repo",
branch="main",
briefcase_client=storage
)
Drift Detection
calculator = briefcase_ai.DriftCalculator()
# Analyze model outputs over time
outputs = ["positive", "positive", "negative", "positive", "negative"]
metrics = calculator.calculate_drift(outputs)
print(f"Consistency Score: {metrics.consistency_score}")
print(f"Agreement Rate: {metrics.agreement_rate}")
print(f"Status: {calculator.get_status(metrics)}")
print(f"Consensus: {metrics.consensus_output}")
print(f"Outliers: {len(metrics.outliers)}")
# Custom threshold
strict_calculator = briefcase_ai.DriftCalculator.with_threshold(0.95)
Cost Calculation
calculator = briefcase_ai.CostCalculator()
# Estimate costs for different models
estimate = calculator.estimate_cost("gpt-4", input_tokens=1000, output_tokens=500)
print(f"Total cost: ${estimate.total_cost:.4f}")
print(f"Input cost: ${estimate.input_cost:.4f}")
print(f"Output cost: ${estimate.output_cost:.4f}")
# Budget monitoring
status = calculator.check_budget(spent=85.0, budget=100.0)
print(f"Status: {status.status}") # ok, warning, critical, exceeded
print(f"Percent used: {status.percent_used}%")
# Monthly projection
projection = calculator.project_monthly_cost("gpt-4", 5000, 2000, 30.0)
print(f"Daily: ${projection.daily_cost:.2f}")
print(f"Monthly: ${projection.monthly_cost:.2f}")
Data Sanitization
sanitizer = briefcase_ai.Sanitizer()
# Text sanitization
text = "Contact me at john.doe@company.com or call 555-123-4567"
result = sanitizer.sanitize(text)
print(f"Sanitized: {result.sanitized}")
print(f"Redactions: {len(result.redactions)}")
# JSON sanitization
sensitive_data = {
"api_key": "sk-abc123",
"user_email": "user@example.com",
"public_data": "safe content"
}
json_result = sanitizer.sanitize_json(sensitive_data)
# Custom patterns
sanitizer.add_pattern("employee_id", r"\bEMP-\d{6}\b")
Replay Engine
engine = briefcase_ai.ReplayEngine(storage)
# Replay a decision
result = engine.replay(decision_id, "strict") # strict, tolerant, validation_only
print(f"Original output: {result.original_output}")
print(f"Replay output: {result.replay_output}")
print(f"Match: {result.outputs_match}")
JavaScript/TypeScript SDK (WASM)
Installation
npm install briefcase-wasm
Usage
import { init, JsDecisionSnapshot, JsInput, JsOutput, JsMemoryStorage } from 'briefcase-wasm';
// Initialize WASM module
await init();
// Create decision snapshot
const decision = new JsDecisionSnapshot("chat_completion");
// Add input and output
const input = new JsInput("prompt", "Hello world", "string");
const output = new JsOutput("response", "Hello back!", "string");
decision.addInput(input);
decision.addOutput(output);
// Save to storage
const storage = new JsMemoryStorage();
const decisionId = storage.saveDecision(decision);
TypeScript Support
import type { JsDecisionSnapshot, JsInput, JsOutput } from 'briefcase-wasm';
interface ChatCompletionParams {
prompt: string;
model: string;
temperature: number;
}
async function trackChatCompletion(params: ChatCompletionParams, response: string) {
const decision = new JsDecisionSnapshot("chat_completion");
decision.addInput(new JsInput("prompt", params.prompt, "string"));
decision.addInput(new JsInput("model", params.model, "string"));
decision.addInput(new JsInput("temperature", params.temperature.toString(), "float"));
decision.addOutput(new JsOutput("response", response, "string"));
// Store decision
const storage = new JsMemoryStorage();
return storage.saveDecision(decision);
}
Framework Integrations
LangChain Handler
from briefcase.integrations.frameworks.langchain_handler import BriefcaseLangChainHandler
# Create handler
handler = BriefcaseLangChainHandler(
engagement_id="my-project",
workstream_id="chat-bot",
capture_llm=True,
capture_chains=True,
capture_tools=True
)
# Use with any LangChain component
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
llm = OpenAI(callbacks=[handler])
prompt = PromptTemplate.from_template("Tell me about {topic}")
chain = LLMChain(llm=llm, prompt=prompt)
result = chain.invoke({"topic": "AI"})
# Retrieve captured decisions
decisions = handler.get_decisions()
for decision in decisions:
print(f"Function: {decision.function_name}")
print(f"Type: {decision.decision_type}")
print(f"Inputs: {decision.inputs}")
print(f"Outputs: {decision.outputs}")
LlamaIndex Handler
from briefcase.integrations.frameworks.llamaindex_handler import BriefcaseLlamaIndexHandler
handler = BriefcaseLlamaIndexHandler(
engagement_id="my-project",
workstream_id="rag-system"
)
# Use with LlamaIndex
from llama_index.core import Settings
Settings.callback_manager.add_handler(handler)
LLM Integration Patterns
OpenAI Integration
import openai
import briefcase_ai
briefcase_ai.init()
client = openai.OpenAI()
def track_openai_completion(prompt: str, model: str = "gpt-4") -> tuple[str, str]:
    # Create decision snapshot
    decision = briefcase_ai.DecisionSnapshot("openai_completion")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
    decision.add_input(briefcase_ai.Input("model", model, "string"))
    # Record start time
    import time
    start_time = time.time()
    try:
        # Make OpenAI API call
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        # Extract response
        result = response.choices[0].message.content
        execution_time = (time.time() - start_time) * 1000
        # Add model parameters
        params = briefcase_ai.ModelParameters(model)
        params.with_provider("openai")
        decision.with_model_parameters(params)
        # Add output and timing
        decision.add_output(briefcase_ai.Output("response", result, "string"))
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "openai")
        decision.add_tag("status", "success")
        # Add token usage if available
        if hasattr(response, 'usage'):
            decision.add_tag("prompt_tokens", str(response.usage.prompt_tokens))
            decision.add_tag("completion_tokens", str(response.usage.completion_tokens))
            decision.add_tag("total_tokens", str(response.usage.total_tokens))
    except Exception as e:
        execution_time = (time.time() - start_time) * 1000
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "openai")
        decision.add_tag("status", "error")
        decision.add_tag("error", str(e))
        raise
    finally:
        # Save decision regardless of success/failure
        storage = briefcase_ai.SqliteBackend.in_memory()
        decision_id = storage.save_decision(decision)
    # Return AFTER the try statement: a return inside `finally` would
    # silently swallow the re-raised exception.
    return result, decision_id
# Usage
response, decision_id = track_openai_completion("Explain quantum computing", "gpt-4")
print(f"Response: {response}")
print(f"Decision ID: {decision_id}")
Anthropic Claude Integration
import anthropic
import briefcase_ai
import time

def track_claude_completion(prompt: str, model: str = "claude-3-sonnet-20240229") -> tuple[str, str]:
    briefcase_ai.init()
    client = anthropic.Anthropic()
    decision = briefcase_ai.DecisionSnapshot("claude_completion")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
    decision.add_input(briefcase_ai.Input("model", model, "string"))
    start_time = time.time()
    try:
        response = client.messages.create(
            model=model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
        result = response.content[0].text
        execution_time = (time.time() - start_time) * 1000
        params = briefcase_ai.ModelParameters(model)
        params.with_provider("anthropic")
        decision.with_model_parameters(params)
        decision.add_output(briefcase_ai.Output("response", result, "string"))
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "anthropic")
        decision.add_tag("status", "success")
        if hasattr(response, 'usage'):
            decision.add_tag("input_tokens", str(response.usage.input_tokens))
            decision.add_tag("output_tokens", str(response.usage.output_tokens))
    except Exception as e:
        execution_time = (time.time() - start_time) * 1000
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "anthropic")
        decision.add_tag("status", "error")
        decision.add_tag("error", str(e))
        raise
    finally:
        storage = briefcase_ai.SqliteBackend.in_memory()
        decision_id = storage.save_decision(decision)
    # Returning after the try statement lets exceptions propagate while
    # still saving the decision in `finally`.
    return result, decision_id
Generic LLM Wrapper
from typing import Callable, Dict, Any
import briefcase_ai
import time

class LLMTracker:
    def __init__(self, storage_backend=None):
        briefcase_ai.init()
        self.storage = storage_backend or briefcase_ai.SqliteBackend.in_memory()

    def track_llm_call(
        self,
        function_name: str,
        llm_function: Callable,
        inputs: Dict[str, Any],
        provider: str,
        model: str,
        **kwargs
    ) -> tuple[Any, str]:
        """Generic wrapper for any LLM function call."""
        decision = briefcase_ai.DecisionSnapshot(function_name)
        # Add inputs
        for key, value in inputs.items():
            input_type = "string" if isinstance(value, str) else "json"
            decision.add_input(briefcase_ai.Input(key, str(value), input_type))
        # Add model info
        params = briefcase_ai.ModelParameters(model)
        params.with_provider(provider)
        decision.with_model_parameters(params)
        start_time = time.time()
        try:
            # Execute the LLM function
            result = llm_function(**inputs, **kwargs)
            execution_time = (time.time() - start_time) * 1000
            # Add output
            result_type = "string" if isinstance(result, str) else "json"
            decision.add_output(briefcase_ai.Output("response", str(result), result_type))
            decision.with_execution_time(execution_time)
            decision.add_tag("provider", provider)
            decision.add_tag("status", "success")
        except Exception as e:
            execution_time = (time.time() - start_time) * 1000
            decision.with_execution_time(execution_time)
            decision.add_tag("provider", provider)
            decision.add_tag("status", "error")
            decision.add_tag("error", str(e))
            raise
        finally:
            decision_id = self.storage.save_decision(decision)
        # Return outside `finally` so the re-raised exception is not swallowed.
        return result, decision_id
# Usage example
tracker = LLMTracker()
def my_openai_call(prompt: str, model: str = "gpt-4"):
import openai
client = openai.OpenAI()
response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": prompt}]
)
return response.choices[0].message.content
result, decision_id = tracker.track_llm_call(
function_name="chat_completion",
llm_function=my_openai_call,
inputs={"prompt": "Hello world"},
provider="openai",
model="gpt-4"
)
Server API Endpoints
Base URL
https://api.briefcasebrain.com
Health Check
GET /health
Response:
{
"status": "healthy",
"storage_healthy": true,
"version": "2.1.30",
"uptime_secs": 3600
}
Create Decision
POST /api/v1/decisions
Content-Type: application/json
{
"function_name": "evaluate_claim",
"module_name": "claims",
"inputs": [{"value": "claim_data", "type": "json"}],
"outputs": [{"value": "approved", "type": "string", "confidence": 0.95}],
"model_params": {
"name": "gpt-4",
"version": "0613",
"provider": "openai",
"params": {"temperature": 0.1}
},
"execution_time_ms": 1250,
"tags": {"environment": "production"}
}
List Decisions
GET /api/v1/decisions?function_name=evaluate_claim&limit=50&offset=0
Replay Decision
POST /api/v1/replay/{decision_id}
Content-Type: application/json
{
"mode": "strict",
"context_overrides": {}
}
Compare Decisions
POST /api/v1/diff
Content-Type: application/json
{
"original_id": "dec_abc123",
"new_id": "dec_def456"
}
Compliance and Data Management
lakeFS Integration
from briefcase.integrations.lakefs import lakefs_context, VersionedClient
# Using context manager
with lakefs_context(client, "acme-healthcare", "main") as lakefs:
policy_content = lakefs.read_object("policies/policy.pdf")
# Process with AI
decision = briefcase_ai.DecisionSnapshot("policy_review")
decision.add_input(briefcase_ai.Input("policy", policy_content, "string"))
# ... AI processing ...
# Context automatically tracks lakeFS commit SHA
# Direct client usage
versioned_client = VersionedClient(
repository="acme-healthcare",
branch="main",
briefcase_client=storage
)
Compliance Reporting
from briefcase.compliance.reports import SOC2ReportGenerator
generator = SOC2ReportGenerator(client)
report = generator.evaluate(
tenant="acme",
branch="main",
start_date="2024-01-01",
end_date="2024-12-31"
)
print(f"Compliance score: {report.overall_score}")
print(f"Controls evaluated: {len(report.controls)}")
Multi-Agent Workflows
from briefcase.correlation import briefcase_workflow
with briefcase_workflow("prior_auth", client) as workflow:
# All decisions within this context are correlated
# Agent 1: Extract claim data
claim_decision = briefcase_ai.DecisionSnapshot("extract_claim_data")
# ... process claim ...
# Agent 2: Validate against policy
policy_decision = briefcase_ai.DecisionSnapshot("validate_policy")
# ... validate claim ...
# Agent 3: Make final decision
final_decision = briefcase_ai.DecisionSnapshot("make_decision")
# ... make decision ...
# All decisions share correlation ID for tracing
Best Practices
1. Always Track Decisions
# ✅ Good - Track every LLM call
def generate_response(prompt: str) -> str:
decision = briefcase_ai.DecisionSnapshot("generate_response")
decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
# ... AI call ...
decision.add_output(briefcase_ai.Output("response", result, "string"))
storage.save_decision(decision)
return result
# ❌ Bad - No tracking
def generate_response(prompt: str) -> str:
return openai_client.complete(prompt)
2. Handle Errors Gracefully
def robust_llm_call(prompt: str) -> str:
decision = briefcase_ai.DecisionSnapshot("robust_llm_call")
decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
try:
result = make_llm_call(prompt)
decision.add_output(briefcase_ai.Output("response", result, "string"))
decision.add_tag("status", "success")
return result
except Exception as e:
decision.add_tag("status", "error")
decision.add_tag("error", str(e))
raise
finally:
storage.save_decision(decision)
3. Monitor Costs
def cost_aware_completion(prompt: str, max_cost: float = 0.10) -> str:
calculator = briefcase_ai.CostCalculator()
# Estimate cost before making call
estimated_tokens = estimate_tokens(prompt)
estimate = calculator.estimate_cost("gpt-4", estimated_tokens, 150)
if estimate.total_cost > max_cost:
raise ValueError(f"Estimated cost ${estimate.total_cost:.4f} exceeds limit")
# Make tracked call
result, decision_id = track_openai_completion(prompt, "gpt-4")
return result
4. Sanitize Sensitive Data
def safe_llm_call(user_input: str) -> str:
sanitizer = briefcase_ai.Sanitizer()
# Sanitize input before processing
sanitized_input = sanitizer.sanitize(user_input)
safe_prompt = sanitized_input.sanitized
# Make LLM call with sanitized data
decision = briefcase_ai.DecisionSnapshot("safe_completion")
decision.add_input(briefcase_ai.Input("prompt", safe_prompt, "string"))
decision.add_tag("pii_redacted", str(len(sanitized_input.redactions)))
result = make_llm_call(safe_prompt)
decision.add_output(briefcase_ai.Output("response", result, "string"))
storage.save_decision(decision)
return result
5. Use Deterministic Replay for Testing
def test_model_consistency():
# Capture baseline decisions
baseline_decisions = []
for test_case in test_cases:
result, decision_id = track_llm_call(test_case.prompt)
baseline_decisions.append(decision_id)
# After model update, replay decisions
replay_engine = briefcase_ai.ReplayEngine(storage)
for decision_id in baseline_decisions:
replay_result = replay_engine.replay(decision_id, "strict")
if not replay_result.outputs_match:
print(f"Decision {decision_id} outputs changed!")
print(f"Original: {replay_result.original_output}")
print(f"Replay: {replay_result.replay_output}")
This documentation provides comprehensive guidance for integrating LLMs with the Briefcase AI observability platform. Use it as context when building AI applications that require robust tracking, compliance, and monitoring capabilities.