# Briefcase AI SDK for LLMs

Briefcase AI provides observability and decision-tracking for AI applications. Two main distributions: pure Python SDK and high-performance Rust core with Python bindings.

## Quick Start

```python
import briefcase_ai

briefcase_ai.init()

# Track any LLM call
decision = briefcase_ai.DecisionSnapshot("chat_completion")
decision.add_input(briefcase_ai.Input("prompt", "Hello world", "string"))
decision.add_output(briefcase_ai.Output("response", "Hello back!", "string"))

storage = briefcase_ai.SqliteBackend.in_memory()
decision_id = storage.save_decision(decision)
```

## Installation

```bash
pip install briefcase-ai
```

## Core Patterns

### OpenAI Integration

```python
import openai
import briefcase_ai
import time

briefcase_ai.init()
client = openai.OpenAI()

def track_openai(prompt: str, model: str = "gpt-4") -> tuple[str, str]:
    decision = briefcase_ai.DecisionSnapshot("openai_completion")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
    decision.add_input(briefcase_ai.Input("model", model, "string"))

    start_time = time.time()
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        result = response.choices[0].message.content
        execution_time = (time.time() - start_time) * 1000

        # Add model parameters
        params = briefcase_ai.ModelParameters(model)
        params.with_provider("openai")
        decision.with_model_parameters(params)

        # Add output and metadata
        decision.add_output(briefcase_ai.Output("response", result, "string"))
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "openai")
        decision.add_tag("status", "success")

        if hasattr(response, 'usage'):
            decision.add_tag("total_tokens", str(response.usage.total_tokens))
    except Exception as e:
        execution_time = (time.time() - start_time) * 1000
        decision.with_execution_time(execution_time)
        decision.add_tag("status", "error")
        decision.add_tag("error", str(e))
        raise
    finally:
        storage = briefcase_ai.SqliteBackend.in_memory()
        decision_id = storage.save_decision(decision)

    return result, decision_id
```

### Anthropic Claude Integration

```python
import anthropic
import briefcase_ai
import time

def track_claude(prompt: str, model: str = "claude-3-sonnet-20240229") -> tuple[str, str]:
    briefcase_ai.init()
    client = anthropic.Anthropic()

    decision = briefcase_ai.DecisionSnapshot("claude_completion")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
    decision.add_input(briefcase_ai.Input("model", model, "string"))

    start_time = time.time()
    try:
        response = client.messages.create(
            model=model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
        result = response.content[0].text
        execution_time = (time.time() - start_time) * 1000

        params = briefcase_ai.ModelParameters(model)
        params.with_provider("anthropic")
        decision.with_model_parameters(params)

        decision.add_output(briefcase_ai.Output("response", result, "string"))
        decision.with_execution_time(execution_time)
        decision.add_tag("provider", "anthropic")
        decision.add_tag("status", "success")
    except Exception as e:
        execution_time = (time.time() - start_time) * 1000
        decision.with_execution_time(execution_time)
        decision.add_tag("status", "error")
        decision.add_tag("error", str(e))
        raise
    finally:
        storage = briefcase_ai.SqliteBackend.in_memory()
        decision_id = storage.save_decision(decision)

    return result, decision_id
```

### Generic LLM Wrapper

```python
from typing import Callable, Dict, Any
import briefcase_ai
import time

class LLMTracker:
    def __init__(self):
        briefcase_ai.init()
        self.storage = briefcase_ai.SqliteBackend.in_memory()

    def track(self, function_name: str, llm_function: Callable,
              inputs: Dict[str, Any], provider: str, model: str,
              **kwargs) -> tuple[Any, str]:
        decision = briefcase_ai.DecisionSnapshot(function_name)

        for key, value in inputs.items():
            input_type = "string" if isinstance(value, str) else "json"
            decision.add_input(briefcase_ai.Input(key, str(value), input_type))

        params = briefcase_ai.ModelParameters(model)
        params.with_provider(provider)
        decision.with_model_parameters(params)

        start_time = time.time()
        try:
            result = llm_function(**inputs, **kwargs)
            execution_time = (time.time() - start_time) * 1000

            result_type = "string" if isinstance(result, str) else "json"
            decision.add_output(briefcase_ai.Output("response", str(result), result_type))
            decision.with_execution_time(execution_time)
            decision.add_tag("provider", provider)
            decision.add_tag("status", "success")
        except Exception as e:
            execution_time = (time.time() - start_time) * 1000
            decision.with_execution_time(execution_time)
            decision.add_tag("status", "error")
            decision.add_tag("error", str(e))
            raise
        finally:
            decision_id = self.storage.save_decision(decision)

        return result, decision_id

# Usage
tracker = LLMTracker()

def my_llm_call(prompt: str):
    # Your LLM call here
    return "LLM response"

result, decision_id = tracker.track("my_function", my_llm_call,
                                    {"prompt": "Hello"}, "custom", "my-model")
```

## Core API

### DecisionSnapshot

```python
# Create
decision = briefcase_ai.DecisionSnapshot("function_name")

# Add inputs/outputs
decision.add_input(briefcase_ai.Input("key", "value", "string"))
decision.add_output(briefcase_ai.Output("key", "value", "string").with_confidence(0.95))

# Add model info
params = briefcase_ai.ModelParameters("gpt-4")
params.with_provider("openai")
params.with_parameter("temperature", 0.7)
decision.with_model_parameters(params)

# Add metadata
decision.with_execution_time(125.5)  # milliseconds
decision.with_module("my_module")
decision.add_tag("key", "value")
```

### Storage Backends

```python
# In-memory (testing)
storage = briefcase_ai.SqliteBackend.in_memory()

# Persistent SQLite
storage = briefcase_ai.SqliteBackend.new("decisions.db")

# Save/load
decision_id = storage.save_decision(decision)
loaded = storage.load_decision(decision_id)
```

### Cost Tracking

```python
calculator = briefcase_ai.CostCalculator()

# Estimate costs
estimate = calculator.estimate_cost("gpt-4", input_tokens=1000,
                                    output_tokens=500)
print(f"Total cost: ${estimate.total_cost:.4f}")

# Budget monitoring
status = calculator.check_budget(spent=85.0, budget=100.0)
print(f"Status: {status.status}")  # ok, warning, critical, exceeded
```

### Drift Detection

```python
calculator = briefcase_ai.DriftCalculator()

# Analyze outputs over time
outputs = ["positive", "positive", "negative", "positive", "negative"]
metrics = calculator.calculate_drift(outputs)

print(f"Consistency Score: {metrics.consistency_score}")
print(f"Agreement Rate: {metrics.agreement_rate}")
print(f"Status: {calculator.get_status(metrics)}")
```

### Data Sanitization

```python
sanitizer = briefcase_ai.Sanitizer()

# Sanitize text
text = "Contact john.doe@company.com or call 555-123-4567"
result = sanitizer.sanitize(text)
print(f"Sanitized: {result.sanitized}")
print(f"Redactions: {len(result.redactions)}")

# Custom patterns
sanitizer.add_pattern("employee_id", r"\bEMP-\d{6}\b")
```

## Framework Integrations

### LangChain

```python
from briefcase.integrations.frameworks.langchain_handler import BriefcaseLangChainHandler

handler = BriefcaseLangChainHandler(
    engagement_id="project-id",
    workstream_id="workflow-id"
)

# Use with any LangChain component
from langchain.llms import OpenAI
llm = OpenAI(callbacks=[handler])

# Retrieve decisions
decisions = handler.get_decisions()
```

### LlamaIndex

```python
from briefcase.integrations.frameworks.llamaindex_handler import BriefcaseLlamaIndexHandler

handler = BriefcaseLlamaIndexHandler(
    engagement_id="project-id",
    workstream_id="workflow-id"
)

from llama_index.core import Settings
Settings.callback_manager.add_handler(handler)
```

## JavaScript/TypeScript (WASM)

### Installation

```bash
npm install briefcase-wasm
```

### Usage

```javascript
import { init, JsDecisionSnapshot, JsInput, JsOutput, JsMemoryStorage } from 'briefcase-wasm';

await init();

const decision = new JsDecisionSnapshot("chat_completion");
decision.addInput(new JsInput("prompt", "Hello world", "string"));
decision.addOutput(new JsOutput("response", "Hello back!", "string"));

const storage = new JsMemoryStorage();
const decisionId = storage.saveDecision(decision);
```

## Advanced Features

### Replay Engine

```python
engine = briefcase_ai.ReplayEngine(storage)
result = engine.replay(decision_id, "strict")  # strict, tolerant, validation_only

if not result.outputs_match:
    print(f"Output changed: {result.original_output} -> {result.replay_output}")
```

### Multi-Agent Workflows

```python
from briefcase.correlation import briefcase_workflow

with briefcase_workflow("workflow-name", client) as workflow:
    # All decisions within this context are correlated
    decision1 = briefcase_ai.DecisionSnapshot("step1")
    decision2 = briefcase_ai.DecisionSnapshot("step2")
    # Both share correlation ID for tracing
```

### lakeFS Integration

```python
from briefcase.integrations.lakefs import lakefs_context

with lakefs_context(client, "repo-name", "main") as lakefs:
    content = lakefs.read_object("data/policy.pdf")
    # Context automatically tracks lakeFS commit SHA
```

## Best Practices

### Always Track Decisions

```python
# ✅ Good
def llm_call(prompt: str) -> str:
    decision = briefcase_ai.DecisionSnapshot("llm_call")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))
    result = make_llm_call(prompt)
    decision.add_output(briefcase_ai.Output("response", result, "string"))
    storage.save_decision(decision)
    return result

# ❌ Bad - No tracking
def llm_call(prompt: str) -> str:
    return make_llm_call(prompt)
```

### Error Handling

```python
def robust_llm_call(prompt: str) -> str:
    decision = briefcase_ai.DecisionSnapshot("robust_call")
    decision.add_input(briefcase_ai.Input("prompt", prompt, "string"))

    try:
        result = make_llm_call(prompt)
        decision.add_output(briefcase_ai.Output("response", result, "string"))
        decision.add_tag("status", "success")
        return result
    except Exception as e:
        decision.add_tag("status", "error")
        decision.add_tag("error", str(e))
        raise
    finally:
        storage.save_decision(decision)
```
### Cost Monitoring

```python
def cost_aware_call(prompt: str, max_cost: float = 0.10) -> str:
    calculator = briefcase_ai.CostCalculator()
    estimate = calculator.estimate_cost("gpt-4", len(prompt.split()), 150)

    if estimate.total_cost > max_cost:
        raise ValueError(f"Cost ${estimate.total_cost:.4f} exceeds limit")

    return track_openai(prompt, "gpt-4")[0]
```

## Server API (Optional)

Base URL: `https://api.briefcasebrain.com`

### Create Decision

```bash
curl -X POST https://api.briefcasebrain.com/api/v1/decisions \
  -H "Content-Type: application/json" \
  -d '{
    "function_name": "chat_completion",
    "inputs": [{"value": "Hello", "type": "string"}],
    "outputs": [{"value": "Hi there", "type": "string"}]
  }'
```

### List Decisions

```bash
curl "https://api.briefcasebrain.com/api/v1/decisions?limit=10"
```

### Replay Decision

```bash
curl -X POST https://api.briefcasebrain.com/api/v1/replay/{decision_id} \
  -H "Content-Type: application/json" \
  -d '{"mode": "strict"}'
```

## Troubleshooting

### Common Issues

**Import Error**: `ModuleNotFoundError: No module named 'briefcase_ai'`

```bash
pip install briefcase-ai
```

**Runtime Error**: "Library not initialized"

```python
import briefcase_ai
briefcase_ai.init()  # Must call before using
```

**Storage Error**: SQLite database locked

```python
# Use in-memory for testing
storage = briefcase_ai.SqliteBackend.in_memory()

# Or ensure proper cleanup
storage = briefcase_ai.SqliteBackend.new("unique_name.db")
```

**Performance**: Slow decision saving

```python
# Batch operations when possible
decisions = [decision1, decision2, decision3]
for decision in decisions:
    storage.save_decision(decision)
```

### Version Info

```python
import briefcase_ai
print(briefcase_ai.__version__)  # Check version
```

Current stable version: 2.1.30

## Documentation

- Full docs: https://docs.briefcasebrain.com
- GitHub: https://github.com/briefcasebrain/briefcase-ai-core
- Examples: `/examples` directory in repository
- Python API: `/website/docs/api/llm.md`