The invariance CLI provides full access to the Invariance API from your terminal. Manage sessions, run evals, create datasets, and more.
The CLI is included in the SDK package:
npm install -g @invariance/sdk

Or run directly with npx:
npx @invariance/sdk invariance --help

Verify installation:
invariance --version
# 0.2.1

The CLI uses environment variables for authentication. See the Configuration page for full details.
# Required
INVARIANCE_API_KEY=inv_your_api_key_here
# Optional
INVARIANCE_API_URL=https://api.useinvariance.com
INVARIANCE_PRIVATE_KEY=your_ed25519_private_key
# For LLM judge evals
ANTHROPIC_API_KEY=sk-ant-...
OPENAI_API_KEY=sk-...
# Mock mode for testing
EVAL_JUDGE_MOCK=true

Check your configuration:
invariance configure

All commands support --json for machine-readable output:
invariance sessions --json | jq '.[] | .id'

View and verify hash-chained receipt sessions.
# List all sessions
invariance sessions
invariance sessions --status open
# View session detail with receipt timeline
invariance session <session-id>
# Verify chain integrity (exit code 1 if broken)
invariance verify <session-id>
# Check connection status
invariance status

Create natural-language monitoring rules that watch agent behavior.
# List monitors
invariance monitors list
invariance monitors list --status active --agent-id my-agent
# Create a monitor
invariance monitors create --name "PII Check" --rule "Agent must not output SSNs"
invariance monitors create --name "Spend Limit" --rule "No transactions over $1000" \
--agent-id trading-bot --severity critical
# Trigger evaluation
invariance monitors evaluate <monitor-id>
# Delete a monitor
invariance monitors delete <monitor-id>

Manage eval suites, run evaluations, compare results, and set quality thresholds.
# List eval suites
invariance evals suites
invariance evals suites --agent-id my-agent
# Run an eval suite
invariance evals run <suite-id> --agent-id my-agent
invariance evals run <suite-id> --agent-id my-agent --version v2.0
# Compare two runs
invariance evals compare <suite-id> <run-a-id> <run-b-id>

# List thresholds
invariance evals thresholds --suite-id <suite-id>
# Create a threshold (gate on 90% pass rate)
invariance evals threshold-create --suite-id <suite-id> --min-value 0.9
invariance evals threshold-create --suite-id <suite-id> --min-value 0.8 \
--metric avg_score --webhook-url https://hooks.example.com/alert
# Update / delete
invariance evals threshold-update <id> --min-value 0.95
invariance evals threshold-delete <id>

# List failure clusters
invariance evals clusters --agent-id my-agent
# Create a cluster
invariance evals cluster-create --agent-id my-agent --cluster-type hallucination \
--label "FAQ hallucinations" --severity high
# Add trace nodes to cluster
invariance evals cluster-add-member <cluster-id> \
--trace-node-id <node-id> --session-id <session-id>
# List optimization suggestions
invariance evals suggestions --agent-id my-agent
# Create a suggestion
invariance evals suggestion-create --agent-id my-agent --suggestion-type prompt \
--title "Improve error handling" --description "Add retry logic" --confidence 0.85

Manage versioned test datasets for eval runs.
# List datasets
invariance datasets list
invariance datasets list --agent-id my-agent
# Create a dataset
invariance datasets create --name "Support Cases" --agent-id support-bot
# Add rows (JSON array)
invariance datasets add-rows <id> \
--data '[{"input":{"query":"hello"},"expected":{"intent":"greeting"}}]'
# List rows
invariance datasets rows <id> --limit 10
# Publish immutable version
invariance datasets publish <id> --notes "v1 initial set"
# List versions
invariance datasets versions <id>
# Create from trace data
invariance datasets from-traces --session-ids sess-1,sess-2 \
--agent-id my-agent --name "From Production"
# Import traces into existing dataset
invariance datasets import-traces <id> --session-ids sess-1,sess-2 \
--agent-id my-agent

Manage scoring configurations for LLM judges and human rubrics.
# List scorers
invariance scorers list
# Create an LLM scorer
invariance scorers create --name "Quality" --type llm \
--config '{"prompt":"Rate quality","criteria":["accuracy"],"model":"claude-sonnet-4-20250514"}'
# Create a human scorer
invariance scorers create --name "Safety" --type human \
--config '{"rubric":[{"criterion":"safety","weight":1}]}'
# Update / delete
invariance scorers update <id> --name "Updated Quality"
invariance scorers delete <id>

Run structured experiments combining datasets, eval suites, and prompt versions.
# List experiments
invariance experiments list
invariance experiments list --suite-id suite-123 --status completed
# Create an experiment
invariance experiments create --name "Prompt A/B" --dataset-id ds-1 \
--dataset-version 1 --suite-id suite-1
# Run experiment
invariance experiments run <id>
# Compare two experiments
invariance experiments compare <exp-a-id> <exp-b-id>
# Delete
invariance experiments delete <id>

Version-controlled prompt management with diff capabilities.
# List prompts
invariance prompts list
# Create a prompt
invariance prompts create --name "Support Agent System Prompt"
# Add a version
invariance prompts create-version <id> \
--system-prompt "You are a helpful assistant." \
--variables "name,topic"
# List versions
invariance prompts versions <id>
# Diff two versions
invariance prompts diff <from-version-id> <to-version-id>
# Delete
invariance prompts delete <id>

Detect behavioral drift between agent sessions.
# List detected drift catches
invariance drift catches
# Compare two sessions for drift
invariance drift compare <session-a-id> <session-b-id>

Manage trace flags for feedback and training data.
# List trace flags
invariance training flags
invariance training flags --flag bad --limit 20
# View flag statistics
invariance training stats

# List registered agent identities
invariance identities list
# Search across sessions, agents, and anomalies
invariance search "payment failed"
invariance search "agent-7"
# Show environment configuration
invariance configure
# Validate API key connectivity
invariance configure-check