Skip to main content

Metrics & Observability + Chengeta AI

Track hit rates, cost savings, warm the cache, and use multi-tenant namespacing.

CacheMetrics snapshot

from chengeta_ai import CacheManager, InMemoryBackend, CacheKeyBuilder

manager = CacheManager(backend=InMemoryBackend(), key_builder=CacheKeyBuilder())

# ... use the cache ...

snap = manager.metrics.snapshot()
print(snap)
# {
# 'hits': 142, 'misses': 31, 'evictions': 0, 'sets': 31,
# 'hit_rate': 0.821, 'miss_rate': 0.179,
# 'provider_cache_hits': 12,
# 'estimated_tokens_saved': 8400,
# 'estimated_cost_saved_usd': 0.025
# }

Cache Warming

from chengeta_ai import ResponseCache
from chengeta_ai.core.warmer import CacheWarmer

cache = ResponseCache(manager)
warmer = CacheWarmer(cache)

results = warmer.warm_from_queries(
queries=["What is RAG?", "Explain caching", "What is a vector DB?"],
generate_fn=my_llm_fn,
model_id="gpt-4o-mini",
ttl=3600,
)
for q, status in results.items():
print(f"[{status}] {q}")

Multi-tenant namespacing

tenant_a = manager.for_tenant("acme-corp")
tenant_b = manager.for_tenant("beta-inc")

from chengeta_ai import ResponseCache
cache_a = ResponseCache(tenant_a)
cache_b = ResponseCache(tenant_b)

msg = [{"role": "user", "content": "What is our SLA?"}]
cache_a.set(msg, {"answer": "99.9% uptime"}, model_id="gpt-4o")
cache_b.set(msg, {"answer": "99.5% uptime"}, model_id="gpt-4o")

# Each tenant gets their own value — fully isolated
print(cache_a.get(msg, model_id="gpt-4o")) # 99.9%
print(cache_b.get(msg, model_id="gpt-4o")) # 99.5%

Prometheus export

pip install 'chengeta-ai[observability]'
from chengeta_ai.core.exporters import PrometheusExporter

exporter = PrometheusExporter(manager.metrics, port=9090)
exporter.start()
# Scrape: http://localhost:9090/metrics

CLI stats

python -m chengeta_ai stats

Run the cookbook example

uv run python -m cookbook.metrics.agent