OpenAI SDK + Chengeta AI
Cache client.chat.completions.create calls — repeated identical requests are served from the cache almost instantly, without hitting the API again.
Install
pip install 'chengeta-ai[openai]'
Example
import openai, time
from chengeta_ai import CacheManager, InMemoryBackend, CacheKeyBuilder
from chengeta_ai.adapters.openai_adapter import OpenAICacheAdapter
# Synchronous OpenAI client; the adapter below wraps it together with the cache.
client = openai.OpenAI()

# Assemble the cache manager from its two parts: the storage backend and the
# key builder (keys are built under the "myapp" namespace).
backend = InMemoryBackend()
key_builder = CacheKeyBuilder(namespace="myapp")
manager = CacheManager(backend=backend, key_builder=key_builder)
adapter = OpenAICacheAdapter(client, manager)

messages = [
    {
        "role": "user",
        "content": "What are the top 3 benefits of caching LLM responses?",
    },
]
# Both calls below receive exactly these arguments, so the second request is
# identical to the first.
request = {"model": "gpt-4o-mini", "messages": messages}

# First call: timed on its own so it can be compared with the repeat.
t0 = time.perf_counter()
r1 = adapter.chat_create(**request)
t1 = time.perf_counter()
print(r1.choices[0].message.content)
print(f"First: {t1-t0:.3f}s")  # ~0.8s live API call

# Second call: the same request again.
t2 = time.perf_counter()
r2 = adapter.chat_create(**request)
t3 = time.perf_counter()
print(f"Second: {t3-t2:.4f}s (cache hit)")  # ~0.0001s

# Report the hit rate from the manager's metrics snapshot.
snap = manager.metrics.snapshot()
print(f"Hit rate: {snap['hit_rate']:.0%}")
Async
import asyncio

# AsyncOpenAI is the asynchronous client; pair it with achat_create.
client = openai.AsyncOpenAI()
adapter = OpenAICacheAdapter(client, manager)


async def main():
    """Send one chat completion through the caching adapter and return it."""
    # `await` is only valid inside a coroutine, so the call lives in main()
    # rather than at module level (where it would be a SyntaxError).
    return await adapter.achat_create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Explain vector embeddings"}],
    )


# Drive the coroutine from ordinary synchronous script code. Inside an event
# loop that is already running (e.g. a notebook or an async web handler), use
# `response = await main()` instead.
response = asyncio.run(main())
With Redis (shared across workers)
from chengeta_ai.backends.redis_backend import RedisBackend
# Build the pieces separately, then hand them to the manager.
redis_backend = RedisBackend(url="redis://localhost:6379/0")
prod_keys = CacheKeyBuilder(namespace="prod")
manager = CacheManager(backend=redis_backend, key_builder=prod_keys)

# `client` is the OpenAI client created earlier; only the cache backend changes.
adapter = OpenAICacheAdapter(client, manager)
Invalidate by model
# Clear all cached gpt-4o-mini responses.
# NOTE(review): entries cached for other models are presumably left untouched —
# confirm against the adapter's documentation.
adapter.invalidate_model("gpt-4o-mini")
With GzipCompressor (reduce Redis memory)
from chengeta_ai import GzipCompressor
# Same Redis-backed setup as before, with a gzip compressor (level 6) added.
cache_options = {
    "backend": RedisBackend(url="redis://localhost:6379/0"),
    "key_builder": CacheKeyBuilder(namespace="prod"),
    "compressor": GzipCompressor(level=6),
}
manager = CacheManager(**cache_options)
adapter = OpenAICacheAdapter(client, manager)