Skip to main content

Google Gemini + Chengeta AI

Cache `model.generate_content` calls: when the same prompt is sent again, the response is served from the cache instead of making another API request.

Install

pip install 'chengeta-ai[gemini]'

Example

import time
import google.generativeai as genai
from chengeta_ai import CacheManager, InMemoryBackend, CacheKeyBuilder
from chengeta_ai.adapters.gemini_adapter import GeminiCacheAdapter

# Configure the Gemini SDK and choose the model that will be wrapped.
genai.configure(api_key="your-api-key")
model = genai.GenerativeModel("gemini-2.0-flash")

# Build the cache manager from an in-memory backend and a key builder that
# uses the "myapp" namespace, then wrap the model with the caching adapter.
backend = InMemoryBackend()
key_builder = CacheKeyBuilder(namespace="myapp")
manager = CacheManager(backend=backend, key_builder=key_builder)
adapter = GeminiCacheAdapter(model, manager)

prompt = "What is semantic caching and why does it matter?"

# First request for this prompt: a live API call.
t0 = time.perf_counter()
r1 = adapter.generate_content(prompt)
t1 = time.perf_counter()
print(r1.text)
print(f"First call: {t1 - t0:.3f}s")

# Same prompt again: expected to be a cache hit (<1ms).
t0 = time.perf_counter()
r2 = adapter.generate_content(prompt)
t1 = time.perf_counter()
print(f"Second call: {t1 - t0:.6f}s")

Async Example

import asyncio
import google.generativeai as genai
from chengeta_ai import CacheManager, InMemoryBackend, CacheKeyBuilder
from chengeta_ai.adapters.gemini_adapter import GeminiCacheAdapter

# Configure the Gemini SDK and choose the model that will be wrapped.
genai.configure(api_key="your-api-key")
model = genai.GenerativeModel("gemini-2.0-flash")

# Cache manager with an in-memory backend and a default-constructed key builder.
manager = CacheManager(
    backend=InMemoryBackend(),
    key_builder=CacheKeyBuilder(),
)
adapter = GeminiCacheAdapter(model, manager)

async def main():
    """Send the same prompt twice and check that both responses have the same text."""
    prompt = "Explain transformers in one sentence."
    # Two awaits with an identical prompt; the second is expected to be
    # answered from the cache rather than by a new API request.
    r1 = await adapter.agenerate_content(prompt)
    r2 = await adapter.agenerate_content(prompt)
    assert r1.text == r2.text  # same cached result


asyncio.run(main())

Multi-Turn Conversation

# Conversation history passed as a list of turns; each turn is a dict with a
# "role" ("user" or "model") and a list of "parts". The last turn is the
# question to be answered.
contents = [
    {"role": "user", "parts": ["My name is Alice."]},
    {"role": "model", "parts": ["Hello Alice! How can I help?"]},
    {"role": "user", "parts": ["What is my name?"]},
]
response = adapter.generate_content(contents)
print(response.text)  # "Your name is Alice." — served from cache on second call