Rate Limit Handling
Problem
AI providers enforce rate limits to prevent abuse and ensure fair usage. Exceeding these limits results in:
- HTTP 429 errors
- Request failures
- Service disruption
- Temporary bans
Different providers enforce different limits (the figures below are illustrative and vary by tier and model — confirm current values in each provider's documentation):
- OpenAI: 3,500 requests/min (paid tier)
- Anthropic: 50 requests/min (free tier)
- Google AI: 60 requests/min
Solution
Implement intelligent rate limiting with:
- Token bucket algorithm
- Request queuing
- Automatic backoff
- Per-provider limits
- Request prioritization
Code
import { NeuroLink } from "@juspay/neurolink";
/**
 * Configuration for the token-bucket rate limiter.
 */
type RateLimitConfig = {
// Sustained request budget: tokens added to the bucket per minute.
requestsPerMinute: number;
// Maximum bucket capacity (peak burst); defaults to requestsPerMinute.
burstSize?: number;
// Delay in milliseconds before retrying; defaults to 60000 (one minute).
// NOTE(review): not referenced in the visible portion of the class — confirm usage.
retryAfter?: number;
};
/**
 * Token-bucket rate limiter: requests consume tokens that refill
 * continuously at `requestsPerMinute`, capped at `burstSize`.
 */
class RateLimiter {
// Pending request thunks queued while no token is available (FIFO order).
private queue: Array<() => Promise<any>> = [];
// True while the queue is being drained.
// NOTE(review): drain loop is not visible in this excerpt — confirm semantics.
private processing = false;
// Current token balance; may be fractional between refills.
private tokens: number;
// Timestamp (ms since epoch) of the most recent refill.
private lastRefill: number;
// Fully-resolved config: every optional field filled in by the constructor.
private config: Required<RateLimitConfig>;
/**
 * Create a limiter whose bucket starts full, so an initial burst of up to
 * `burstSize` requests is allowed immediately.
 *
 * @param config - `requestsPerMinute` is required; `burstSize` defaults to
 *   `requestsPerMinute`; `retryAfter` defaults to 60000 ms.
 */
constructor(config: RateLimitConfig) {
  this.config = {
    requestsPerMinute: config.requestsPerMinute,
    // `||` is deliberate here: a burstSize of 0 would make the bucket
    // permanently empty, so falsy values fall back to the sustained rate.
    burstSize: config.burstSize || config.requestsPerMinute,
    // `??` (not `||`): an explicit `retryAfter: 0` ("retry immediately")
    // is a legitimate value and must not be clobbered by the default.
    retryAfter: config.retryAfter ?? 60000,
  };
  // Start with a full bucket.
  this.tokens = this.config.burstSize;
  this.lastRefill = Date.now();
}
/**
 * Refill the bucket based on wall-clock time elapsed since the last refill.
 * Tokens accrue continuously (fractional balances are kept) at a rate of
 * `requestsPerMinute / 60000` tokens per millisecond, capped at `burstSize`.
 */
private refillTokens() {
  const now = Date.now();
  // Date.now() is not monotonic: NTP corrections or manual clock changes can
  // move it backwards, which would make `elapsed` negative and *drain* the
  // bucket. Clamp to zero so a backwards jump never removes tokens.
  const elapsed = Math.max(0, now - this.lastRefill);
  const tokensToAdd = (elapsed / 60000) * this.config.requestsPerMinute;
  this.tokens = Math.min(this.tokens + tokensToAdd, this.config.burstSize);
  this.lastRefill = now;
}
/**
* Wait until a token is available
*/
private async waitForToken(): Promise<void> {
this.refillTokens();
if (this.tokens >= 1) {
this.tokens -= 1;
return;
}
// Calculate wait time for next token
const tokensNeeded = 1 - this.tokens;
const waitTime = (tokensNeeded / this.config.requestsPerMinute) * 60000;
console.log(
`