Learn to rate limit ML API endpoints with our step-by-step guide. Secure your APIs, control traffic, and enhance performance effortlessly!

Book a call with an Expert
Starting a new venture? Need to upgrade your web app? RapidDev builds applications with your growth in mind.
// Example using Node.js with Redis for Fixed Window Rate Limiting
const redis = require("redis")
const client = redis.createClient() // Connect to Redis
// Middleware for rate limiting API endpoint
// Express middleware implementing fixed-window rate limiting with Redis.
// Allows at most `limit` requests per `windowSeconds` per client IP;
// excess requests receive HTTP 429.
const rateLimitMiddleware = (req, res, next) => {
  const userIP = req.ip; // You can also use an API key or user ID
  const limit = 100; // Maximum requests allowed per window
  const windowSeconds = 60;
  // Redis key identifying this client's current window
  const key = `rate_limit:${userIP}`;
  // INCR is atomic, so concurrent requests are counted correctly.
  client.incr(key, (err, requestCount) => {
    if (err) {
      // In case of an error, pass to the error handler
      return next(err);
    }
    if (requestCount === 1) {
      // First request of a fresh window: start the window timer.
      // Setting the expiry ONLY here (not on every request) is what
      // makes the window fixed — otherwise each hit would push the
      // expiry forward and the counter would never reset under
      // sustained traffic.
      client.expire(key, windowSeconds);
    }
    if (requestCount > limit) {
      // If limit exceeded, respond with a 429 Too Many Requests
      return res.status(429).send("Too Many Requests. Please try again later.");
    }
    // Otherwise, allow the API request to proceed
    next();
  });
};
// Wire the rate limiter into an Express application.
const express = require("express");
const app = express();

// Every route registered below is protected by the limiter;
// attach it per-route instead if only some endpoints need it.
app.use(rateLimitMiddleware);

// ML inference endpoint guarded by the middleware above.
app.post("/ml/api", (req, res) => {
  // Handle ML inference logic here.
  res.send("Your ML model has processed the data.");
});

const PORT = 3000;
app.listen(PORT, () => {
  console.log("Server running on port 3000");
});
# Example: Token Bucket rate limiting implemented in Python with Redis.
import time
import redis

r = redis.Redis()  # Connect to Redis


def token_bucket(user_key, capacity=50, refill_rate=1):
    """Token-bucket rate limiter backed by Redis.

    Each client gets a bucket holding up to ``capacity`` tokens that
    refills at ``refill_rate`` tokens per second. One token is consumed
    per request.

    :param user_key: identifier for the client (user ID, API key, IP).
    :param capacity: maximum number of tokens the bucket can hold.
    :param refill_rate: tokens added per second of elapsed time.
    :returns: True if a token was available (request allowed),
        False if the bucket is empty (request should be rejected).
    """
    key = f"token_bucket:{user_key}"
    now = time.time()
    # Retrieve the current state of the bucket
    bucket = r.hgetall(key)
    if not bucket:
        # Bucket does not exist yet: start it full
        tokens = capacity
        last_refill = now
    else:
        tokens = float(bucket.get(b'tokens', capacity))
        last_refill = float(bucket.get(b'last_refill', now))
    # Refill in proportion to the time elapsed since the last update
    # (a no-op for a freshly initialized bucket, where elapsed == 0).
    elapsed = now - last_refill
    new_tokens = elapsed * refill_rate
    tokens = min(capacity, tokens + new_tokens)
    if tokens < 1:
        # Not enough tokens to process the request
        return False
    # Consume a token and persist the updated bucket state.
    # hset(..., mapping=...) replaces the deprecated hmset (removed
    # in redis-py 4.x).
    tokens -= 1
    r.hset(key, mapping={"tokens": tokens, "last_refill": now})
    r.expire(key, 3600)  # Drop stale bucket state after an hour of inactivity
    return True
# In your ML endpoint handler, gate the request with a token_bucket check.
def ml_api_endpoint(request):
    """Handle an ML inference request, enforcing the token-bucket limit.

    :param request: incoming request object; must expose ``user_id``
        (an IP or API key would also work as the identifier).
    :returns: a status string — the 429 message when rate-limited,
        otherwise the inference result.
    """
    user_identifier = request.user_id  # Or IP / API key
    if not token_bucket(user_identifier):
        return "429 Too Many Requests"
    # Process the ML request
    return "ML inference successful"
From startups to enterprises and everything in between, see our incredible impact for yourself.
Need a dedicated strategic tech and growth partner? Discover what RapidDev can do for your business! Book a call with our team to schedule a free, no-obligation consultation. We'll discuss your project and provide a custom quote at no cost.