Learn to cache ML models in memory for fast, efficient API responses. Follow our step-by-step guide for improved performance.

Book a call with an Expert
Starting a new venture? Need to upgrade your web app? RapidDev builds applications with your growth in mind.
# Import required modules
import pickle  # For model serialization/deserialization
from flask import Flask, request, jsonify  # For API routing and response handling

app = Flask(__name__)

# Module-level cache: the ML model is loaded from disk once and reused
# by every request instead of being deserialized per call.
cached_model = None
# Function to load the model into the in-memory cache
def load_model():
    """Load the ML model from disk into the module-level cache (idempotent).

    Reads ``model.pkl`` with pickle on the first call; later calls reuse the
    already-cached object. Returns None.
    """
    global cached_model
    # Only touch the disk when the model is not cached yet
    if cached_model is None:
        # NOTE: pickle.load is only safe because model.pkl is a trusted local
        # file — never unpickle data received from clients.
        with open('model.pkl', 'rb') as model_file:
            cached_model = pickle.load(model_file)
        # Log status for debugging purposes
        print("Model loaded and cached in memory.")
    else:
        print("Using cached ML model.")
# Warm the cache at import time so the very first request is already fast
load_model()
# API endpoint for making predictions with the cached model
@app.route("/predict", methods=["POST"])
def predict():
    """Return a JSON prediction for the 'input' key of the POSTed payload."""
    # Expect a JSON body of the form {"input": <feature vector>}
    data = request.get_json(silent=True)
    # Reject malformed requests explicitly instead of raising a 500
    if not data or "input" not in data:
        return jsonify({"error": "JSON body with an 'input' key is required"}), 400
    # Wrap the single sample in a list because predict expects a batch
    prediction = cached_model.predict([data["input"]])
    # tolist() converts the array result into a JSON-serializable list
    return jsonify({"prediction": prediction.tolist()})
# Run the Flask API server when executed as a script
# NOTE(review): app.run() blocks, so any definitions placed after this guard
# in the file never execute in script mode — confirm the intended file layout.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)
# Cache manager that owns loading, refreshing, and serving one pickled model
class ModelCacheManager:
    """Lazily load a pickled ML model from disk and keep it cached in memory."""

    def __init__(self, model_path):
        # Path of the pickled model file on disk
        self.model_path = model_path
        # Cached model instance; None until load()/predict() is first called
        self.model = None

    def load(self):
        """Load the model into the cache if it is not already present."""
        if self.model is None:
            # NOTE: pickle is only safe for trusted local files
            with open(self.model_path, "rb") as mf:
                self.model = pickle.load(mf)
            print("Model loaded into cache.")
        else:
            print("Model is already cached.")

    def refresh(self):
        """Force a reload from disk, replacing the cached copy unconditionally."""
        with open(self.model_path, "rb") as mf:
            self.model = pickle.load(mf)
        print("Model cache refreshed.")

    def predict(self, input_data):
        """Run inference with the cached model, loading it on first use."""
        if self.model is None:
            self.load()
        return self.model.predict(input_data)
# Instantiate the cache manager and warm its cache at import time
model_cache = ModelCacheManager("model.pkl")
model_cache.load()
# Endpoint that serves predictions through the ModelCacheManager
@app.route("/advanced_predict", methods=["POST"])
def advanced_predict():
    """Return a JSON prediction using the cache-managed model."""
    data = request.get_json(silent=True)
    # Reject malformed requests explicitly instead of raising a 500
    if not data or "input" not in data:
        return jsonify({"error": "JSON body with an 'input' key is required"}), 400
    # Single sample wrapped in a list: predict expects a batch (list/array)
    result = model_cache.predict([data["input"]])
    return jsonify({"prediction": result.tolist()})
From startups to enterprises and everything in between, see for yourself our incredible impact.
Need a dedicated strategic tech and growth partner? Discover what RapidDev can do for your business! Book a call with our team to schedule a free, no-obligation consultation. We'll discuss your project and provide a custom quote at no cost.