Every external call needs a timeout, every timeout needs a fallback — resilience patterns for HTTP, databases, and third-party services
88
90%
Does it follow best practices?
Impact
85%
4.72x
Average score across 5 eval scenarios
Passed
No known issues
The Iron Rule: Every external call needs a timeout. Every timeout needs a fallback.
When your code calls anything outside the process boundary — an HTTP API, a database, a cache, a message queue — that call can fail, hang, or slow down. Your code must handle all three. If you only add try/catch without a timeout, the request hangs. If you only add a timeout without a fallback, the user gets an error. You need both.
No exceptions. A missing timeout is a production outage waiting to happen. When a dependency hangs, your server's connection pool fills up, all requests block, and the entire service goes down — not just the requests that depend on the failing service.
Default: 5 seconds. Use 5s unless you have data showing the call needs more or less.
// CORRECT: fetch with timeout
const response = await fetch(url, {
signal: AbortSignal.timeout(5000),
});
// CORRECT: axios with timeout
const response = await axios.get(url, { timeout: 5000 });
// WRONG: bare fetch with no timeout — will hang forever if server is unresponsive
const response = await fetch(url);

// PostgreSQL — set at pool level
const pool = new Pool({
connectionTimeoutMillis: 5000,
query_timeout: 10000,
statement_timeout: 10000,
idleTimeoutMillis: 30000,
});
// SQLite — set busy timeout
const db = new Database("app.db");
db.pragma("busy_timeout = 5000");
// MySQL — set connect and query timeout
const pool = mysql.createPool({
connectTimeout: 5000,
// Per-query: connection.query({ sql: '...', timeout: 5000 })
});
// Mongoose/MongoDB — set timeouts
mongoose.connect(uri, {
serverSelectionTimeoutMS: 5000,
socketTimeoutMS: 10000,
});

# CORRECT: requests with timeout
response = requests.get(url, timeout=5)
# CORRECT: httpx with timeout
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get(url)
# WRONG: no timeout — blocks thread forever
response = requests.get(url)

// CORRECT: http client with timeout
client := &http.Client{Timeout: 5 * time.Second}
resp, err := client.Get(url)
// CORRECT: context with deadline
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
req, _ := http.NewRequestWithContext(ctx, "GET", url, nil)

When a call fails or times out, return something useful instead of crashing. The right fallback depends on the context:
| Situation | Fallback | Example |
|---|---|---|
| Data that changes slowly | Serve stale/cached data | Menu items, product catalog |
| Data with a sensible default | Return the default | Estimated wait: 10 min |
| One of several data sources | Return partial response | Dashboard: show orders, skip analytics |
| Write operation | Queue for retry | Order placed in dead letter queue |
| Non-critical feature | Disable the feature | Hide recommendations widget |
let cachedWeather: WeatherData | null = null;
async function getWeather(city: string): Promise<WeatherData> {
try {
const response = await fetch(`https://api.weather.com/v1/${city}`, {
signal: AbortSignal.timeout(5000),
});
if (!response.ok) throw new Error(`Weather API: ${response.status}`);
const data = await response.json();
cachedWeather = data; // Update cache on success
return data;
} catch (err) {
logger.warn({ err, city }, "weather_api_unavailable");
if (cachedWeather) {
return { ...cachedWeather, stale: true }; // Flag as stale
}
return { temperature: null, conditions: "unknown", unavailable: true };
}
}

When an endpoint aggregates data from multiple sources, isolate each source so that its failure is handled on its own — with a per-source try/catch or with `Promise.allSettled()`. Never let one failing dependency take down the whole response.
app.get("/api/dashboard", async (req, res) => {
const result: Record<string, unknown> = {};
const errors: string[] = [];
// Each dependency is isolated — one failure does not block the others
const [weather, stocks, news] = await Promise.allSettled([
fetchWeather(),
fetchStockPrices(),
fetchTopNews(),
]);
result.weather = weather.status === "fulfilled" ? weather.value : null;
if (weather.status === "rejected") errors.push("weather_unavailable");
result.stocks = stocks.status === "fulfilled" ? stocks.value : null;
if (stocks.status === "rejected") errors.push("stocks_unavailable");
result.news = news.status === "fulfilled" ? news.value : [];
if (news.status === "rejected") errors.push("news_unavailable");
res.json({
data: result,
warnings: errors.length > 0 ? errors : undefined,
});
});

async function getEstimatedWaitTime(): Promise<number> {
try {
const result = await db.query(
"SELECT COUNT(*) as count FROM orders WHERE status = 'preparing'"
);
return result.count * 3; // 3 minutes per order
} catch {
return 15; // Safe default when DB is down
}
}

Retries without backoff cause a thundering herd that makes outages worse. Retries without jitter cause synchronized spikes. Retrying non-transient errors (400, 404, 422) wastes resources.
/**
 * Runs `fn`, retrying failed attempts with exponential backoff and jitter.
 *
 * The wait before retry k (1-based) is drawn uniformly from 50–100% of
 * `baseDelayMs * 2^(k - 1)`, so concurrent callers do not retry in lockstep.
 *
 * @param fn - Operation to attempt; invoked once per attempt.
 * @param options.maxAttempts - Total number of attempts, including the first
 *   (default 3). Values below 1, fractional values and NaN are normalised to a
 *   whole number of at least 1, so `fn` always runs at least once and the
 *   caller always sees the real error from `fn`.
 * @param options.baseDelayMs - Backoff ceiling before the first retry, in
 *   milliseconds (default 1000). It doubles on every subsequent retry.
 * @param options.isRetryable - Decides whether a thrown error is worth
 *   retrying (default `isTransientError`).
 * @returns The value `fn` resolves with on its first successful attempt.
 * @throws The error thrown by `fn` when it is not retryable or when the
 *   attempts are exhausted.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  options: { maxAttempts?: number; baseDelayMs?: number; isRetryable?: (err: unknown) => boolean } = {}
): Promise<T> {
  const { maxAttempts = 3, baseDelayMs = 1000, isRetryable = isTransientError } = options;

  // A caller that means "no retries" may pass 0; that must still run fn once.
  // Flooring keeps the "last attempt" check exact for fractional input.
  const attempts = Number.isNaN(maxAttempts) ? 1 : Math.max(1, Math.floor(maxAttempts));

  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      // Give up on the final attempt or on errors that will not heal by waiting.
      if (attempt >= attempts || !isRetryable(err)) throw err;

      const ceiling = baseDelayMs * Math.pow(2, attempt - 1); // 1s, 2s, 4s, ...
      const delay = ceiling * (0.5 + Math.random() * 0.5); // jitter: 50–100% of ceiling
      await new Promise<void>((resolve) => setTimeout(resolve, delay));
    }
  }
}
function isTransientError(err: unknown): boolean {
if (err instanceof Error && "status" in err) {
const status = (err as any).status;
// Retry on 429 (rate limit), 502, 503, 504 (server issues)
return [429, 502, 503, 504].includes(status);
}
// Retry on network errors (ECONNRESET, ETIMEDOUT, etc.)
if (err instanceof TypeError && err.message.includes("fetch failed")) return true;
return false;
}

When a dependency is down, stop sending requests to it. This prevents cascading failures and gives the dependency time to recover.
/**
 * Minimal circuit breaker guarding calls to a single dependency.
 *
 * States:
 * - "closed": calls go through; every failure is counted.
 * - "open": entered once the failure count reaches `threshold`; calls return
 *   the fallback without invoking the wrapped function.
 * - "half-open": entered by the first call made more than `resetTimeMs` after
 *   the most recent failure; that call is let through as a probe.
 *
 * Any successful call clears the failure count and closes the circuit.
 */
class CircuitBreaker {
  private failures = 0;
  private lastFailure = 0;
  private state: "closed" | "open" | "half-open" = "closed";

  /**
   * @param threshold - Failure count at which the circuit opens (default 5).
   * @param resetTimeMs - Time after the last failure before a probe is allowed,
   *   in milliseconds (default 30000).
   */
  constructor(
    private threshold = 5,
    private resetTimeMs = 30000
  ) {}

  /**
   * Invokes `fn` unless the circuit is open, substituting `fallback()` whenever
   * the call is skipped or `fn` rejects.
   *
   * @param fn - The guarded operation.
   * @param fallback - Synchronous producer of the substitute value.
   * @returns The result of `fn`, or the fallback value.
   */
  async call<T>(fn: () => Promise<T>, fallback: () => T): Promise<T> {
    if (this.shouldShortCircuit()) {
      return fallback();
    }

    let value: T;
    try {
      value = await fn();
    } catch {
      this.recordFailure();
      return fallback();
    }

    this.recordSuccess();
    return value;
  }

  /** True while the circuit is open and the reset window has not yet elapsed. */
  private shouldShortCircuit(): boolean {
    if (this.state !== "open") {
      return false;
    }
    const windowElapsed = Date.now() - this.lastFailure > this.resetTimeMs;
    if (windowElapsed) {
      this.state = "half-open"; // let this call through as a probe
      return false;
    }
    return true;
  }

  /** Clears the failure count and closes the circuit. */
  private recordSuccess(): void {
    this.failures = 0;
    this.state = "closed";
  }

  /** Counts a failure, timestamps it, and opens the circuit at the threshold. */
  private recordFailure(): void {
    this.failures += 1;
    this.lastFailure = Date.now();
    if (this.failures >= this.threshold) {
      this.state = "open";
    }
  }
}
// Usage: one circuit breaker per dependency
const weatherBreaker = new CircuitBreaker();
const stocksBreaker = new CircuitBreaker();
const weather = await weatherBreaker.call(
() => fetchWeather(city),
() => ({ temperature: null, conditions: "unknown", unavailable: true })
);

Each external dependency should have its own timeout, retry policy, circuit breaker, and fallback:
// CORRECT: each service has its own resilience config
const weatherClient = {
timeout: 3000,
retries: 2,
circuitBreaker: new CircuitBreaker(5, 30000),
fallback: () => ({ unavailable: true }),
};
const paymentClient = {
timeout: 10000, // Payments need more time
retries: 0, // NEVER retry payments (idempotency risk)
circuitBreaker: new CircuitBreaker(3, 60000),
fallback: null, // No fallback — fail explicitly
};

Every caught failure should be logged with enough context to diagnose the problem. Use structured logging. Include the dependency name, the error, and whether a fallback was used.
catch (err) {
logger.warn({
err,
dependency: "weather-api",
city,
fallback: "stale-cache",
cacheAge: Date.now() - cacheTimestamp,
}, "dependency_call_failed_using_fallback");
}

- Every `fetch()`, `axios()`, `requests.get()`, or other HTTP call: add a timeout (5s default)
- Multiple independent external calls: use `Promise.allSettled()` so one failure does not block the others
- SQLite: set `busy_timeout`
- `fetch(` without `signal:` or `timeout` — these hang forever on failure
- `requests.get(` without `timeout=` — same problem in Python
- `Promise.all(` with external calls — switch to `Promise.allSettled(` so one failure does not reject everything