A comprehensive HTTP client library that supports many features left out of other HTTP libraries.
—
File-based HTTP caching with Cache-Control header support, ETag validation, and Last-Modified handling. Caching improves performance by storing responses locally and respecting HTTP cache semantics, reducing network requests and server load.
File-based cache implementation that stores HTTP responses on disk with proper cache validation and expiration handling.
class FileCache:
"""File-based HTTP cache implementation."""
def __init__(self, cache, safe=None):
"""
Initialize file cache.
Args:
cache (str): Directory path for cache storage
safe (callable): Function to generate safe filenames from cache keys
(default: uses built-in safe filename generation)
"""
def get(self, key):
"""
Retrieve cached content by key.
Args:
key (str): Cache key
Returns:
bytes: Cached content, or None if not found
"""
def set(self, key, value):
"""
Store content in cache.
Args:
key (str): Cache key
value (bytes): Content to cache
"""
def delete(self, key):
"""
Remove cached content.
Args:
key (str): Cache key to remove
"""

HTTP caching is integrated into the Http client and follows standard HTTP caching semantics.
# Http constructor with caching
h = httplib2.Http(cache=cache_object_or_path)
# Cache can be:
# - None: No caching
# - str: Directory path (creates FileCache automatically)
# - FileCache instance: Custom cache implementation
# - Custom object: Must implement get/set/delete methods

import httplib2
# Create Http client with file-based caching (entries live under ".cache")
h = httplib2.Http(".cache")

# First request - fetches from server and stores in cache
(resp, content) = h.request("http://example.org/data.json")
print(f"First request status: {resp.status}")

# Second request - may use cached version if still valid
(resp, content) = h.request("http://example.org/data.json")

# `fromcache` is an attribute of the Response object, not a header key,
# so it has to be read as resp.fromcache; `'fromcache' in resp` only
# searches the response headers and is never true.
if resp.fromcache:
    print("Response served from cache")
else:
    print("Response fetched from server")

import httplib2
h = httplib2.Http(".cache")

# Force fresh request, bypassing cache
headers = {'cache-control': 'no-cache'}
(resp, content) = h.request("http://example.org/", headers=headers)

# Request with specific max-age
headers = {'cache-control': 'max-age=3600'}  # 1 hour
(resp, content) = h.request("http://example.org/", headers=headers)

# Check server's cache control directives (header may be absent, hence
# the empty-string default)
cache_control = resp.get('cache-control', '')
if 'no-cache' in cache_control:
    print("Server says not to cache this response")
elif 'max-age=' in cache_control:
    # Take the text after "max-age=" up to the next directive separator
    max_age = cache_control.split('max-age=')[1].split(',')[0]
    print(f"Server allows caching for {max_age} seconds")

import httplib2
h = httplib2.Http(".cache")

# First request stores ETag
(resp, content) = h.request("http://example.org/document")
etag = resp.get('etag')
if etag:
    print(f"Document ETag: {etag}")

# Subsequent requests use If-None-Match header automatically
(resp, content) = h.request("http://example.org/document")

# When a cache is configured, httplib2 answers a 304 revalidation by
# returning the cached entry with status 200 and resp.fromcache set to
# True, so the raw 304 is normally not visible to the caller. The 304
# branch is kept only as a fallback.
if resp.status == 304:
    print("Document hasn't changed (304 Not Modified)")
elif resp.fromcache:
    print("Document hasn't changed (served from cache)")
elif resp.status == 200:
    print("Document was updated")

import httplib2
h = httplib2.Http(".cache")

# First request stores Last-Modified header
(resp, content) = h.request("http://example.org/file.txt")
last_modified = resp.get('last-modified')
if last_modified:
    print(f"File last modified: {last_modified}")

# Subsequent requests use If-Modified-Since header automatically
(resp, content) = h.request("http://example.org/file.txt")

# With a cache configured, a 304 revalidation is surfaced as the cached
# entry with status 200 and resp.fromcache set to True, so check the
# fromcache attribute as well as the raw status.
if resp.status == 304 or resp.fromcache:
    print("File hasn't been modified")

import httplib2
class MemoryCache:
    """Simple in-memory cache implementation.

    Provides the get/set/delete methods that a custom cache object must
    implement, backed by a plain dictionary.
    """

    def __init__(self):
        """Create an empty cache."""
        self.cache = {}

    def get(self, key):
        """Return the value stored under ``key``, or None if absent."""
        return self.cache.get(key)

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any previous entry."""
        self.cache[key] = value

    def delete(self, key):
        """Remove ``key`` from the cache; a missing key is ignored."""
        self.cache.pop(key, None)
# Use custom cache
memory_cache = MemoryCache()
h = httplib2.Http(cache=memory_cache)
(resp, content) = h.request("http://example.org/")

import httplib2
import os

# Create cache in specific directory
cache_dir = "/tmp/http_cache"
h = httplib2.Http(cache_dir)

# Check cache directory: count the entries currently stored in it
if os.path.exists(cache_dir):
    cache_files = os.listdir(cache_dir)
    print(f"Cache contains {len(cache_files)} files")

# Clear cache manually if needed
import shutil
shutil.rmtree(cache_dir)

httplib2 implements proper HTTP cache validation:
Request Directives:
- no-cache: Force revalidation with server
- no-store: Don't cache request or response
- max-age=<seconds>: Maximum acceptable age
- max-stale=<seconds>: Accept stale responses

Response Directives:
- no-cache: Must revalidate before use
- no-store: Don't cache this response
- private: Only cache in private caches
- public: May cache in shared caches
- max-age=<seconds>: Maximum cache lifetime
- must-revalidate: Revalidate when stale

import httplib2
from datetime import datetime, timedelta
h = httplib2.Http(".cache")
# Response with explicit expiration
(resp, content) = h.request("http://example.org/news")
# Check expiration information
expires = resp.get('expires')
cache_control = resp.get('cache-control', '')
if 'max-age=' in cache_control:
max_age = int(cache_control.split('max-age=')[1].split(',')[0])
print(f"Cached for {max_age} seconds")
elif expires:
print(f"Expires: {expires}")Cache keys are generated from:
Safe filename generation:
import httplib2
import os
import time
# Monitor cache usage
cache_dir = ".cache"
h = httplib2.Http(cache_dir)
def cache_stats(cache_path):
    """Get cache statistics.

    Args:
        cache_path (str): Directory that holds the cache entries

    Returns:
        dict: ``files`` (number of directory entries), ``size`` (total
        bytes) and ``size_mb`` (total size in mebibytes). All three keys
        are present even when the directory does not exist.
    """
    # A missing path (or a path that is not a directory) counts as an
    # empty cache; return the same keys as the normal case so callers
    # can read "size_mb" unconditionally.
    if not os.path.isdir(cache_path):
        return {"files": 0, "size": 0, "size_mb": 0.0}

    files = os.listdir(cache_path)
    total_size = sum(
        os.path.getsize(os.path.join(cache_path, name))
        for name in files
    )
    return {
        "files": len(files),
        "size": total_size,
        "size_mb": total_size / (1024 * 1024),
    }
# Before requests: snapshot the cache so growth can be compared later
stats_before = cache_stats(cache_dir)
print(f"Cache before: {stats_before}")

# Make some requests
for url in ["http://example.org/", "http://example.org/about", "http://example.org/contact"]:
    (resp, content) = h.request(url)

# After requests
stats_after = cache_stats(cache_dir)
print(f"Cache after: {stats_after}")

Install with Tessl CLI
npx tessl i tessl/pypi-httplib2