Google Drive Public File/Folder Downloader that bypasses security notices and provides recursive folder downloads.

Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
Download files with intelligent caching, hash verification, and post-processing capabilities for reliable automation workflows.
Downloads files with caching support and integrity verification using multiple hash algorithms.
from typing import Optional


def cached_download(
    url=None,
    path=None,
    md5=None,
    quiet=False,
    postprocess=None,
    hash: Optional[str] = None,
    **kwargs
) -> str:
    """
    Download file with caching and hash verification.

    Parameters:
    - url (str): URL to download from. Google Drive URLs supported.
    - path (str): Cache file path. If None, auto-generated from URL.
    - md5 (str): Expected MD5 hash (deprecated, use hash parameter).
    - quiet (bool): Suppress terminal output. Default: False.
    - postprocess (callable): Function to call with filename after download.
    - hash (str): Hash in format 'algorithm:hexvalue' (e.g., 'sha256:abc123...').
      Supported: md5, sha1, sha256, sha512.
    - **kwargs: Additional arguments passed to download() function.

    Returns:
    str: Path to cached file.

    Raises:
    AssertionError: When file hash doesn't match expected value.
    ValueError: When both md5 and hash parameters are specified.
    """


import gdown
# Simple cached download
url = "https://drive.google.com/uc?id=1l_5RK28JRL19wpT22B-DY9We3TVXnnQQ"
cached_path = gdown.cached_download(url)
print(f"File cached at: {cached_path}")

# Subsequent calls return cached file immediately
cached_path_again = gdown.cached_download(url)  # No download, returns cached file

# SHA256 verification
url = "https://example.com/data.zip"
expected_hash = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
try:
    path = gdown.cached_download(url, hash=expected_hash)
    print(f"File verified and cached: {path}")
except AssertionError as e:
    print(f"Hash verification failed: {e}")

# MD5 verification
gdown.cached_download(url, hash="md5:5d41402abc4b2a76b9719d911017c592")

# SHA1 verification
gdown.cached_download(url, hash="sha1:aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d")

# SHA512 verification
gdown.cached_download(url, hash="sha512:b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb980b1d7785e5976ec049b46df5f1326af5a2ea6d103fd07c95385ffab0cacbc86")

# Specify custom cache path
custom_path = "/tmp/my_cache/important_file.zip"
gdown.cached_download(url, path=custom_path, hash="sha256:abc123...")

# Extract archive after download
def extract_archive(filepath):
print(f"Extracting {filepath}")
gdown.extractall(filepath, to="./extracted/")
# Download, verify, and extract
gdown.cached_download(
url="https://example.com/archive.zip",
hash="sha256:expected_hash_here",
postprocess=extract_archive
)# Use all download() parameters
gdown.cached_download(
url="https://drive.google.com/uc?id=FILE_ID",
path="./cache/myfile.zip",
hash="sha256:expected_hash",
proxy="http://proxy:8080",
speed=512*1024, # 512KB/s
use_cookies=True,
fuzzy=True
)def md5sum(filename, blocksize=None) -> str:
"""
Calculate MD5 hash of file (deprecated).
Parameters:
- filename (str): Path to file to hash
- blocksize (int): Block size for reading file chunks. Default: 65536
Returns:
str: MD5 hexdigest string
Note: Deprecated and will be removed in future versions.
Use hash parameter in cached_download() instead.
"""import gdown
# Calculate MD5 hash of downloaded file (deprecated usage)
file_path = "downloaded_file.zip"
hash_value = gdown.md5sum(file_path)
print(f"MD5: {hash_value}")
# Preferred approach: Use hash parameter in cached_download
gdown.cached_download(url, hash=f"md5:{hash_value}")Files are cached in ~/.cache/gdown/ with URL-based naming:
~/.cache/gdown/
├── https-COLON--SLASH--SLASH-drive.google.com-SLASH-uc-QUESTION-id-EQUAL-FILE_ID
├── _dl_lock       # Download lock file
└── cookies.txt    # Cookie storage

URLs are converted to filenames by replacing special characters:
/ → -SLASH-
: → -COLON-
= → -EQUAL-
? → -QUESTION-

from gdown.exceptions import FileURLRetrievalError
try:
# Hash mismatch example
gdown.cached_download(
"https://example.com/file.zip",
hash="sha256:wrong_hash_value"
)
except AssertionError as e:
print(f"Hash verification failed: {e}")
# Re-download with correct hash or investigate file corruption
try:
# Download failure
gdown.cached_download("https://invalid-url.com/file.zip")
except FileURLRetrievalError as e:
print(f"Download failed: {e}")All algorithms from Python's hashlib.algorithms_guaranteed:
# Always use hash verification for production
def reliable_download(url, expected_hash, max_retries=3):
    """Download with hash verification, retrying up to max_retries times on hash mismatch."""
    for attempt in range(max_retries):
        try:
            return gdown.cached_download(url, hash=expected_hash)
        except AssertionError:
            # Last attempt: propagate the verification failure to the caller.
            if attempt == max_retries - 1:
                raise
            print(f"Hash mismatch, retrying... ({attempt + 1}/{max_retries})")

# Pipeline with post-processing
def process_dataset(url, dataset_hash):
# Download and verify
archive_path = gdown.cached_download(url, hash=dataset_hash)
# Extract
extract_dir = "./data/"
extracted_files = gdown.extractall(archive_path, to=extract_dir)
# Process files
for file_path in extracted_files:
if file_path.endswith('.csv'):
# Process CSV data
pass
return extracted_filesInstall with Tessl CLI
npx tessl i tessl/pypi-gdown