Google Drive Public File/Folder Downloader that bypasses security notices and provides recursive folder downloads
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Recursive downloading of Google Drive folders with directory structure preservation and batch file handling.
Downloads entire Google Drive folders with recursive structure preservation, supporting up to 50 files per folder.
import collections

from typing import List, Union
def download_folder(
    url=None,
    id=None,  # NOTE: shadows the builtin `id`; kept to mirror gdown's public API
    output=None,
    quiet=False,
    proxy=None,
    speed=None,
    use_cookies=True,
    remaining_ok=False,
    verify=True,
    user_agent=None,
    skip_download: bool = False,
    resume=False,
) -> Union[List[str], List["GoogleDriveFileToDownload"], None]:
    """Download an entire folder from Google Drive.

    Parameters:
    - url (str): Google Drive folder URL. Must be format
      'https://drive.google.com/drive/folders/{id}'.
    - id (str): Google Drive folder ID. Cannot be used with url parameter.
    - output (str): Output directory path. If None, uses folder name from
      Google Drive.
    - quiet (bool): Suppress terminal output. Default: False.
    - proxy (str): Proxy configuration in format 'protocol://host:port'.
    - speed (float): Download speed limit in bytes per second.
    - use_cookies (bool): Use cookies from ~/.cache/gdown/cookies.txt.
      Default: True.
    - remaining_ok (bool): Allow downloading folders at maximum file limit
      (50 files). Default: False.
    - verify (bool/str): TLS certificate verification. True/False or path to
      CA bundle. Default: True.
    - user_agent (str): Custom user agent string.
    - skip_download (bool): Return file list without downloading (dry run).
      Default: False.
    - resume (bool): Resume interrupted downloads, skip completed files.
      Default: False.

    Returns:
        Union[List[str], List[GoogleDriveFileToDownload], None]:
        - If skip_download=False: List of downloaded file paths or None if failed.
        - If skip_download=True: List of GoogleDriveFileToDownload objects.

    Raises:
        FolderContentsMaximumLimitError: When folder contains more than 50 files.
        FileURLRetrievalError: When unable to access folder or retrieve file URLs.
        ValueError: When both url and id are specified or neither.
    """
    # Documentation stub: the real implementation lives in the gdown package.
    # The forward reference in the return annotation is quoted so this
    # definition does not require GoogleDriveFileToDownload to exist yet.
    ...
# Named tuple container for file download information.
# Fields:
#   id         -- Google Drive file ID
#   path       -- path of the file inside the Drive folder
#   local_path -- path the file is (or would be) written to locally
GoogleDriveFileToDownload = collections.namedtuple(
    "GoogleDriveFileToDownload",
    ("id", "path", "local_path"),
)
import gdown

# Download entire folder
folder_url = "https://drive.google.com/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl"
downloaded_files = gdown.download_folder(folder_url, output="./my_folder")
print(f"Downloaded {len(downloaded_files)} files:")
for file_path in downloaded_files:
    print(f" {file_path}")

# Using folder ID directly
folder_id = "15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl"
downloaded_files = gdown.download_folder(id=folder_id, output="./dataset")

# Get file list without downloading
folder_url = "https://drive.google.com/drive/folders/FOLDER_ID"
file_info = gdown.download_folder(folder_url, skip_download=True)
print("Files in folder:")
for file_obj in file_info:
    print(f"ID: {file_obj.id}")
    print(f"Path: {file_obj.path}")
    print(f"Local path: {file_obj.local_path}")
    print("---")

# Resume partial folder download
gdown.download_folder(
    folder_url,
    output="./large_dataset",
    resume=True,
    quiet=False,  # Show progress for resumed files
)

# Folder download with speed limit and proxy
gdown.download_folder(
    url=folder_url,
    output="./data",
    speed=2*1024*1024,  # 2MB/s limit
    proxy="http://corporate-proxy:8080",
    use_cookies=True,
    remaining_ok=True,  # Allow folders with 50 files
)

# gdown maintains the original Google Drive folder structure:
Original Google Drive:
📁 Dataset/
├── 📁 train/
│ ├── image1.jpg
│ └── image2.jpg
├── 📁 test/
│ └── image3.jpg
└── README.txt
Downloaded Structure:
./my_folder/
├── train/
│ ├── image1.jpg
│ └── image2.jpg
├── test/
│ └── image3.jpg
└── README.txt

Folders at the maximum file limit raise FolderContentsMaximumLimitError by
default; pass remaining_ok=True to allow download at the limit.

# For private folders, place cookies in ~/.cache/gdown/cookies.txt
# Format: Mozilla/Netscape cookie jar
# Or disable cookies for public folders only
gdown.download_folder(url, use_cookies=False)

from gdown.exceptions import FolderContentsMaximumLimitError, FileURLRetrievalError

try:
    files = gdown.download_folder("https://drive.google.com/drive/folders/FOLDER_ID")
    print(f"Successfully downloaded {len(files)} files")
except FolderContentsMaximumLimitError:
    print("Folder contains more than 50 files. Use remaining_ok=True to download anyway.")
except FileURLRetrievalError as e:
    print(f"Failed to access folder: {e}")
    # Check folder permissions, URL validity, or network connectivity
except ValueError as e:
    print(f"Invalid parameters: {e}")


def download_large_folder(folder_url, output_dir):
    """Download folder with proper error handling for size limits."""
    try:
        # First try normal download
        return gdown.download_folder(folder_url, output=output_dir)
    except FolderContentsMaximumLimitError:
        print("Folder at maximum size limit (50 files)")
        # Option 1: Download anyway
        response = input("Download anyway? (y/n): ")
        if response.lower() == 'y':
            return gdown.download_folder(
                folder_url,
                output=output_dir,
                remaining_ok=True,
            )
        # Option 2: Get file list for manual selection
        file_list = gdown.download_folder(folder_url, skip_download=True)
        print(f"Folder contains {len(file_list)} files:")
        for i, file_obj in enumerate(file_list[:10]):  # Show first 10
            print(f"{i+1}. {file_obj.path}")
        return None


def process_dataset_folder(folder_url):
    """Download and process entire dataset folder."""
    # Download with resume support
    files = gdown.download_folder(
        folder_url,
        output="./dataset",
        resume=True,
        quiet=False,
    )
    # Process files by type
    for file_path in files:
        if file_path.endswith('.csv'):
            # Process CSV files
            print(f"Processing CSV: {file_path}")
        elif file_path.endswith(('.jpg', '.png')):
            # Process images
            print(f"Processing image: {file_path}")
    return files

# For large folders, monitor download progress
import os

def monitor_folder_download(folder_url, output_dir):
    """Download folder with progress monitoring."""
    # Get file list first (dry run; no bytes transferred)
    file_list = gdown.download_folder(folder_url, skip_download=True)
    total_files = len(file_list)
    print(f"Preparing to download {total_files} files...")
    # Start actual download
    downloaded_files = gdown.download_folder(
        folder_url,
        output=output_dir,
        quiet=False,
        resume=True,
    )
    if downloaded_files:
        print(f"✅ Successfully downloaded {len(downloaded_files)}/{total_files} files")
        # Verify all files exist on disk
        missing = []
        for expected_file in file_list:
            if not os.path.exists(expected_file.local_path):
                missing.append(expected_file.path)
        if missing:
            print(f"⚠️ Missing {len(missing)} files:")
            for path in missing[:5]:  # Show first 5
                print(f" - {path}")
    return downloaded_files

# Install with the Tessl CLI:
# npx tessl i tessl/pypi-gdown