CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-gdown

Google Drive Public File/Folder Downloader that bypasses security notices and provides recursive folder downloads

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/folder-operations.md

Folder Operations

Recursive downloading of Google Drive folders with directory structure preservation and batch file handling.

Capabilities

Folder Download Function

Downloads entire Google Drive folders with recursive structure preservation, supporting up to 50 files per folder.

from typing import Union, List

def download_folder(
    url: Union[str, None] = None,
    id: Union[str, None] = None,
    output: Union[str, None] = None,
    quiet: bool = False,
    proxy: Union[str, None] = None,
    speed: Union[float, None] = None,
    use_cookies: bool = True,
    remaining_ok: bool = False,
    verify: Union[bool, str] = True,
    user_agent: Union[str, None] = None,
    skip_download: bool = False,
    resume: bool = False
) -> Union[List[str], List[GoogleDriveFileToDownload], None]:
    """
    Downloads entire folder from Google Drive URL.

    Parameters:
    - url (str): Google Drive folder URL. Must be format 'https://drive.google.com/drive/folders/{id}'.
    - id (str): Google Drive folder ID. Cannot be used with url parameter.
    - output (str): Output directory path. If None, uses folder name from Google Drive.
    - quiet (bool): Suppress terminal output. Default: False.
    - proxy (str): Proxy configuration in format 'protocol://host:port'.
    - speed (float): Download speed limit in bytes per second.
    - use_cookies (bool): Use cookies from ~/.cache/gdown/cookies.txt. Default: True.
    - remaining_ok (bool): Allow downloading folders at maximum file limit (50 files). Default: False.
    - verify (bool/str): TLS certificate verification. True/False or path to CA bundle. Default: True.
    - user_agent (str): Custom user agent string.
    - skip_download (bool): Return file list without downloading (dry run). Default: False.
    - resume (bool): Resume interrupted downloads, skip completed files. Default: False.

    Returns:
    Union[List[str], List[GoogleDriveFileToDownload], None]: 
        - If skip_download=False: List of downloaded file paths or None if failed.
        - If skip_download=True: List of GoogleDriveFileToDownload objects.

    Raises:
    FolderContentsMaximumLimitError: When folder contains more than 50 files.
    FileURLRetrievalError: When unable to access folder or retrieve file URLs.
    ValueError: When both url and id are specified or neither.
    """

Data Types

import collections

# Immutable record for one file discovered during a folder dry run:
# its Drive ID, its path relative to the folder root, and where it
# would land on the local filesystem.
GoogleDriveFileToDownload = collections.namedtuple(
    "GoogleDriveFileToDownload", "id path local_path"
)

Named tuple container for file download information with the following fields:

  • id (str): Google Drive file ID
  • path (str): Relative path within folder structure
  • local_path (str): Local filesystem path where file will be saved

Usage Examples

Basic Folder Download

import gdown

# Fetch every file in the shared folder into ./my_folder.
folder_url = "https://drive.google.com/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl"
downloaded_files = gdown.download_folder(folder_url, output="./my_folder")

# Report what landed on disk.
print("Downloaded %d files:" % len(downloaded_files))
for file_path in downloaded_files:
    print("  %s" % file_path)

Folder Download with ID

# Using folder ID directly
# Equivalent to the URL form: pass id= when you only have the bare
# folder ID string rather than a full drive.google.com URL.
folder_id = "15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl"
downloaded_files = gdown.download_folder(id=folder_id, output="./dataset")

Dry Run (List Files Without Downloading)

# Enumerate the folder's contents without transferring any data.
folder_url = "https://drive.google.com/drive/folders/FOLDER_ID"
file_info = gdown.download_folder(folder_url, skip_download=True)

# Each entry is a GoogleDriveFileToDownload named tuple.
print("Files in folder:")
for entry in file_info:
    for line in (
        f"ID: {entry.id}",
        f"Path: {entry.path}",
        f"Local path: {entry.local_path}",
        "---",
    ):
        print(line)

Resume Interrupted Downloads

# Re-run an interrupted download; files already complete on disk are skipped.
gdown.download_folder(
    folder_url,
    resume=True,
    output="./large_dataset",
    quiet=False,  # keep per-file progress visible for the files still downloading
)

Advanced Configuration

# Throttled download routed through a corporate proxy.
gdown.download_folder(
    url=folder_url,
    output="./data",
    use_cookies=True,
    proxy="http://corporate-proxy:8080",
    speed=2 * 1024 * 1024,  # cap transfer rate at 2 MB/s
    remaining_ok=True,  # don't error out when the folder sits at the 50-file cap
)

Folder Structure Preservation

gdown maintains the original Google Drive folder structure:

Original Google Drive:
📁 Dataset/
├── 📁 train/
│   ├── image1.jpg
│   └── image2.jpg
├── 📁 test/
│   └── image3.jpg
└── README.txt

Downloaded Structure:
./my_folder/
├── train/
│   ├── image1.jpg
│   └── image2.jpg
├── test/
│   └── image3.jpg
└── README.txt

Limitations and Constraints

File Count Limit

  • Maximum: 50 files per folder (Google Drive API restriction)
  • Behavior: Raises FolderContentsMaximumLimitError by default
  • Override: Use remaining_ok=True to allow download at limit

Supported File Types

  • All file types supported by Google Drive
  • Google Workspace documents (Docs/Sheets/Slides) downloaded in default formats
  • Binary files, images, archives, etc.

Authentication

# Private folders: place a Mozilla/Netscape-format cookie jar at
# ~/.cache/gdown/cookies.txt and leave use_cookies at its default (True).

# Public folders need no credentials, so cookie handling can be skipped:
gdown.download_folder(url, use_cookies=False)

Error Handling

from gdown.exceptions import FolderContentsMaximumLimitError, FileURLRetrievalError

# One handler per documented failure mode of download_folder().
FOLDER_URL = "https://drive.google.com/drive/folders/FOLDER_ID"

try:
    files = gdown.download_folder(FOLDER_URL)
except FolderContentsMaximumLimitError:
    # Folder exceeds the 50-file cap.
    print("Folder contains more than 50 files. Use remaining_ok=True to download anyway.")
except FileURLRetrievalError as e:
    # Likely causes: folder permissions, malformed URL, or network trouble.
    print(f"Failed to access folder: {e}")
except ValueError as e:
    # Raised when url/id arguments are inconsistent.
    print(f"Invalid parameters: {e}")
else:
    print(f"Successfully downloaded {len(files)} files")

Handling Large Folders

def download_large_folder(folder_url, output_dir):
    """Download a folder, recovering when it exceeds the 50-file limit.

    Parameters:
    - folder_url (str): Google Drive folder URL.
    - output_dir (str): Directory to download into.

    Returns:
    List of downloaded file paths (or whatever download_folder returns),
    or None when the user declines the oversized download — in that case
    a preview of the folder contents is printed instead.
    """
    try:
        # Normal path: folders within the limit download directly.
        return gdown.download_folder(folder_url, output=output_dir)

    except FolderContentsMaximumLimitError:
        print("Folder at maximum size limit (50 files)")

        # Option 1: let the user force the download past the limit.
        response = input("Download anyway? (y/n): ")
        if response.strip().lower() == 'y':  # strip() tolerates stray whitespace in input
            return gdown.download_folder(
                folder_url,
                output=output_dir,
                remaining_ok=True
            )

        # Option 2: dry-run listing so the user can pick files manually.
        # remaining_ok=True is required here too — enumerating the same
        # over-limit folder without it raises FolderContentsMaximumLimitError again.
        file_list = gdown.download_folder(
            folder_url,
            skip_download=True,
            remaining_ok=True
        )
        print(f"Folder contains {len(file_list)} files:")
        for i, file_obj in enumerate(file_list[:10]):  # Show first 10
            print(f"{i+1}. {file_obj.path}")

        return None

Best Practices

Batch Processing

def process_dataset_folder(folder_url):
    """Download an entire dataset folder and dispatch each file by extension.

    Parameters:
    - folder_url (str): Google Drive folder URL.

    Returns:
    The list of downloaded file paths, or None when the download failed.
    """
    # Resume-aware download: a re-run skips files already on disk.
    files = gdown.download_folder(
        folder_url,
        output="./dataset",
        resume=True,
        quiet=False
    )

    # download_folder returns None on failure; iterating None would raise
    # TypeError, so bail out early and preserve the None for the caller.
    if files is None:
        return None

    # Dispatch on file extension.
    for file_path in files:
        if file_path.endswith('.csv'):
            # Process CSV files
            print(f"Processing CSV: {file_path}")
        elif file_path.endswith(('.jpg', '.png')):
            # Process images
            print(f"Processing image: {file_path}")

    return files

Monitoring Progress

# For large folders, monitor download progress
import os

def monitor_folder_download(folder_url, output_dir):
    """Download a folder, reporting totals and verifying the results on disk."""
    # Dry-run first so we know how many files to expect.
    expected = gdown.download_folder(folder_url, skip_download=True)
    total_files = len(expected)

    print(f"Preparing to download {total_files} files...")

    # Real download, resumable and with progress output.
    downloaded_files = gdown.download_folder(
        folder_url,
        output=output_dir,
        quiet=False,
        resume=True,
    )

    if downloaded_files:
        print(f"✅ Successfully downloaded {len(downloaded_files)}/{total_files} files")

        # Cross-check the dry-run manifest against the filesystem.
        missing = [entry.path for entry in expected
                   if not os.path.exists(entry.local_path)]
        if missing:
            print(f"⚠️  Missing {len(missing)} files:")
            for path in missing[:5]:  # Show first 5
                print(f"  - {path}")

    return downloaded_files

Install with Tessl CLI

npx tessl i tessl/pypi-gdown

docs

archive-utilities.md

caching-integrity.md

file-downloads.md

folder-operations.md

index.md

tile.json