A Python library and command-line interface for interacting with the Open Science Framework
—
Utility functions for path handling, file operations, and custom exception classes for comprehensive error handling in OSFClient operations.
Functions for handling and normalizing file paths in OSF storage systems.
def norm_remote_path(path):
"""
Normalize remote file path.
All remote paths are treated as absolute paths. Removes leading
slash and normalizes path separators.
Args:
path (str): File or directory path
Returns:
str: Normalized path without leading slash
"""
def split_storage(path, default='osfstorage'):
"""
Extract storage provider name from file path.
If path begins with a known storage provider, the provider name
is removed from the path. Otherwise returns the default provider
and leaves path unchanged.
Args:
path (str): File path that may include storage provider prefix
default (str): Default storage provider if none found in path
Returns:
tuple: (provider_name, remaining_path)
Examples:
split_storage('github/README.md') -> ('github', 'README.md')
split_storage('data/file.txt') -> ('osfstorage', 'data/file.txt')
"""

Utility functions for file system operations and file analysis.
def makedirs(path, mode=511, exist_ok=False):
"""
Create directory and any necessary parent directories.
Cross-platform wrapper around os.makedirs with Python 2/3 compatibility.
Args:
path (str): Directory path to create
mode (int): Directory permissions (default: 0o777)
exist_ok (bool): Don't raise exception if directory already exists
Returns:
None
Note:
Mode 511 is decimal for octal 0o777 (full permissions)
"""
def file_empty(fp):
"""
Determine if a file is empty.
Args:
fp (file): File pointer opened for reading
Returns:
bool: True if file is empty, False otherwise
Note:
File pointer position is reset to beginning after check.
"""
def get_local_file_size(fp):
"""
Get file size from file pointer.
Args:
fp (file): File pointer
Returns:
int: File size in bytes
"""

Functions for calculating file checksums and hash verification.
def checksum(file_path, hash_type='md5', block_size=65536):
"""
Calculate hash of a file.
Args:
file_path (str): Path to file to hash
hash_type (str): Hash algorithm ('md5' or 'sha256')
block_size (int): Size of blocks to read (default: 64KB)
Returns:
str: Hexadecimal hash digest
Raises:
ValueError: If hash_type is not 'md5' or 'sha256'
Note:
MD5 is faster than SHA256 and is the default for compatibility
with OSF's hash storage format.
"""

KNOWN_PROVIDERS = ['osfstorage', 'github', 'figshare', 'googledrive', 'owncloud']

List of storage providers supported by OSFClient. Used by split_storage() to recognize provider prefixes in file paths.
Custom exception hierarchy for OSF-specific error handling.
class OSFException(Exception):
"""
Base exception class for all OSF-related errors.
Inherits from built-in Exception class. All other OSF exceptions
inherit from this class.
"""
class UnauthorizedException(OSFException):
"""
Raised when authentication fails or access is denied.
Typically occurs when:
- Invalid credentials provided
- Token has expired or been revoked
- Attempting to access private resources without authentication
- Insufficient permissions for requested operation
"""
class FolderExistsException(OSFException):
"""
Raised when attempting to create a folder that already exists.
Args:
name (str): Name of the folder that already exists
Attributes:
args (tuple): Contains error message with folder name
"""

from osfclient.utils import norm_remote_path, split_storage
# Normalize paths
path1 = norm_remote_path('/data/file.txt')
print(path1) # 'data/file.txt'
path2 = norm_remote_path('folder/subfolder/file.txt')
print(path2) # 'folder/subfolder/file.txt'
# Split storage providers from paths
provider, path = split_storage('github/README.md')
print(f"Provider: {provider}, Path: {path}") # Provider: github, Path: README.md
provider, path = split_storage('data/analysis.csv')
print(f"Provider: {provider}, Path: {path}") # Provider: osfstorage, Path: data/analysis.csv
# Custom default provider
provider, path = split_storage('data/file.txt', default='figshare')
print(f"Provider: {provider}, Path: {path}") # Provider: figshare, Path: data/file.txt

from osfclient.utils import makedirs, file_empty, get_local_file_size
import os
# Create directories safely
makedirs('/path/to/new/directory', exist_ok=True)
# Check if file is empty before processing
with open('data.txt', 'rb') as f:
    if file_empty(f):
        print("File is empty, skipping processing")
    else:
        print(f"File size: {get_local_file_size(f)} bytes")
        # Process file content

from osfclient.utils import checksum
# Calculate MD5 hash (default)
md5_hash = checksum('/path/to/file.txt')
print(f"MD5: {md5_hash}")
# Calculate SHA256 hash
sha256_hash = checksum('/path/to/file.txt', hash_type='sha256')
print(f"SHA256: {sha256_hash}")
# Custom block size for large files
large_file_hash = checksum('/path/to/large_file.bin', block_size=1024*1024) # 1MB blocks
# Verify file integrity
expected_hash = 'a1b2c3d4e5f6...'
actual_hash = checksum('downloaded_file.txt')
if actual_hash == expected_hash:
    print("File integrity verified")
else:
    print("File may be corrupted")

from osfclient import OSF
from osfclient.exceptions import OSFException, UnauthorizedException, FolderExistsException
try:
    # Authentication
    osf = OSF(username='invalid', password='invalid')
    if not osf.can_login:
        raise UnauthorizedException("Invalid credentials")
    # Project access
    project = osf.project('invalid_project_id')
except UnauthorizedException as e:
    print(f"Authentication error: {e}")
    # Prompt for new credentials or use token auth
except OSFException as e:
    print(f"OSF API error: {e}")
    # Handle general OSF errors
except Exception as e:
    print(f"Unexpected error: {e}")
try:
    # Folder creation
    storage = project.storage()
    new_folder = storage.create_folder('analysis_results')
except FolderExistsException as e:
    print(f"Folder already exists: {e}")
    # Use exist_ok=True to avoid this exception
    existing_folder = storage.create_folder('analysis_results', exist_ok=True)
except OSFException as e:
    print(f"Failed to create folder: {e}")

from osfclient.utils import KNOWN_PROVIDERS, norm_remote_path, split_storage
# Check if storage provider is supported
def is_supported_provider(provider_name):
    return provider_name in KNOWN_PROVIDERS

# Validate storage path format
def validate_storage_path(path):
    provider, clean_path = split_storage(path)
    if provider not in KNOWN_PROVIDERS:
        raise ValueError(f"Unsupported storage provider: {provider}")
    return provider, norm_remote_path(clean_path)
# Usage
try:
    provider, path = validate_storage_path('github/docs/README.md')
    print(f"Valid path - Provider: {provider}, Path: {path}")
except ValueError as e:
    print(f"Invalid path: {e}")

import os
from osfclient.utils import makedirs
# Create nested directory structure safely
base_path = os.path.expanduser('~/osf_projects')
project_path = os.path.join(base_path, 'project_123', 'data', 'raw')
# This works on Windows, macOS, and Linux
makedirs(project_path, exist_ok=True)
# Now safe to create files in the directory
file_path = os.path.join(project_path, 'experiment.csv')
with open(file_path, 'w') as f:
    f.write('data,goes,here\n')

Install with Tessl CLI
npx tessl i tessl/pypi-osfclient