A Python client for the GitHub API
—
Pagination module: utilities for handling paginated GitHub API responses and for parsing pagination (RFC 5988 link) headers efficiently. These helpers convert GitHub API operations into iterators so that large result sets can be consumed page by page.
def paged(oper, *args, per_page=30, max_pages=9999, **kwargs):
    """
    Convert API operation into an iterator for pagination.

    Calls `oper` once per page, forwarding `per_page` and a 1-based `page`
    keyword argument, and yields each non-empty response. Iteration stops
    at the first empty page or after `max_pages` pages, whichever comes
    first.

    Parameters:
    - oper: callable, GitHub API operation function
    - *args: positional arguments for the operation
    - per_page: int, number of items per page (default 30)
    - max_pages: int, maximum pages to fetch (default 9999)
    - **kwargs: keyword arguments for the operation

    Yields:
    API response objects for each page
    """
    for page_num in range(1, max_pages + 1):
        result = oper(*args, per_page=per_page, page=page_num, **kwargs)
        # GitHub returns an empty collection once pagination is exhausted.
        if not result:
            return
        yield result
# Parse RFC 5988 link headers from GitHub API responses.
def parse_link_hdr(header):
    """
    Parse RFC 5988 link header from GitHub API response.

    Splits the comma-separated header into individual links; each link is
    an angle-bracketed URL followed by `; key="value"` attribute segments
    (typically `rel="next"`, `rel="last"`, etc.).

    Parameters:
    - header: str, link header value

    Returns:
    list: List of tuples containing (url, attributes_dict)
    """
    links = []
    if not header:
        return links
    for link in header.split(','):
        segments = link.split(';')
        # First segment is the URL, wrapped in angle brackets.
        url = segments[0].strip()
        if url.startswith('<') and url.endswith('>'):
            url = url[1:-1]
        attrs = {}
        for segment in segments[1:]:
            key, _, value = segment.strip().partition('=')
            attrs[key.strip()] = value.strip().strip('"')
        links.append((url, attrs))
    return links
def pages(oper, n_pages, *args, n_workers=None, per_page=100, **kwargs):
    """
    Get multiple pages from an operation in parallel.

    Submits one call per page (pages 1..n_pages) to a thread pool and
    returns the results in page order.

    Parameters:
    - oper: callable, GitHub API operation function
    - n_pages: int, number of pages to retrieve
    - *args: positional arguments for the operation
    - n_workers: int, number of parallel workers (defaults to n_pages)
    - per_page: int, items per page
    - **kwargs: keyword arguments for the operation

    Returns:
    list: List of page results retrieved in parallel
    """
    from concurrent.futures import ThreadPoolExecutor
    if n_workers is None:
        n_workers = n_pages
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = [executor.submit(oper, *args, per_page=per_page, page=i, **kwargs)
                   for i in range(1, n_pages + 1)]
        # Collect in submission order so results are sorted by page number.
        return [f.result() for f in futures]
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Paginate through all issues in a repository
for page in paged(api.issues.list_for_repo, owner='user', repo='repo', per_page=50):
    for issue in page:
        print(f"#{issue.number}: {issue.title}")
# Limit to first 5 pages
for page in paged(api.repos.list_for_user, username='user', per_page=100, max_pages=5):
    for repo in page:
        print(f"{repo.full_name}: {repo.stargazers_count} stars")
api = GhApi(token='your_token')
# Get all repositories for an organization
all_repos = []
for page in paged(api.repos.list_for_org, org='organization', per_page=100):
    all_repos.extend(page)
print(f"Total repositories: {len(all_repos)}")
# Process repositories in batches
for page in paged(api.repos.list_for_org, org='org', per_page=25):
    print(f"Processing batch of {len(page)} repositories:")
    for repo in page:
        print(f" - {repo.name} ({repo.language})")
# Get multiple pages in parallel
from ghapi.all import pages
page_results = pages(api.repos.list_for_org, 5, org='organization', per_page=20)
total_repos = sum(len(page) for page in page_results)
print(f"Retrieved {total_repos} repositories across 5 pages in parallel")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token', owner='user', repo='repo')
# Get all open issues
open_issues = []
for page in paged(api.issues.list_for_repo, state='open', per_page=100):
    open_issues.extend(page)
print(f"Open issues: {len(open_issues)}")
# Get all pull requests with specific label
for page in paged(api.pulls.list, state='all', per_page=50):
    for pr in page:
        if any(label.name == 'enhancement' for label in pr.labels):
            print(f"Enhancement PR: #{pr.number} - {pr.title}")
# Get issue comments across all issues
for page in paged(api.issues.list_comments_for_repo, per_page=100):
    for comment in page:
        print(f"Comment by {comment.user.login}: {comment.body[:50]}...")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Get all commits in repository
commit_count = 0
for page in paged(api.repos.list_commits, owner='user', repo='repo', per_page=100):
    commit_count += len(page)
    for commit in page:
        # First line of the commit message; the original split on the
        # two-character literal '\\n' (and backslashes in f-string
        # expressions are a syntax error before Python 3.12).
        subject = commit.commit.message.split('\n')[0]
        print(f"{commit.sha[:8]}: {subject}")
print(f"Total commits: {commit_count}")
# Get repository events
for page in paged(api.activity.list_repo_events, owner='user', repo='repo', per_page=50):
    for event in page:
        print(f"{event.type}: {event.actor.login} at {event.created_at}")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Get all followers for a user
followers = []
for page in paged(api.users.list_followers_for_user, username='user', per_page=100):
    followers.extend(page)
print(f"Total followers: {len(followers)}")
# Get organization members
for page in paged(api.orgs.list_members, org='organization', per_page=50):
    for member in page:
        print(f"Member: {member.login}")
# Get user's starred repositories
starred_repos = []
for page in paged(api.activity.list_repos_starred_by_user, username='user', per_page=100):
    starred_repos.extend(page)
print(f"Starred repositories: {len(starred_repos)}")
from ghapi.all import GhApi, paged
from datetime import datetime, timedelta
api = GhApi(token='your_token')
# Get recent issues (last 30 days)
# NOTE(review): naive local time; GitHub `since` expects ISO 8601 —
# consider an explicitly UTC timestamp here.
thirty_days_ago = datetime.now() - timedelta(days=30)
recent_issues = []
for page in paged(api.issues.list_for_repo,
                  owner='user', repo='repo',
                  state='all',
                  since=thirty_days_ago.isoformat(),
                  per_page=100):
    recent_issues.extend(page)
print(f"Issues in last 30 days: {len(recent_issues)}")
# Get pull requests by author
author_prs = []
for page in paged(api.pulls.list, owner='user', repo='repo', state='all', per_page=100):
    for pr in page:
        if pr.user.login == 'specific_author':
            author_prs.append(pr)
print(f"PRs by author: {len(author_prs)}")
from ghapi.all import parse_link_hdr
# Example link header from GitHub API
link_header = '<https://api.github.com/repos/user/repo/issues?page=2>; rel="next", <https://api.github.com/repos/user/repo/issues?page=5>; rel="last"'
# Parse the header
links = parse_link_hdr(link_header)
for url, attrs in links:
    print(f"URL: {url}")
    for key, value in attrs.items():
        print(f" {key}: {value}")
# Extract specific relation types
next_url = None
last_url = None
for url, attrs in links:
    if attrs.get('rel') == 'next':
        next_url = url
    elif attrs.get('rel') == 'last':
        last_url = url
if next_url:
    print(f"Next page: {next_url}")
if last_url:
    print(f"Last page: {last_url}")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
def collect_all_data(operation_func, *args, per_page=100, **kwargs):
    """Collect all paginated data into a single list.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - *args: positional arguments forwarded to the operation
    - per_page: int, items per page (default 100; previously hard-coded)
    - **kwargs: keyword arguments forwarded to the operation

    Returns:
    list: all items from every page, in order
    """
    all_data = []
    for page in paged(operation_func, *args, per_page=per_page, **kwargs):
        all_data.extend(page)
        print(f"Collected {len(all_data)} items so far...")
    return all_data
# Use custom collector
all_issues = collect_all_data(
    api.issues.list_for_repo,
    owner='user',
    repo='repo',
    state='all'
)
print(f"Total issues collected: {len(all_issues)}")
def process_in_chunks(operation_func, chunk_processor, *args, per_page=50, **kwargs):
    """Process paginated data in chunks.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - chunk_processor: callable, invoked once with each page of results
    - *args: positional arguments forwarded to the operation
    - per_page: int, chunk (page) size (default 50; previously hard-coded)
    - **kwargs: keyword arguments forwarded to the operation
    """
    for page_num, page in enumerate(paged(operation_func, *args, per_page=per_page, **kwargs)):
        print(f"Processing page {page_num + 1}...")
        chunk_processor(page)
def process_repo_chunk(repos):
    """Process a chunk of repositories, reporting the popular ones."""
    for repo in repos:
        if repo.stargazers_count > 100:
            print(f"Popular repo: {repo.full_name} ({repo.stargazers_count} stars)")
# Process repositories in chunks
process_in_chunks(
    api.repos.list_for_org,
    process_repo_chunk,
    org='organization'
)
from ghapi.all import GhApi, paged
import time
# Authenticated client used by the safe-pagination example below.
api = GhApi(token='your_token')
def safe_paginate(operation_func, *args, retry_count=3, **kwargs):
    """Paginate with error handling and retries.

    Fetches each page directly (per_page=100), retrying failed fetches
    with exponential backoff. The previous version wrapped `yield page`
    in try/except, but fetch errors are raised by the `paged` iterator in
    the `for` statement — outside that try block — so the retry logic
    could never fire (and a generator cannot be resumed after it raises).
    Fetching each page explicitly makes the retries effective.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - *args: positional arguments for the operation
    - retry_count: int, attempts per page before giving up (default 3)
    - **kwargs: keyword arguments for the operation

    Yields:
    API response objects, one per non-empty page

    Raises:
    Exception: re-raises the last fetch error if a page fails
        `retry_count` times in a row.
    """
    page_num = 0
    while True:
        page_num += 1
        retries = 0
        while True:
            try:
                page = operation_func(*args, per_page=100, page=page_num, **kwargs)
                break
            except Exception as e:
                retries += 1
                print(f"Error processing page {page_num}, retry {retries}: {e}")
                if retries < retry_count:
                    time.sleep(2 ** retries)  # Exponential backoff
                else:
                    print(f"Failed to process page {page_num} after {retry_count} retries")
                    raise
        # An empty page means pagination is exhausted.
        if not page:
            return
        yield page
# Use safe pagination
try:
    for page in safe_paginate(api.repos.list_for_org, org='large_org'):
        for repo in page:
            print(f"Processing: {repo.name}")
except Exception as e:
    print(f"Pagination failed: {e}")
# Install with Tessl CLI
npx tessl i tessl/pypi-ghapi