A Python client for the GitHub API
—
Pagination module: utilities for handling paginated GitHub API responses and for parsing pagination (RFC 5988 link) headers efficiently. These helpers convert GitHub API operations into iterators so that large result sets can be consumed page by page.
def paged(oper, *args, per_page=30, max_pages=9999, **kwargs):
    """
    Convert API operation into an iterator for pagination.

    Calls `oper` once per page, forwarding `per_page` and a 1-based `page`
    keyword argument, and yields each non-empty response. Iteration stops
    at the first empty page or after `max_pages` pages, whichever comes
    first.

    Parameters:
    - oper: callable, GitHub API operation function
    - *args: positional arguments for the operation
    - per_page: int, number of items per page (default 30)
    - max_pages: int, maximum pages to fetch (default 9999)
    - **kwargs: keyword arguments for the operation

    Yields:
    API response objects for each page
    """
    for page_num in range(1, max_pages + 1):
        result = oper(*args, per_page=per_page, page=page_num, **kwargs)
        # GitHub returns an empty collection once pagination is exhausted.
        if not result:
            return
        yield result
# Parse RFC 5988 link headers from GitHub API responses.
def parse_link_hdr(header):
    """
    Parse RFC 5988 link header from GitHub API response.

    Splits the comma-separated header into individual links; each link is
    an angle-bracketed URL followed by `; key="value"` attribute segments
    (typically `rel="next"`, `rel="last"`, etc.).

    Parameters:
    - header: str, link header value

    Returns:
    list: List of tuples containing (url, attributes_dict)
    """
    links = []
    if not header:
        return links
    for link in header.split(','):
        segments = link.split(';')
        # First segment is the URL, wrapped in angle brackets.
        url = segments[0].strip()
        if url.startswith('<') and url.endswith('>'):
            url = url[1:-1]
        attrs = {}
        for segment in segments[1:]:
            key, _, value = segment.strip().partition('=')
            attrs[key.strip()] = value.strip().strip('"')
        links.append((url, attrs))
    return links
def pages(oper, n_pages, *args, n_workers=None, per_page=100, **kwargs):
    """
    Get multiple pages from an operation in parallel.

    Submits one call per page (pages 1..n_pages) to a thread pool and
    returns the results in page order.

    Parameters:
    - oper: callable, GitHub API operation function
    - n_pages: int, number of pages to retrieve
    - *args: positional arguments for the operation
    - n_workers: int, number of parallel workers (defaults to n_pages)
    - per_page: int, items per page
    - **kwargs: keyword arguments for the operation

    Returns:
    list: List of page results retrieved in parallel
    """
    from concurrent.futures import ThreadPoolExecutor
    if n_workers is None:
        n_workers = n_pages
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        futures = [executor.submit(oper, *args, per_page=per_page, page=i, **kwargs)
                   for i in range(1, n_pages + 1)]
        # Collect in submission order so results are sorted by page number.
        return [f.result() for f in futures]
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Paginate through all issues in a repository
for page in paged(api.issues.list_for_repo, owner='user', repo='repo', per_page=50):
    for issue in page:
        print(f"#{issue.number}: {issue.title}")
# Limit to first 5 pages
for page in paged(api.repos.list_for_user, username='user', per_page=100, max_pages=5):
    for repo in page:
        print(f"{repo.full_name}: {repo.stargazers_count} stars")
api = GhApi(token='your_token')
# Get all repositories for an organization
all_repos = []
for page in paged(api.repos.list_for_org, org='organization', per_page=100):
    all_repos.extend(page)
print(f"Total repositories: {len(all_repos)}")
# Process repositories in batches
for page in paged(api.repos.list_for_org, org='org', per_page=25):
    print(f"Processing batch of {len(page)} repositories:")
    for repo in page:
        print(f" - {repo.name} ({repo.language})")
# Get multiple pages in parallel
from ghapi.all import pages
page_results = pages(api.repos.list_for_org, 5, org='organization', per_page=20)
total_repos = sum(len(page) for page in page_results)
print(f"Retrieved {total_repos} repositories across 5 pages in parallel")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token', owner='user', repo='repo')
# Get all open issues
open_issues = []
for page in paged(api.issues.list_for_repo, state='open', per_page=100):
    open_issues.extend(page)
print(f"Open issues: {len(open_issues)}")
# Get all pull requests with specific label
for page in paged(api.pulls.list, state='all', per_page=50):
    for pr in page:
        if any(label.name == 'enhancement' for label in pr.labels):
            print(f"Enhancement PR: #{pr.number} - {pr.title}")
# Get issue comments across all issues
for page in paged(api.issues.list_comments_for_repo, per_page=100):
    for comment in page:
        print(f"Comment by {comment.user.login}: {comment.body[:50]}...")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Get all commits in repository
commit_count = 0
for page in paged(api.repos.list_commits, owner='user', repo='repo', per_page=100):
    commit_count += len(page)
    for commit in page:
        # First line of the commit message; the original split on the
        # two-character literal '\\n' (and backslashes in f-string
        # expressions are a syntax error before Python 3.12).
        subject = commit.commit.message.split('\n')[0]
        print(f"{commit.sha[:8]}: {subject}")
print(f"Total commits: {commit_count}")
# Get repository events
for page in paged(api.activity.list_repo_events, owner='user', repo='repo', per_page=50):
    for event in page:
        print(f"{event.type}: {event.actor.login} at {event.created_at}")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
# Get all followers for a user
followers = []
for page in paged(api.users.list_followers_for_user, username='user', per_page=100):
    followers.extend(page)
print(f"Total followers: {len(followers)}")
# Get organization members
for page in paged(api.orgs.list_members, org='organization', per_page=50):
    for member in page:
        print(f"Member: {member.login}")
# Get user's starred repositories
starred_repos = []
for page in paged(api.activity.list_repos_starred_by_user, username='user', per_page=100):
    starred_repos.extend(page)
print(f"Starred repositories: {len(starred_repos)}")
from ghapi.all import GhApi, paged
from datetime import datetime, timedelta
api = GhApi(token='your_token')
# Get recent issues (last 30 days)
# NOTE(review): naive local time; GitHub `since` expects ISO 8601 —
# consider an explicitly UTC timestamp here.
thirty_days_ago = datetime.now() - timedelta(days=30)
recent_issues = []
for page in paged(api.issues.list_for_repo,
                  owner='user', repo='repo',
                  state='all',
                  since=thirty_days_ago.isoformat(),
                  per_page=100):
    recent_issues.extend(page)
print(f"Issues in last 30 days: {len(recent_issues)}")
# Get pull requests by author
author_prs = []
for page in paged(api.pulls.list, owner='user', repo='repo', state='all', per_page=100):
    for pr in page:
        if pr.user.login == 'specific_author':
            author_prs.append(pr)
print(f"PRs by author: {len(author_prs)}")
from ghapi.all import parse_link_hdr
# Example link header from GitHub API
link_header = '<https://api.github.com/repos/user/repo/issues?page=2>; rel="next", <https://api.github.com/repos/user/repo/issues?page=5>; rel="last"'
# Parse the header
links = parse_link_hdr(link_header)
for url, attrs in links:
    print(f"URL: {url}")
    for key, value in attrs.items():
        print(f" {key}: {value}")
# Extract specific relation types
next_url = None
last_url = None
for url, attrs in links:
    if attrs.get('rel') == 'next':
        next_url = url
    elif attrs.get('rel') == 'last':
        last_url = url
if next_url:
    print(f"Next page: {next_url}")
if last_url:
    print(f"Last page: {last_url}")
from ghapi.all import GhApi, paged
api = GhApi(token='your_token')
def collect_all_data(operation_func, *args, per_page=100, **kwargs):
    """Collect all paginated data into a single list.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - *args: positional arguments forwarded to the operation
    - per_page: int, items per page (default 100; previously hard-coded)
    - **kwargs: keyword arguments forwarded to the operation

    Returns:
    list: all items from every page, in order
    """
    all_data = []
    for page in paged(operation_func, *args, per_page=per_page, **kwargs):
        all_data.extend(page)
        print(f"Collected {len(all_data)} items so far...")
    return all_data
# Use custom collector
all_issues = collect_all_data(
    api.issues.list_for_repo,
    owner='user',
    repo='repo',
    state='all'
)
print(f"Total issues collected: {len(all_issues)}")
def process_in_chunks(operation_func, chunk_processor, *args, per_page=50, **kwargs):
    """Process paginated data in chunks.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - chunk_processor: callable, invoked once with each page of results
    - *args: positional arguments forwarded to the operation
    - per_page: int, chunk (page) size (default 50; previously hard-coded)
    - **kwargs: keyword arguments forwarded to the operation
    """
    for page_num, page in enumerate(paged(operation_func, *args, per_page=per_page, **kwargs)):
        print(f"Processing page {page_num + 1}...")
        chunk_processor(page)
def process_repo_chunk(repos):
    """Process a chunk of repositories, reporting the popular ones."""
    for repo in repos:
        if repo.stargazers_count > 100:
            print(f"Popular repo: {repo.full_name} ({repo.stargazers_count} stars)")
# Process repositories in chunks
process_in_chunks(
    api.repos.list_for_org,
    process_repo_chunk,
    org='organization'
)
from ghapi.all import GhApi, paged
import time
# Authenticated client used by the safe-pagination example below.
api = GhApi(token='your_token')
def safe_paginate(operation_func, *args, retry_count=3, **kwargs):
    """Paginate with error handling and retries.

    Fetches each page directly (per_page=100), retrying failed fetches
    with exponential backoff. The previous version wrapped `yield page`
    in try/except, but fetch errors are raised by the `paged` iterator in
    the `for` statement — outside that try block — so the retry logic
    could never fire (and a generator cannot be resumed after it raises).
    Fetching each page explicitly makes the retries effective.

    Parameters:
    - operation_func: callable, GitHub API operation function
    - *args: positional arguments for the operation
    - retry_count: int, attempts per page before giving up (default 3)
    - **kwargs: keyword arguments for the operation

    Yields:
    API response objects, one per non-empty page

    Raises:
    Exception: re-raises the last fetch error if a page fails
        `retry_count` times in a row.
    """
    page_num = 0
    while True:
        page_num += 1
        retries = 0
        while True:
            try:
                page = operation_func(*args, per_page=100, page=page_num, **kwargs)
                break
            except Exception as e:
                retries += 1
                print(f"Error processing page {page_num}, retry {retries}: {e}")
                if retries < retry_count:
                    time.sleep(2 ** retries)  # Exponential backoff
                else:
                    print(f"Failed to process page {page_num} after {retry_count} retries")
                    raise
        # An empty page means pagination is exhausted.
        if not page:
            return
        yield page
# Use safe pagination
try:
    for page in safe_paginate(api.repos.list_for_org, org='large_org'):
        for repo in page:
            print(f"Processing: {repo.name}")
except Exception as e:
    print(f"Pagination failed: {e}")
# Install with Tessl CLI
npx tessl i tessl/pypi-ghapi