CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-packageurl-python

A parser and builder for purl (a.k.a. Package URL)

Pending
Overview
Eval results
Files

docs/url-conversion.md

URL Conversion Utilities

Bidirectional conversion between arbitrary URLs and PackageURLs, including repository URL inference and download URL generation for various package ecosystems. These utilities enable automatic PURL generation from repository URLs and reconstruction of repository/download URLs from PURLs.

Capabilities

URL to PURL Conversion

Convert arbitrary URLs to PackageURL objects by pattern matching and inference.

from packageurl.contrib.url2purl import url2purl, get_purl, purl_router

def url2purl(url):
    """
    Infer a PackageURL object from a URL.

    Args:
        url (str): Repository, download, or package page URL to convert

    Returns:
        PackageURL | None: The inferred PackageURL, or None when no pattern matches the URL
    """

# get_purl is an alias bound to the same function object as url2purl
get_purl = url2purl

# Global router instance for URL pattern matching; it is also the default
# `router` argument of register_pattern
purl_router = Router()

def purl_from_pattern(type_, pattern, url, qualifiers=None):
    """
    Match a URL against a regex pattern and build a PackageURL from the result.

    Args:
        type_ (str): Package type assigned to the resulting PURL
        pattern (str): Regular expression containing named groups
        url (str): URL the pattern is matched against
        qualifiers (dict, optional): Extra qualifiers to include in the PURL

    Returns:
        PackageURL | None: The PackageURL when the pattern matches, otherwise None
    """

def register_pattern(type_, pattern, router=purl_router):
    """
    Associate a URL pattern with a package type on a router.

    Args:
        type_ (str): Package type assigned to URLs that match the pattern
        pattern (str): Regular expression used to match URLs
        router (Router, optional): Router that receives the registration (default: purl_router)
    """

def build_generic_purl(url):
    """
    Fallback conversion: build a generic PackageURL from an arbitrary URL
    when no specific pattern matches.

    Args:
        url (str): URL to turn into a generic PURL

    Returns:
        PackageURL | None: The generic PackageURL, or None if the URL is invalid
    """

PURL to URL Conversion

Generate repository and download URLs from PackageURL objects for various package ecosystems.

from packageurl.contrib.purl2url import (
    get_repo_url, 
    get_download_url, 
    get_inferred_urls,
    get_repo_download_url_by_package_type
)

def get_repo_url(purl):
    """
    Infer the repository URL for a PackageURL.

    Args:
        purl (str | PackageURL): PURL given as a string or as a PackageURL object

    Returns:
        str | None: The repository URL, or None when it cannot be inferred
    """

def get_download_url(purl):
    """
    Infer the download URL for a PackageURL.

    Args:
        purl (str | PackageURL): PURL given as a string or as a PackageURL object

    Returns:
        str | None: The download URL, or None when it cannot be inferred
    """

def get_inferred_urls(purl):
    """
    Collect every URL (repository and download) that can be inferred from a PackageURL.

    Args:
        purl (str | PackageURL): PURL given as a string or as a PackageURL object

    Returns:
        list[str]: The inferred URLs (repository URLs, download URLs, etc.)
    """

def get_repo_download_url_by_package_type(
    type,
    namespace,
    name,
    version,
    archive_extension="tar.gz"
):
    """
    Build the archive download URL of a hosted git repository for a package type.

    Args:
        type (str): Package type ('github', 'bitbucket', 'gitlab')
        namespace (str): Namespace/owner of the repository
        name (str): Name of the repository
        version (str): Version/tag of the archive to download
        archive_extension (str): Archive format, 'zip' or 'tar.gz' (default: 'tar.gz')

    Returns:
        str | None: Download URL of the repository archive

    Raises:
        ValueError: If archive_extension is neither 'zip' nor 'tar.gz'
    """

Routing System

Pattern-based URL routing system for extensible URL matching and processing.

from packageurl.contrib.route import Router, NoRouteAvailable

class Router:
    """
    Pattern-based router that matches URLs and dispatches them to endpoints.
    Accepts regex patterns and callable routing handlers.
    """

    def append(self, pattern, endpoint):
        """
        Register a pattern/endpoint pair at the end of the route map.

        Args:
            pattern (str): Regex pattern that URLs are matched against
            endpoint (callable): Function that handles URLs matching the pattern
        """

    def process(self, string, *args, **kwargs):
        """
        Find the pattern matching a URL and execute the associated endpoint.

        Args:
            string (str): URL to process
            *args, **kwargs: Extra arguments forwarded to the endpoint

        Returns:
            Whatever the matched endpoint function returns

        Raises:
            NoRouteAvailable: If the URL matches none of the patterns
        """

    def route(self, *patterns):
        """
        Decorator that routes a callable to one or more patterns.

        Args:
            *patterns (str): URL patterns the callable should handle

        Returns:
            Decorator function that registers the endpoint
        """

class NoRouteAvailable(Exception):
    """Raised when a URL matches none of the available routes."""

Constants

# Base URL of the default Maven repository (Maven Central)
DEFAULT_MAVEN_REPOSITORY = "https://repo.maven.apache.org/maven2"

Usage Examples

URL to PURL Conversion

from packageurl.contrib.url2purl import url2purl

# NOTE(review): the expected outputs below are written in repr() form, but the
# code calls print(), which goes through str(). PackageURL presumably renders
# as its canonical string there (e.g. "pkg:github/package-url/packageurl-python")
# — confirm against the installed release, or switch the examples to print(repr(...)).

# Convert GitHub URLs
github_purl = url2purl("https://github.com/package-url/packageurl-python")
print(github_purl)
# PackageURL(type='github', namespace='package-url', name='packageurl-python', version=None, qualifiers={}, subpath=None)

# Convert npm registry URLs
npm_purl = url2purl("https://www.npmjs.com/package/lodash")
print(npm_purl)
# PackageURL(type='npm', namespace=None, name='lodash', version=None, qualifiers={}, subpath=None)

# Convert PyPI URLs
pypi_purl = url2purl("https://pypi.org/project/django/")
print(pypi_purl) 
# PackageURL(type='pypi', namespace=None, name='django', version=None, qualifiers={}, subpath=None)

PURL to URL Conversion

from packageurl.contrib.purl2url import get_repo_url, get_download_url, get_inferred_urls

# Get repository URL
repo_url = get_repo_url("pkg:github/django/django@4.2.0")
print(repo_url)
# "https://github.com/django/django"
# NOTE(review): for a PURL that carries a version, upstream appears to return the
# tree URL ("https://github.com/django/django/tree/4.2.0") — confirm.

# Get download URL
download_url = get_download_url("pkg:pypi/requests@2.28.0")
print(download_url)
# "https://pypi.org/project/requests/2.28.0/#files"
# NOTE(review): this looks like a project page rather than a file download URL;
# get_download_url may return None for a plain pypi PURL — confirm.

# Get all inferred URLs
all_urls = get_inferred_urls("pkg:npm/lodash@4.17.21")
print(all_urls)
# ["https://www.npmjs.com/package/lodash", "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"]
# NOTE(review): with a version present, the npm repository URL is presumably
# "https://www.npmjs.com/package/lodash/v/4.17.21" — confirm.

Repository Archive Downloads

from packageurl.contrib.purl2url import get_repo_download_url_by_package_type

# GitHub: request a zip archive of a tagged version
github_zip = get_repo_download_url_by_package_type(
    type="github",
    namespace="microsoft",
    name="typescript",
    version="v4.8.0",
    archive_extension="zip",
)
print(github_zip)
# "https://github.com/microsoft/typescript/archive/v4.8.0.zip"

# GitLab: request a tar.gz archive of a tagged version
gitlab_tar = get_repo_download_url_by_package_type(
    type="gitlab",
    namespace="gitlab-org",
    name="gitlab",
    version="v15.0.0",
    archive_extension="tar.gz",
)
print(gitlab_tar)
# "https://gitlab.com/gitlab-org/gitlab/-/archive/v15.0.0/gitlab-v15.0.0.tar.gz"

# Bitbucket: archive_extension is omitted, so its default ("tar.gz") applies
bitbucket_tar = get_repo_download_url_by_package_type(
    type="bitbucket",
    namespace="atlassian",
    name="stash",
    version="v7.0.0",
)
print(bitbucket_tar)
# "https://bitbucket.org/atlassian/stash/get/v7.0.0.tar.gz"

Custom Routing

from packageurl.contrib.route import Router, NoRouteAvailable
from packageurl import PackageURL
import re

# Create a custom router: a new Router instance, distinct from the global purl_router
custom_router = Router()

# Add custom route handler
def handle_custom_registry(url):
    """
    Build a "custom"-type PackageURL from a custom package registry URL.

    Args:
        url (str): URL expected to contain "/packages/<namespace>/<name>"

    Returns:
        PackageURL | None: PURL built from the two captured path segments,
        or None if the URL does not contain that path shape
    """
    found = re.search(r'/packages/([^/]+)/([^/]+)', url)
    # Guard clause: nothing to build when the path does not match
    if found is None:
        return None
    return PackageURL(type="custom", namespace=found.group(1), name=found.group(2))

# Register the handler for URLs under the custom registry's /packages/ path
custom_router.append(r'https://custom-registry\.com/packages/', handle_custom_registry)

# Use custom router; process() raises NoRouteAvailable when no pattern matches the URL
try:
    purl = custom_router.process("https://custom-registry.com/packages/myorg/mypackage")
    print(purl)
except NoRouteAvailable:
    print("No route found for URL")

Ecosystem-Specific Examples

from packageurl.contrib.purl2url import get_repo_url, get_download_url, get_inferred_urls

# Each call takes a PURL string. get_repo_url / get_download_url return a URL
# string or None when nothing can be inferred; get_inferred_urls returns a list
# of URL strings.

# RubyGems
gem_repo = get_repo_url("pkg:gem/rails@7.0.0")
gem_download = get_download_url("pkg:gem/rails@7.0.0")

# Maven
maven_repo = get_repo_url("pkg:maven/org.springframework/spring-core@5.3.21")
maven_download = get_download_url("pkg:maven/org.springframework/spring-core@5.3.21")

# Docker
docker_urls = get_inferred_urls("pkg:docker/library/nginx@1.21.0")

# Cargo (Rust)
cargo_repo = get_repo_url("pkg:cargo/serde@1.0.136")
cargo_download = get_download_url("pkg:cargo/serde@1.0.136")

Install with Tessl CLI

npx tessl i tessl/pypi-packageurl-python

docs

core-operations.md

ecosystem-utilities.md

framework-integrations.md

index.md

url-conversion.md

tile.json