# URL Conversion Utilities

These utilities provide bidirectional conversion between arbitrary URLs and PackageURLs (PURLs), including repository URL inference and download URL generation for various package ecosystems. They enable automatic PURL generation from repository, download, and package page URLs, and reconstruction of repository and download URLs from PURLs.

## Capabilities

### URL to PURL Conversion

Convert arbitrary URLs to PackageURL objects by pattern matching and inference.

```python { .api }
from packageurl.contrib.url2purl import url2purl, get_purl, purl_router

def url2purl(url):
    """
    Convert a URL to a PackageURL object by inference.

    Args:
        url (str): URL to convert (repository, download, or package page URL)

    Returns:
        PackageURL | None: Inferred PackageURL object, or None if no pattern matches
    """

get_purl = url2purl  # Alias for url2purl function

purl_router = Router()
# Global router instance for URL pattern matching

def purl_from_pattern(type_, pattern, url, qualifiers=None):
    """
    Create PackageURL from URL using regex pattern matching.

    Args:
        type_ (str): Package type for the resulting PURL
        pattern (str): Regular expression pattern with named groups
        url (str): URL to match against pattern
        qualifiers (dict, optional): Additional qualifiers to include

    Returns:
        PackageURL | None: PackageURL object if pattern matches, None otherwise
    """

def register_pattern(type_, pattern, router=purl_router):
    """
    Register a URL pattern with its corresponding package type.

    Args:
        type_ (str): Package type for URLs matching this pattern
        pattern (str): Regular expression pattern to match URLs
        router (Router, optional): Router to register with (default: purl_router)
    """

def build_generic_purl(url):
    """
    Build a generic PackageURL from an arbitrary URL when no specific pattern matches.

    Args:
        url (str): URL to convert to generic PURL

    Returns:
        PackageURL | None: Generic PackageURL object or None if URL is invalid
    """
```

### PURL to URL Conversion

Generate repository and download URLs from PackageURL objects for various package ecosystems.

```python { .api }
from packageurl.contrib.purl2url import (
    get_repo_url,
    get_download_url,
    get_inferred_urls,
    get_repo_download_url_by_package_type
)

def get_repo_url(purl):
    """
    Get repository URL from a PackageURL.

    Args:
        purl (str | PackageURL): PURL string or PackageURL object

    Returns:
        str | None: Repository URL, or None if cannot be inferred
    """

def get_download_url(purl):
    """
    Get download URL from a PackageURL.

    Args:
        purl (str | PackageURL): PURL string or PackageURL object

    Returns:
        str | None: Download URL, or None if cannot be inferred
    """

def get_inferred_urls(purl):
    """
    Get all inferred URLs (repository and download) from a PackageURL.

    Args:
        purl (str | PackageURL): PURL string or PackageURL object

    Returns:
        list[str]: List of inferred URLs (repository URLs, download URLs, etc.)
    """

def get_repo_download_url_by_package_type(
    type,
    namespace,
    name,
    version,
    archive_extension="tar.gz"
):
    """
    Get download URL for hosted git repository by package type.

    Args:
        type (str): Package type ('github', 'bitbucket', 'gitlab')
        namespace (str): Repository namespace/owner
        name (str): Repository name
        version (str): Version/tag to download
        archive_extension (str): Archive format ('zip' or 'tar.gz', default: 'tar.gz')

    Returns:
        str | None: Download URL for the repository archive

    Raises:
        ValueError: If archive_extension is not 'zip' or 'tar.gz'
    """
```

### Routing System

Pattern-based URL routing system for extensible URL matching and processing.

```python { .api }
from packageurl.contrib.route import Router, NoRouteAvailable

class Router:
    """
    URL routing system for pattern-based URL matching.
    Supports regex patterns and callable routing handlers.
    """

    def append(self, pattern, endpoint):
        """
        Add a routing pattern and endpoint at the end of the route map.

        Args:
            pattern (str): Regex pattern to match URLs
            endpoint (callable): Function to handle matched URLs
        """

    def process(self, string, *args, **kwargs):
        """
        Process a URL by finding matching pattern and executing endpoint.

        Args:
            string (str): URL to process
            *args, **kwargs: Additional arguments passed to endpoint

        Returns:
            Result of the matched endpoint function

        Raises:
            NoRouteAvailable: If no pattern matches the URL
        """

    def route(self, *patterns):
        """
        Decorator to make a callable routed to one or more patterns.

        Args:
            *patterns (str): URL patterns to match

        Returns:
            Decorator function for registering endpoints
        """

class NoRouteAvailable(Exception):
    """Exception raised when no route matches a URL."""
```

## Constants

```python { .api }
DEFAULT_MAVEN_REPOSITORY = "https://repo.maven.apache.org/maven2"
# Default Maven Central repository URL
```

## Usage Examples

### URL to PURL Conversion

```python
from packageurl.contrib.url2purl import url2purl

# Convert GitHub URLs
github_purl = url2purl("https://github.com/package-url/packageurl-python")
print(repr(github_purl))
# PackageURL(type='github', namespace='package-url', name='packageurl-python', version=None, qualifiers={}, subpath=None)

# Convert npm registry URLs
npm_purl = url2purl("https://www.npmjs.com/package/lodash")
print(repr(npm_purl))
# PackageURL(type='npm', namespace=None, name='lodash', version=None, qualifiers={}, subpath=None)

# Convert PyPI URLs
pypi_purl = url2purl("https://pypi.org/project/django/")
print(repr(pypi_purl))
# PackageURL(type='pypi', namespace=None, name='django', version=None, qualifiers={}, subpath=None)
```

### PURL to URL Conversion

```python
from packageurl.contrib.purl2url import get_repo_url, get_download_url, get_inferred_urls

# Get repository URL
repo_url = get_repo_url("pkg:github/django/django@4.2.0")
print(repo_url)
# "https://github.com/django/django"

# Get download URL
download_url = get_download_url("pkg:pypi/requests@2.28.0")
print(download_url)
# "https://pypi.org/project/requests/2.28.0/#files"

# Get all inferred URLs
all_urls = get_inferred_urls("pkg:npm/lodash@4.17.21")
print(all_urls)
# ["https://www.npmjs.com/package/lodash", "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"]
```

### Repository Archive Downloads

```python
from packageurl.contrib.purl2url import get_repo_download_url_by_package_type

# Get GitHub archive download URL
github_zip = get_repo_download_url_by_package_type(
    "github", "microsoft", "typescript", "v4.8.0", "zip"
)
print(github_zip)
# "https://github.com/microsoft/typescript/archive/v4.8.0.zip"

# Get GitLab archive download URL
gitlab_tar = get_repo_download_url_by_package_type(
    "gitlab", "gitlab-org", "gitlab", "v15.0.0", "tar.gz"
)
print(gitlab_tar)
# "https://gitlab.com/gitlab-org/gitlab/-/archive/v15.0.0/gitlab-v15.0.0.tar.gz"

# Get Bitbucket archive download URL
bitbucket_tar = get_repo_download_url_by_package_type(
    "bitbucket", "atlassian", "stash", "v7.0.0"
)
print(bitbucket_tar)
# "https://bitbucket.org/atlassian/stash/get/v7.0.0.tar.gz"
```

### Custom Routing

```python
from packageurl.contrib.route import Router, NoRouteAvailable
from packageurl import PackageURL
import re

# Create custom router
custom_router = Router()

# Add custom route handler
def handle_custom_registry(url):
    """Handle URLs from a custom package registry."""
    match = re.search(r'/packages/([^/]+)/([^/]+)', url)
    if match:
        namespace, name = match.groups()
        return PackageURL(type="custom", namespace=namespace, name=name)
    return None

custom_router.append(r'https://custom-registry\.com/packages/', handle_custom_registry)

# Use custom router
try:
    purl = custom_router.process("https://custom-registry.com/packages/myorg/mypackage")
    print(purl)
except NoRouteAvailable:
    print("No route found for URL")
```

### Ecosystem-Specific Examples

```python
from packageurl.contrib.purl2url import get_repo_url, get_download_url, get_inferred_urls

# RubyGems
gem_repo = get_repo_url("pkg:gem/rails@7.0.0")
gem_download = get_download_url("pkg:gem/rails@7.0.0")

# Maven
maven_repo = get_repo_url("pkg:maven/org.springframework/spring-core@5.3.21")
maven_download = get_download_url("pkg:maven/org.springframework/spring-core@5.3.21")

# Docker
docker_urls = get_inferred_urls("pkg:docker/library/nginx@1.21.0")

# Cargo (Rust)
cargo_repo = get_repo_url("pkg:cargo/serde@1.0.136")
cargo_download = get_download_url("pkg:cargo/serde@1.0.136")
```