High performance Python library for data extraction, analysis, conversion & manipulation of PDF and other documents.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Coordinate system handling with matrices, rectangles, points, and quads for precise positioning and transformations. PyMuPDF provides comprehensive geometry classes essential for layout manipulation, coordinate calculations, and spatial operations.
2D transformation matrices for scaling, rotation, translation, and general coordinate transformations.
class Matrix:
    """
    2D transformation matrix for scaling, rotation, translation, and
    general coordinate transformations.

    The six coefficients are exposed as the properties a, b, c, d, e, f.
    The pre* methods and concat() are documented as returning the matrix
    itself so that calls can be chained.
    """

    def __init__(self, a: float = 1.0, b: float = 0.0, c: float = 0.0,
                 d: float = 1.0, e: float = 0.0, f: float = 0.0):
        """
        Create a transformation matrix.

        Parameters:
        - a, b, c, d, e, f: matrix coefficients, laid out as
          [a c e; b d f; 0 0 1]

        Called without arguments, the defaults yield the identity matrix.
        """

    def prerotate(self, deg: float) -> Matrix:
        """
        Pre-multiply this matrix with a rotation matrix.

        Parameters:
        - deg: rotation angle in degrees (counterclockwise)

        Returns:
        Self for method chaining
        """

    def prescale(self, sx: float, sy: float = None) -> Matrix:
        """
        Pre-multiply this matrix with a scaling matrix.

        Parameters:
        - sx: x-direction scale factor
        - sy: y-direction scale factor (defaults to sx for uniform scaling)

        Returns:
        Self for method chaining

        NOTE(review): upstream PyMuPDF's prescale(sx, sy) appears to require
        both factors - confirm the sy default before relying on it.
        """

    def pretranslate(self, tx: float, ty: float) -> Matrix:
        """
        Pre-multiply this matrix with a translation matrix.

        Parameters:
        - tx: x-direction translation
        - ty: y-direction translation

        Returns:
        Self for method chaining
        """

    def preshear(self, sx: float, sy: float) -> Matrix:
        """
        Pre-multiply this matrix with a shear matrix.

        Parameters:
        - sx: x-direction shear factor
        - sy: y-direction shear factor

        Returns:
        Self for method chaining
        """

    def concat(self, matrix: Matrix) -> Matrix:
        """
        Concatenate this matrix with another matrix.

        Parameters:
        - matrix: matrix to concatenate

        Returns:
        Self for method chaining
        """

    def invert(self) -> Matrix:
        """
        Invert the matrix.

        Returns:
        Inverted Matrix object

        Raises:
        RuntimeError if matrix is not invertible

        NOTE(review): upstream PyMuPDF documents invert() as replacing the
        matrix in place and returning a 0/1 status code rather than raising -
        confirm which contract applies here.
        """

    def norm(self) -> float:
        """
        Calculate the matrix norm (Euclidean length).

        Returns:
        Matrix norm value
        """

    @property
    def a(self) -> float:
        """Matrix coefficient a (x-scale)."""

    @property
    def b(self) -> float:
        """Matrix coefficient b (y-skew)."""

    @property
    def c(self) -> float:
        """Matrix coefficient c (x-skew)."""

    @property
    def d(self) -> float:
        """Matrix coefficient d (y-scale)."""

    @property
    def e(self) -> float:
        """Matrix coefficient e (x-translation)."""

    @property
    def f(self) -> float:
        """Matrix coefficient f (y-translation)."""

    @property
    def is_rectilinear(self) -> bool:
        """True if the matrix preserves axis alignment."""
# Identity matrix constant
IdentityMatrix: Matrix

Rectangle representation with comprehensive geometric operations.
class Rect:
    """
    Rectangle with float coordinates and geometric operations.

    Described by the coordinates x0 (left), y0 (top), x1 (right) and
    y1 (bottom); corner points, size and state flags are exposed as
    properties.
    """

    def __init__(self, x0: float, y0: float, x1: float, y1: float):
        """
        Create a rectangle from coordinates.

        Parameters:
        - x0: left coordinate
        - y0: top coordinate
        - x1: right coordinate
        - y1: bottom coordinate
        """

    def normalize(self) -> Rect:
        """
        Normalize the rectangle (ensure x0 <= x1 and y0 <= y1).

        Returns:
        Self for method chaining
        """

    def transform(self, matrix: Matrix) -> Rect:
        """
        Transform the rectangle by a matrix.

        Parameters:
        - matrix: transformation matrix

        Returns:
        New transformed Rect object

        NOTE(review): upstream PyMuPDF documents Rect.transform() as
        replacing the rectangle in place and returning it - confirm the
        "new object" wording before relying on the original being preserved.
        """

    def intersect(self, rect: Rect) -> Rect:
        """
        Calculate the intersection with another rectangle.

        Parameters:
        - rect: rectangle to intersect with

        Returns:
        Intersection Rect (may be empty)
        """

    def include_point(self, point: Point) -> Rect:
        """
        Expand the rectangle to include a point.

        Parameters:
        - point: point to include

        Returns:
        Self for method chaining
        """

    def include_rect(self, rect: Rect) -> Rect:
        """
        Expand the rectangle to include another rectangle.

        Parameters:
        - rect: rectangle to include

        Returns:
        Self for method chaining
        """

    def round(self) -> IRect:
        """
        Round the coordinates to integers.

        Returns:
        IRect with integer coordinates
        """

    def morph(self, fixpoint: Point, matrix: Matrix) -> Rect:
        """
        Transform the rectangle around a fixed point.

        Parameters:
        - fixpoint: fixed point for transformation
        - matrix: transformation matrix

        Returns:
        New transformed Rect object
        """

    @property
    def x0(self) -> float:
        """Left coordinate."""

    @property
    def y0(self) -> float:
        """Top coordinate."""

    @property
    def x1(self) -> float:
        """Right coordinate."""

    @property
    def y1(self) -> float:
        """Bottom coordinate."""

    @property
    def width(self) -> float:
        """Rectangle width."""

    @property
    def height(self) -> float:
        """Rectangle height."""

    @property
    def tl(self) -> Point:
        """Top-left corner point."""

    @property
    def tr(self) -> Point:
        """Top-right corner point."""

    @property
    def bl(self) -> Point:
        """Bottom-left corner point."""

    @property
    def br(self) -> Point:
        """Bottom-right corner point."""

    @property
    def quad(self) -> Quad:
        """Rectangle as Quad object."""

    @property
    def is_empty(self) -> bool:
        """True if the rectangle is empty."""

    @property
    def is_infinite(self) -> bool:
        """True if the rectangle is infinite."""
# Rectangle constants
EMPTY_RECT: Rect # Empty rectangle
INFINITE_RECT: Rect # Infinite rectangle

Integer-coordinate rectangle for pixel-perfect operations.
class IRect:
    """
    Integer-coordinate rectangle for pixel-perfect operations.

    Uses the same x0/y0/x1/y1 layout as the float rectangle; the rect
    property converts to a float Rect.
    """

    def __init__(self, x0: int, y0: int, x1: int, y1: int):
        """
        Create an integer rectangle.

        Parameters:
        - x0: left coordinate
        - y0: top coordinate
        - x1: right coordinate
        - y1: bottom coordinate
        """

    def normalize(self) -> IRect:
        """
        Normalize the rectangle coordinates.

        Returns:
        Self for method chaining
        """

    def intersect(self, irect: IRect) -> IRect:
        """
        Calculate the intersection with another integer rectangle.

        Parameters:
        - irect: rectangle to intersect with

        Returns:
        Intersection IRect
        """

    def include_point(self, point: Point) -> IRect:
        """
        Expand the rectangle to include a point.

        Parameters:
        - point: point to include

        Returns:
        Self for method chaining
        """

    def include_rect(self, irect: IRect) -> IRect:
        """
        Expand the rectangle to include another rectangle.

        Parameters:
        - irect: rectangle to include

        Returns:
        Self for method chaining
        """

    @property
    def x0(self) -> int:
        """Left coordinate."""

    @property
    def y0(self) -> int:
        """Top coordinate."""

    @property
    def x1(self) -> int:
        """Right coordinate."""

    @property
    def y1(self) -> int:
        """Bottom coordinate."""

    @property
    def width(self) -> int:
        """Rectangle width."""

    @property
    def height(self) -> int:
        """Rectangle height."""

    @property
    def rect(self) -> Rect:
        """Convert to float Rect."""

    @property
    def is_empty(self) -> bool:
        """True if the rectangle is empty."""

    @property
    def is_infinite(self) -> bool:
        """True if the rectangle is infinite."""
# Integer rectangle constants
EMPTY_IRECT: IRect # Empty integer rectangle
INFINITE_IRECT: IRect # Infinite integer rectangle

2D point representation with distance and transformation capabilities.
class Point:
    """
    2D point with distance and transformation helpers.

    The coordinates are exposed as the properties x and y.
    """

    def __init__(self, x: float, y: float):
        """
        Create a point.

        Parameters:
        - x: x coordinate
        - y: y coordinate
        """

    def distance_to(self, point: Point) -> float:
        """
        Calculate the distance to another point.

        Parameters:
        - point: target point

        Returns:
        Euclidean distance
        """

    def transform(self, matrix: Matrix) -> Point:
        """
        Transform the point by a matrix.

        Parameters:
        - matrix: transformation matrix

        Returns:
        New transformed Point object

        NOTE(review): upstream PyMuPDF documents Point.transform() as
        replacing the point in place and returning it - confirm the
        "new object" wording before relying on the original being preserved.
        """

    def unit_vector(self, point: Point) -> Point:
        """
        Calculate the unit vector to another point.

        Parameters:
        - point: target point

        Returns:
        Unit vector Point
        """

    @property
    def x(self) -> float:
        """X coordinate."""

    @property
    def y(self) -> float:
        """Y coordinate."""

Four-sided polygon representation for text highlighting and selections.
class Quad:
    """
    Four-sided polygon (quadrilateral) defined by four corner points.

    The corners are exposed as the properties ul, ur, ll and lr; rect
    gives the bounding rectangle.
    """

    def __init__(self, ul: Point, ur: Point, ll: Point, lr: Point):
        """
        Create a quadrilateral from four corner points.

        Parameters:
        - ul: upper-left point
        - ur: upper-right point
        - ll: lower-left point
        - lr: lower-right point
        """

    def transform(self, matrix: Matrix) -> Quad:
        """
        Transform the quadrilateral by a matrix.

        Parameters:
        - matrix: transformation matrix

        Returns:
        New transformed Quad object

        NOTE(review): upstream PyMuPDF documents Quad.transform() as
        modifying the quad in place and returning it - confirm the
        "new object" wording before relying on the original being preserved.
        """

    def morph(self, fixpoint: Point, matrix: Matrix) -> Quad:
        """
        Transform the quadrilateral around a fixed point.

        Parameters:
        - fixpoint: transformation center
        - matrix: transformation matrix

        Returns:
        New transformed Quad object
        """

    @property
    def ul(self) -> Point:
        """Upper-left corner point."""

    @property
    def ur(self) -> Point:
        """Upper-right corner point."""

    @property
    def ll(self) -> Point:
        """Lower-left corner point."""

    @property
    def lr(self) -> Point:
        """Lower-right corner point."""

    @property
    def rect(self) -> Rect:
        """Bounding rectangle of the quadrilateral."""

    @property
    def is_empty(self) -> bool:
        """True if the quadrilateral is empty."""

    @property
    def is_convex(self) -> bool:
        """True if the quadrilateral is convex."""

    @property
    def is_rectangular(self) -> bool:
        """True if the quadrilateral is rectangular."""
# Quad constants
EMPTY_QUAD: Quad # Empty quadrilateral
INFINITE_QUAD: Quad # Infinite quadrilateral

import pymupdf
# Create identity matrix
mat = pymupdf.Matrix()
print(f"Identity: {mat.a}, {mat.d}") # Should be 1, 1
# Scale by 2x in both directions. Both factors are passed explicitly:
# upstream PyMuPDF's Matrix.prescale(sx, sy) takes two required arguments,
# so a single-argument call would raise TypeError there.
mat.prescale(2.0, 2.0)
print(f"Scaled: {mat.a}, {mat.d}") # Should be 2, 2
# Rotate by 45 degrees
mat.prerotate(45)
# Translate by (100, 50)
mat.pretranslate(100, 50)
# Transform a point
point = pymupdf.Point(0, 0)
transformed_point = point.transform(mat)
print(f"Transformed point: ({transformed_point.x}, {transformed_point.y})")

import pymupdf
# Build a rectangle and report its size
rect = pymupdf.Rect(10, 10, 100, 50)
print(f"Original: {rect.width} x {rect.height}")

# Scale it: 2x width, 1.5x height
scale_matrix = pymupdf.Matrix(2, 1.5)
scaled_rect = rect.transform(scale_matrix)
print(f"Scaled: {scaled_rect.width} x {scaled_rect.height}")

# Overlap of two rectangles
rect1, rect2 = pymupdf.Rect(0, 0, 100, 100), pymupdf.Rect(50, 50, 150, 150)
intersection = rect1.intersect(rect2)
print(f"Intersection: {intersection}")

# Grow a rectangle until it contains a given point
rect = pymupdf.Rect(0, 0, 100, 100)
point = pymupdf.Point(200, 200)
expanded = rect.include_point(point)
print(f"Expanded: {expanded}")

import pymupdf
def create_transformation_matrix(scale_x: float, scale_y: float,
                                 rotation: float, tx: float, ty: float) -> pymupdf.Matrix:
    """Build one matrix combining scaling, rotation and translation.

    Starting from a default-constructed matrix, the steps are pre-multiplied
    in this order: scale (scale_x, scale_y), rotate (rotation, in degrees),
    translate (tx, ty).
    """
    # Each pre* method is documented as returning the matrix itself,
    # which is what makes this chain equivalent to three separate calls.
    return (
        pymupdf.Matrix()
        .prescale(scale_x, scale_y)
        .prerotate(rotation)
        .pretranslate(tx, ty)
    )
# Create complex transformation
transform = create_transformation_matrix(
    scale_x=1.5, scale_y=2.0,
    rotation=30,
    tx=100, ty=50
)
# Apply to various geometry objects
point = pymupdf.Point(50, 25)
rect = pymupdf.Rect(0, 0, 100, 50)
quad = rect.quad
# Report the original coordinates BEFORE transforming: upstream PyMuPDF's
# transform() updates the object in place and returns it, so reading
# `point` afterwards would print the transformed values instead.
print(f"Original point: ({point.x}, {point.y})")
transformed_point = point.transform(transform)
transformed_rect = rect.transform(transform)
transformed_quad = quad.transform(transform)
print(f"Transformed point: ({transformed_point.x:.2f}, {transformed_point.y:.2f})")

import pymupdf
# Open the document and load its first page (index 0)
doc = pymupdf.open("document.pdf")
page = doc.load_page(0)
# Get page dimensions
page_rect = page.rect
print(f"Page size: {page_rect.width} x {page_rect.height}")
# Convert between coordinate systems
# PDF coordinates: origin at bottom-left
# Screen coordinates: origin at top-left
# NOTE(review): PyMuPDF page geometry (page.rect, search results) is
# presumably already expressed with a top-left origin; the flip below is
# meant for raw PDF-space coordinates - confirm against the library docs.
def pdf_to_screen(point: pymupdf.Point, page_height: float) -> pymupdf.Point:
    """Map a PDF-space point (bottom-left origin) to screen space (top-left origin)."""
    # x is unchanged; y is mirrored across the page height.
    flipped_y = page_height - point.y
    return pymupdf.Point(point.x, flipped_y)
def screen_to_pdf(point: pymupdf.Point, page_height: float) -> pymupdf.Point:
    """Map a screen-space point (top-left origin) to PDF space (bottom-left origin)."""
    # Mirroring y across the page height is its own inverse, so the
    # arithmetic is the same in both directions.
    x, y = point.x, page_height - point.y
    return pymupdf.Point(x, y)
# Example conversion: flip one PDF-space point into screen space
pdf_point = pymupdf.Point(100, 100)
screen_point = pdf_to_screen(pdf_point, page_height=page_rect.height)
print(f"PDF point: ({pdf_point.x}, {pdf_point.y})")
print(f"Screen point: ({screen_point.x}, {screen_point.y})")

# Done with the document
doc.close()

import pymupdf
# Open the document as a context manager so it is closed even if the
# search or annotation steps raise.
with pymupdf.open("document.pdf") as doc:
    page = doc.load_page(0)
    # Search for text and get quads
    search_term = "important"
    text_instances = page.search_for(search_term, quads=True)
    for quad in text_instances:
        print("Text quad corners:")
        print(f" UL: ({quad.ul.x:.1f}, {quad.ul.y:.1f})")
        print(f" UR: ({quad.ur.x:.1f}, {quad.ur.y:.1f})")
        print(f" LL: ({quad.ll.x:.1f}, {quad.ll.y:.1f})")
        print(f" LR: ({quad.lr.x:.1f}, {quad.lr.y:.1f})")
        # Get bounding rectangle
        bbox = quad.rect
        print(f" Bounding box: {bbox}")
        # Create highlight annotation
        highlight = page.add_highlight_annot(quad)
        highlight.set_colors({"stroke": [1, 1, 0]}) # Yellow
        highlight.update()
    # Save once, after every match has been highlighted
    doc.save("highlighted_text.pdf")

import pymupdf
def calculate_rect_area(rect: pymupdf.Rect) -> float:
    """Return the area of rect, computed as width times height."""
    width, height = rect.width, rect.height
    return width * height
def calculate_points_distance(p1: pymupdf.Point, p2: pymupdf.Point) -> float:
    """Return the distance from p1 to p2 as reported by p1.distance_to()."""
    distance = p1.distance_to(p2)
    return distance
def calculate_quad_area(quad: pymupdf.Quad) -> float:
    """Calculate approximate quadrilateral area using shoelace formula."""
    # Walk the corners around the perimeter: ul -> ur -> lr -> ll.
    corners = [quad.ul, quad.ur, quad.lr, quad.ll]
    # Pair every corner with its successor, wrapping back to the first.
    successors = corners[1:] + corners[:1]
    twice_signed_area = 0
    for current, following in zip(corners, successors):
        twice_signed_area += current.x * following.y
        twice_signed_area -= following.x * current.y
    # The sign only reflects the walking direction, so drop it.
    return abs(twice_signed_area) / 2
# Example calculations

# Area of a rectangle
rect = pymupdf.Rect(0, 0, 100, 50)
print(f"Rectangle area: {calculate_rect_area(rect)}")

# Distance between two points
p1, p2 = pymupdf.Point(0, 0), pymupdf.Point(100, 100)
print(f"Distance: {calculate_points_distance(p1, p2):.2f}")

# Area of the same rectangle, expressed as a quad
quad = rect.quad
print(f"Quad area: {calculate_quad_area(quad)}")

import pymupdf
import math
def analyze_matrix(matrix: pymupdf.Matrix) -> dict:
    """Break a transformation matrix down into readable properties.

    Returns a dict with the keys "scale_x", "scale_y", "rotation"
    (in degrees), "translation" (the (e, f) pair), "determinant" and
    "is_rectilinear".
    """
    a, b, c, d = matrix.a, matrix.b, matrix.c, matrix.d
    return {
        # Lengths of the (a, b) and (c, d) coefficient pairs
        "scale_x": math.sqrt(a * a + b * b),
        "scale_y": math.sqrt(c * c + d * d),
        # Angle of the (a, b) pair, converted from radians to degrees
        "rotation": math.atan2(b, a) * 180 / math.pi,
        "translation": (matrix.e, matrix.f),
        # a*d - b*c: the factor by which the matrix scales areas
        "determinant": a * d - b * c,
        "is_rectilinear": matrix.is_rectilinear
    }
# Create and analyze transformation
mat = pymupdf.Matrix()
mat.prescale(2, 1.5)
mat.prerotate(30)
mat.pretranslate(100, 50)
analysis = analyze_matrix(mat)
print("Matrix analysis:")
for key, value in analysis.items():
print(f" {key}: {value}")Install with Tessl CLI
npx tessl i tessl/pypi-pymupdf