File type identification using libmagic
npx @tessl/cli install tessl/pypi-python-magic@0.4.0
A Python interface to the libmagic file type identification library. python-magic provides file type detection by examining file headers according to predefined patterns, offering both simple convenience functions and advanced control through the Magic class.
pip install python-magic
(Requires the libmagic system library, e.g. sudo apt-get install libmagic1 on Ubuntu/Debian)
import magic
For convenience functions:
from magic import from_file, from_buffer, from_descriptor
For advanced usage:
from magic import Magic, MagicException
For the compatibility layer:
from magic import compat
# or deprecated functions directly
from magic import detect_from_filename, detect_from_content, detect_from_fobj, open
import magic
# Simple file type detection
file_type = magic.from_file('document.pdf')
print(file_type) # 'PDF document, version 1.4'
# Get MIME type instead
mime_type = magic.from_file('document.pdf', mime=True)
print(mime_type) # 'application/pdf'
# Detect from file contents
with open('document.pdf', 'rb') as f:
content = f.read(2048) # Read first 2KB for accurate detection
file_type = magic.from_buffer(content)
print(file_type) # 'PDF document, version 1.4'
# Advanced usage with Magic class
m = magic.Magic(uncompress=True) # Look inside compressed files
file_type = m.from_file('archive.tar.gz')
print(file_type) # Shows content type, not just compression
python-magic wraps the libmagic C library through ctypes, providing:
Identifies file types by examining files on disk.
def from_file(filename, mime=False):
"""
Detect filetype from filename.
Args:
filename (str | PathLike): Path to file to analyze
mime (bool): Return MIME type if True, human-readable description if False
Returns:
str: File type description or MIME type
Raises:
IOError: If file cannot be accessed
MagicException: If libmagic encounters an error
"""
Identifies file types from file content in memory.
def from_buffer(buffer, mime=False):
"""
Detect filetype from file content buffer.
Args:
buffer (bytes | str): File content to analyze (recommend ≥2048 bytes)
mime (bool): Return MIME type if True, human-readable description if False
Returns:
str: File type description or MIME type
Raises:
MagicException: If libmagic encounters an error
"""
Identifies file types from open file descriptors.
def from_descriptor(fd, mime=False):
"""
Detect filetype from file descriptor.
Args:
fd (int): File descriptor number
mime (bool): Return MIME type if True, human-readable description if False
Returns:
str: File type description or MIME type
Raises:
MagicException: If libmagic encounters an error
"""
Provides direct control over libmagic with customizable flags and parameters.
class Magic:
"""
Advanced wrapper around libmagic with customizable behavior.
Thread-safe class for fine-grained control over file type detection.
"""
def __init__(self, mime=False, magic_file=None, mime_encoding=False,
keep_going=False, uncompress=False, raw=False, extension=False):
"""
Create Magic instance with custom configuration.
Args:
mime (bool): Return MIME types instead of descriptions
magic_file (str, optional): Path to custom magic database file
mime_encoding (bool): Return character encoding information
keep_going (bool): Continue processing after first match
uncompress (bool): Look inside compressed files
raw (bool): Don't decode non-printable characters
extension (bool): Return file extensions (requires libmagic ≥524)
Raises:
NotImplementedError: If extension=True but libmagic version < 524
ImportError: If libmagic library cannot be loaded
"""
def from_file(self, filename):
"""
Identify file type from file path.
Args:
filename (str | PathLike): Path to file
Returns:
str: File type information based on instance configuration
Raises:
IOError: If file cannot be accessed
MagicException: If libmagic encounters an error
"""
def from_buffer(self, buf):
"""
Identify file type from buffer content.
Args:
buf (bytes | str): File content to analyze
Returns:
str: File type information based on instance configuration
Raises:
MagicException: If libmagic encounters an error
"""
def from_descriptor(self, fd):
"""
Identify file type from file descriptor.
Args:
fd (int): File descriptor number
Returns:
str: File type information based on instance configuration
Raises:
MagicException: If libmagic encounters an error
"""
def setparam(self, param, val):
"""
Set libmagic parameter.
Args:
param (int): Parameter constant (MAGIC_PARAM_*)
val (int): Parameter value
Returns:
int: 0 on success, -1 on failure
Raises:
NotImplementedError: If libmagic doesn't support parameters
"""
def getparam(self, param):
"""
Get libmagic parameter value.
Args:
param (int): Parameter constant (MAGIC_PARAM_*)
Returns:
int: Current parameter value
Raises:
NotImplementedError: If libmagic doesn't support parameters
"""
The compat module provides an alternative Magic class interface that matches libmagic's native Python bindings.
class Magic:
"""
Compatibility Magic class for libmagic's native Python bindings.
This class provides lower-level access to libmagic functionality
and is included for compatibility with existing code.
"""
def __init__(self, ms):
"""
Initialize Magic object with magic_t pointer.
Args:
ms: Magic structure pointer from magic_open()
"""
def close(self):
"""
Close the magic database and deallocate resources.
Must be called to properly clean up the magic object.
"""
def file(self, filename):
"""
Get file type description from filename.
Args:
filename (str | bytes): Path to file to analyze
Returns:
str | None: File type description or None if error occurred
"""
def descriptor(self, fd):
"""
Get file type description from file descriptor.
Args:
fd (int): File descriptor number
Returns:
str | None: File type description or None if error occurred
"""
def buffer(self, buf):
"""
Get file type description from buffer content.
Args:
buf (bytes): File content to analyze
Returns:
str | None: File type description or None if error occurred
"""
def error(self):
"""
Get textual description of last error.
Returns:
str | None: Error description or None if no error
"""
def setflags(self, flags):
"""
Set flags controlling magic behavior.
Args:
flags (int): Bitwise OR of magic flags
Returns:
int: 0 on success, -1 on failure
"""
def load(self, filename=None):
"""
Load magic database from file.
Args:
filename (str, optional): Database file path, None for default
Returns:
int: 0 on success, -1 on failure
"""
def compile(self, dbs):
"""
Compile magic database files.
Args:
dbs (str): Colon-separated list of database files
Returns:
int: 0 on success, -1 on failure
"""
def check(self, dbs):
"""
Check validity of magic database files.
Args:
dbs (str): Colon-separated list of database files
Returns:
int: 0 on success, -1 on failure
"""
def list(self, dbs):
"""
List entries in magic database files.
Args:
dbs (str): Colon-separated list of database files
Returns:
int: 0 on success, -1 on failure
"""
def errno(self):
"""
Get numeric error code from last operation.
Returns:
int: 0 for internal error, non-zero for OS error code
"""
Factory function for creating compat Magic objects.
def open(flags):
"""
Create Magic object for compatibility layer.
Args:
flags (int): Magic flags to use (MAGIC_* constants)
Returns:
Magic: Magic instance from compat module
Example:
from magic import compat
m = compat.open(compat.MAGIC_MIME)
m.load()
result = m.file('document.pdf')
m.close()
"""
Get libmagic version information.
def version():
"""
Get libmagic version number.
Returns:
int: libmagic version number
Raises:
NotImplementedError: If version detection not supported
"""
Custom exception for magic-related errors.
class MagicException(Exception):
"""
Exception raised by libmagic operations.
Attributes:
message (str): Error description from libmagic
"""
def __init__(self, message):
"""
Create MagicException with error message.
Args:
message (str): Error description
"""
MAGIC_NONE = 0x000000 # No special behavior
MAGIC_DEBUG = 0x000001 # Turn on debugging output
MAGIC_SYMLINK = 0x000002 # Follow symbolic links
MAGIC_COMPRESS = 0x000004 # Check inside compressed files
MAGIC_DEVICES = 0x000008 # Look at device file contents
MAGIC_MIME_TYPE = 0x000010 # Return MIME type string
MAGIC_MIME_ENCODING = 0x000400 # Return MIME encoding
MAGIC_MIME = 0x000010 # Return MIME type (same as MIME_TYPE)
MAGIC_EXTENSION = 0x1000000 # Return file extensions
MAGIC_CONTINUE = 0x000020 # Return all matches, not just first
MAGIC_CHECK = 0x000040 # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time after reading
MAGIC_RAW = 0x000100 # Don't decode non-printable characters
MAGIC_ERROR = 0x000200 # Handle ENOENT as real error
MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check compressed files
MAGIC_NO_CHECK_TAR = 0x002000 # Don't check tar files
MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000 # Don't check ELF details
MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check ASCII files
MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ASCII/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ASCII/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ASCII/tokens
MAGIC_APPLE = 2048 # Return Apple creator/type
MAGIC_NO_CHECK_TEXT = 131072 # Don't check text files
MAGIC_NO_CHECK_CDF = 262144 # Don't check CDF files
MAGIC_NO_CHECK_ENCODING = 2097152 # Don't check for text encoding
MAGIC_NO_CHECK_BUILTIN = 4173824 # Don't use built-in tests
MAGIC_PARAM_INDIR_MAX = 0 # Recursion limit for indirect magic
MAGIC_PARAM_NAME_MAX = 1 # Use count limit for name/use magic
MAGIC_PARAM_ELF_PHNUM_MAX = 2 # Max ELF program sections processed
MAGIC_PARAM_ELF_SHNUM_MAX = 3 # Max ELF sections processed
MAGIC_PARAM_ELF_NOTES_MAX = 4 # Max ELF notes processed
MAGIC_PARAM_REGEX_MAX = 5 # Length limit for regex searches
MAGIC_PARAM_BYTES_MAX = 6 # Max bytes to read from file
These functions provide compatibility with libmagic's native Python bindings but generate deprecation warnings.
def detect_from_filename(filename):
"""
Detect file type from filename (compatibility function).
Args:
filename (str): Path to file
Returns:
FileMagic: Named tuple with mime_type, encoding, and name fields
Warnings:
PendingDeprecationWarning: This function is deprecated
"""
def detect_from_content(byte_content):
"""
Detect file type from bytes (compatibility function).
Args:
byte_content (bytes): File content to analyze
Returns:
FileMagic: Named tuple with mime_type, encoding, and name fields
Warnings:
PendingDeprecationWarning: This function is deprecated
"""
def detect_from_fobj(fobj):
"""
Detect file type from file object (compatibility function).
Args:
fobj: File-like object with fileno() method
Returns:
FileMagic: Named tuple with mime_type, encoding, and name fields
Warnings:
PendingDeprecationWarning: This function is deprecated
"""
def open(flags):
"""
Create Magic object (compatibility function).
Args:
flags (int): Magic flags to use
Returns:
Magic: Magic instance from compat module
Warnings:
PendingDeprecationWarning: This function is deprecated
"""
FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))
import magic
# Web files
magic.from_file('page.html', mime=True) # 'text/html'
magic.from_file('style.css', mime=True) # 'text/css'
magic.from_file('script.js', mime=True) # 'text/javascript'
# Images
magic.from_file('photo.jpg', mime=True) # 'image/jpeg'
magic.from_file('icon.png', mime=True) # 'image/png'
magic.from_file('drawing.svg', mime=True) # 'image/svg+xml'
# Documents
magic.from_file('report.pdf', mime=True) # 'application/pdf'
magic.from_file('data.xlsx', mime=True) # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
# Archives
magic.from_file('backup.zip', mime=True) # 'application/zip'
magic.from_file('source.tar.gz', mime=True) # 'application/gzip'
import magic
# Create Magic instance for compressed files
m = magic.Magic(uncompress=True)
# This will look inside the compressed file
result = m.from_file('archive.tar.gz')
print(result) # May show "POSIX tar archive" instead of just "gzip compressed data"
# Get both MIME type and encoding
m = magic.Magic(mime=True, mime_encoding=True)
result = m.from_file('document.txt')
print(result) # 'text/plain; charset=utf-8'
# Get file extensions
try:
m = magic.Magic(extension=True)
extensions = m.from_file('image.jpg')
print(extensions) # 'jpeg/jpg/jpe/jfif'
except NotImplementedError:
print("Extension detection requires libmagic version 524 or higher")
import magic
from magic import MagicException
try:
# This will raise IOError if file doesn't exist
result = magic.from_file('nonexistent.txt')
except IOError as e:
print(f"File access error: {e}")
except MagicException as e:
print(f"Magic detection error: {e.message}")
# Handling buffer detection errors
try:
# Empty buffer might cause issues
result = magic.from_buffer(b'')
except MagicException as e:
print(f"Detection failed: {e.message}")
import magic
import os
# Open file and get file descriptor
with open('document.pdf', 'rb') as f:
fd = f.fileno()
file_type = magic.from_descriptor(fd)
print(file_type) # Works while file is open
# Using with stdin/stdout/stderr
import sys
try:
stdin_type = magic.from_descriptor(sys.stdin.fileno())
print(f"stdin type: {stdin_type}")
except:
print("stdin detection failed (may be redirected)")
import magic
# Use custom magic database file
try:
m = magic.Magic(magic_file='/path/to/custom.mgc')
result = m.from_file('specialized_file.dat')
print(result)
except ImportError:
print("Custom magic file not found or invalid")