Powerful data structures for data analysis, time series, and statistics
npx @tessl/cli install tessl/pypi-pandas@2.3.0

Pandas is a comprehensive Python data analysis library that provides powerful, flexible, and expressive data structures designed for working with structured and time series data. It offers extensive functionality for data manipulation, cleaning, transformation, and analysis, including data alignment, merging, reshaping, grouping, and statistical operations.
pip install pandas

import pandas as pd

Common imports for specific functionality:
import pandas as pd
from pandas import DataFrame, Series, Index

import pandas as pd
import numpy as np
# Create a DataFrame from a dictionary that maps column names to
# equal-length lists of values.
data = {
    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'age': [25, 30, 35, 28],
    'city': ['New York', 'London', 'Tokyo', 'Paris'],
    'salary': [50000, 60000, 70000, 55000],
}
df = pd.DataFrame(data)

# Basic operations
print(df.head())      # Display the first rows (up to 5)
df.info()             # info() prints its own report and returns None,
                      # so wrapping it in print() would emit a stray "None"
print(df.describe())  # Statistical summary of the numeric columns

# Data selection and filtering with boolean masks
young_employees = df[df['age'] < 30]
high_earners = df[df['salary'] > 55000]

# Create a Series
ages = pd.Series([25, 30, 35, 28], name='ages')
print(ages.mean())  # Calculate mean age
# Read data from files.
# NOTE(review): 'data.csv' and 'data.xlsx' look like placeholder paths —
# substitute real files before running this snippet.
df_csv = pd.read_csv('data.csv')
df_excel = pd.read_excel('data.xlsx')
# Basic data manipulation on the `df` frame built earlier in this example
df['bonus'] = df['salary'] * 0.1 # Add a new column holding 10% of each salary
df_sorted = df.sort_values('salary') # Sort by salary; the result is bound to df_sorted
df_grouped = df.groupby('city')['salary'].mean() # Group and aggregate

Pandas is built around three fundamental data structures: DataFrame, Series, and Index.
The library integrates seamlessly with NumPy, providing optimized performance through vectorized operations, and serves as the foundation for the Python data science ecosystem, including integration with Jupyter notebooks, matplotlib, scikit-learn, and hundreds of domain-specific analysis libraries.
The fundamental data structures that form the foundation of pandas: DataFrame, Series, and various Index types. These structures provide the building blocks for all data manipulation operations.
class DataFrame:
    """Signature stub for the pandas ``DataFrame`` data structure.

    Only the constructor signature is documented here; the stub holds no
    data and implements no behaviour.
    """

    def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):
        """Accept the constructor arguments; every one is optional.

        The stub body is intentionally empty: the arguments are not stored.
        """
class Series:
    """Signature stub for the pandas ``Series`` data structure.

    Only the constructor signature is documented here; the stub holds no
    data and implements no behaviour.
    """

    def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False):
        """Accept the constructor arguments; every one is optional.

        The stub body is intentionally empty: the arguments are not stored.
        """
class Index:
    def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True): ...

Comprehensive I/O capabilities for reading and writing data in various formats including CSV, Excel, JSON, SQL databases, HDF5, Parquet, and many statistical file formats.
def read_csv(filepath_or_buffer, **kwargs):
    """Signature stub for the CSV reader; further options pass through ``kwargs``."""


def read_excel(io, **kwargs):
    """Signature stub for the Excel reader; further options pass through ``kwargs``."""


def read_json(path_or_buf, **kwargs):
    """Signature stub for the JSON reader; further options pass through ``kwargs``."""


def read_sql(sql, con, **kwargs):
    """Signature stub for the SQL reader; takes a query ``sql`` and a connection ``con``."""
def read_parquet(path, **kwargs): ...

Functions for combining, reshaping, and transforming data including merging, concatenation, pivoting, melting, and advanced data restructuring operations.
def concat(objs, axis=0, join='outer', **kwargs):
    """Signature stub for concatenation; defaults are ``axis=0`` and ``join='outer'``."""


def merge(left, right, how='inner', on=None, **kwargs):
    """Signature stub for merging ``left`` with ``right``; default ``how='inner'``."""


def pivot_table(data, values=None, index=None, columns=None, **kwargs):
    """Signature stub for pivoting ``data``; ``values``, ``index`` and ``columns`` are optional."""
def melt(data, id_vars=None, value_vars=None, **kwargs): ...

Comprehensive time series functionality including date/time parsing, time zone handling, frequency conversion, resampling, and specialized time-based operations.
def date_range(start=None, end=None, periods=None, freq=None, **kwargs):
    """Signature stub for date-range generation; every argument is optional."""


def to_datetime(arg, **kwargs):
    """Signature stub for date/time parsing of ``arg``; options pass through ``kwargs``."""
class Timestamp:
    def __init__(self, ts_input=None, freq=None, tz=None, **kwargs): ...

Extension data types, missing data handling, and type conversion utilities including nullable integer/boolean types, categorical data, and advanced missing value operations.
def isna(obj):
    """Signature stub for the missing-value check on ``obj``."""


def notna(obj):
    """Signature stub for the non-missing-value check on ``obj``."""
class Categorical:
    def __init__(self, values, categories=None, ordered=None, dtype=None, fastpath=False): ...

Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations.
def cut(x, bins, **kwargs):
    """Signature stub taking values ``x`` and ``bins``; options pass through ``kwargs``."""


def qcut(x, q, **kwargs):
    """Signature stub taking values ``x`` and ``q``; options pass through ``kwargs``."""


def factorize(values, **kwargs):
    """Signature stub taking ``values``; options pass through ``kwargs``."""
def value_counts(values, **kwargs): ...

Pandas configuration system for controlling display options, computational behavior, and library-wide settings.
def get_option(pat):
    """Signature stub for reading the option matching ``pat``."""


def set_option(pat, value):
    """Signature stub for assigning ``value`` to the option matching ``pat``."""


def reset_option(pat):
    """Signature stub for resetting the option matching ``pat``."""
def option_context(*args): ...

Comprehensive plotting capabilities including basic plot types, statistical visualizations, and advanced multivariate analysis plots built on matplotlib.
def scatter_matrix(frame, **kwargs):
    """Signature stub for the scatter-matrix plot of ``frame``."""


def parallel_coordinates(frame, class_column, **kwargs):
    """Signature stub for the parallel-coordinates plot of ``frame`` keyed by ``class_column``."""


def andrews_curves(frame, class_column, **kwargs):
    """Signature stub for the Andrews-curves plot of ``frame`` keyed by ``class_column``."""
def radviz(frame, class_column, **kwargs): ...

Type checking utilities and data type validation functions for working with pandas data structures and ensuring data quality.
def is_numeric_dtype(arr_or_dtype):
    """Signature stub for the numeric-dtype check on an array or dtype."""


def is_datetime64_dtype(arr_or_dtype):
    """Signature stub for the datetime64-dtype check on an array or dtype."""


def is_categorical_dtype(arr_or_dtype):
    """Signature stub for the categorical-dtype check on an array or dtype.

    NOTE(review): recent pandas 2.x releases appear to deprecate this helper
    in favour of ``isinstance(dtype, pd.CategoricalDtype)`` — confirm against
    the pandas API reference before recommending it.
    """
def infer_dtype(value, **kwargs): ...

Exception and warning classes for proper error handling in pandas applications, including parsing errors, performance warnings, and data validation errors.
class ParserError(ValueError):
    """Stub for the parsing-error exception; a ``ValueError`` subclass."""


class PerformanceWarning(Warning):
    """Stub for the performance warning category; a ``Warning`` subclass."""


class SettingWithCopyWarning(Warning):
    """Stub for the setting-with-copy warning category; a ``Warning`` subclass."""
class DtypeWarning(Warning): ...

# Core scalar types
class Timestamp:
    """Placeholder for the pandas timestamp scalar type.

    Declared without members so it can be named in type aliases.
    """
class Timedelta:
    """Placeholder for the pandas timedelta scalar type.

    Declared without members so it can be named in type aliases.
    """
class Period:
    """Placeholder for the pandas period scalar type.

    Declared without members so it can be named in type aliases.
    """
class Interval:
    """Placeholder for the pandas interval scalar type.

    Declared without members so it can be named in type aliases.
    """
# Missing value sentinels.
# These are annotation-only declarations: a type is stated, no value is bound.
NA: object # Pandas missing value sentinel
NaT: object # Not-a-Time for datetime/timedelta
# Common type aliases.
# NOTE(review): Union and Sequence are not imported anywhere in this snippet —
# presumably they come from `typing`; confirm before running it as code.
# A single (non-collection) value of any supported scalar kind.
Scalar = Union[str, int, float, bool, Timestamp, Timedelta, Period, Interval]
# One-dimensional containers of values.
ArrayLike = Union[list, tuple, np.ndarray, Series, Index]
# A single int or str, or a sequence of them.
Axes = Union[int, str, Sequence[Union[int, str]]]