Powerful data structures for data analysis, time series, and statistics
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
The fundamental data structures that form the foundation of pandas: DataFrame, Series, and various Index types. These structures provide the building blocks for all data manipulation operations.
import pandas as pd
from pandas import DataFrame, Series, Index

Two-dimensional labeled data structure with heterogeneous columns, similar to a spreadsheet or SQL table. The primary pandas data structure for most use cases.
class DataFrame:
def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):
"""
Two-dimensional, size-mutable, potentially heterogeneous tabular data.
Parameters:
- data: dict, list, ndarray, Series, or DataFrame
- index: Index or array-like, row labels
- columns: Index or array-like, column labels
- dtype: data type to force
- copy: bool, copy data from inputs
"""
def head(self, n=5):
"""Return the first n rows."""
def tail(self, n=5):
"""Return the last n rows."""
def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, show_counts=None, null_counts=None):
"""Print concise summary of DataFrame."""
def describe(self, percentiles=None, include=None, exclude=None):
"""Generate descriptive statistics."""
def shape(self):
"""Return tuple of (rows, columns)."""
def size(self):
"""Return number of elements."""
def columns(self):
"""Column labels."""
def index(self):
"""Row labels."""
def dtypes(self):
"""Data types of columns."""
def values(self):
"""NumPy representation of DataFrame."""
def empty(self):
"""True if DataFrame is empty."""
def copy(self, deep=True):
"""Make a copy of DataFrame."""
def select_dtypes(self, include=None, exclude=None):
"""Select columns based on data types."""
def astype(self, dtype, copy=True, errors='raise'):
"""Cast DataFrame to specified dtype."""
def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):
"""Sort by values along axis."""
def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):
"""Sort by labels along axis."""
def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
"""Drop specified labels from rows or columns."""
def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False):
"""Remove duplicate rows."""
def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
"""Remove missing values."""
def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
"""Fill missing values."""
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):
"""Group DataFrame by one or more columns."""
def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs):
"""Apply function along axis."""
def applymap(self, func, na_action=None, **kwargs):
"""Apply function element-wise."""
def aggregate(self, func, axis=0, *args, **kwargs):
"""Aggregate using one or more operations."""
def transform(self, func, axis=0, *args, **kwargs):
"""Transform using one or more operations."""
def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False):
"""Set DataFrame index using existing columns."""
def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):
"""Reset index to default integer index."""
def reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=None, limit=None, tolerance=None):
"""Conform DataFrame to new index."""
def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator=None, chunksize=None, date_format=None, doublequote=True, escapechar=None, decimal='.', errors='strict', storage_options=None):
"""Write DataFrame to CSV file."""
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=None, freeze_panes=None, storage_options=None):
"""Write DataFrame to Excel file."""
def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True, indent=None, storage_options=None):
"""Write DataFrame to JSON."""
def to_dict(self, orient='dict', into=dict):
"""Convert DataFrame to dictionary."""
def to_numpy(self, dtype=None, copy=False, na_value=None):
"""Convert DataFrame to NumPy array."""

One-dimensional labeled array capable of holding any data type. The basic building block of pandas data structures.
class Series:
def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False):
"""
One-dimensional ndarray with axis labels.
Parameters:
- data: array-like, dict, or scalar value
- index: array-like or Index, labels for the data
- dtype: data type for the series
- name: name for the Series
- copy: bool, copy input data
"""
def head(self, n=5):
"""Return the first n values."""
def tail(self, n=5):
"""Return the last n values."""
def describe(self, percentiles=None, include=None, exclude=None):
"""Generate descriptive statistics."""
def shape(self):
"""Return tuple of shape."""
def size(self):
"""Return number of elements."""
def index(self):
"""Series index (labels)."""
def values(self):
"""NumPy representation of Series."""
def dtype(self):
"""Data type of Series."""
def name(self):
"""Name of Series."""
def empty(self):
"""True if Series is empty."""
def copy(self, deep=True):
"""Make a copy of Series."""
def astype(self, dtype, copy=True, errors='raise'):
"""Cast Series to specified dtype."""
def sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):
"""Sort by values."""
def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):
"""Sort by index labels."""
def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
"""Drop specified labels."""
def drop_duplicates(self, keep='first', inplace=False):
"""Remove duplicate values."""
def dropna(self, axis=0, inplace=False, how=None):
"""Remove missing values."""
def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
"""Fill missing values."""
def apply(self, func, convert_dtype=True, args=(), **kwargs):
"""Apply function to Series values."""
def map(self, arg, na_action=None):
"""Map values using input mapping or function."""
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):
"""Group Series by values."""
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
"""Count unique values."""
def unique(self):
"""Return unique values."""
def nunique(self, dropna=True):
"""Count number of unique values."""
def mean(self, axis=None, skipna=True, level=None, numeric_only=None):
"""Return mean of values."""
def median(self, axis=None, skipna=True, level=None, numeric_only=None):
"""Return median of values."""
def std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
"""Return standard deviation."""
def var(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
"""Return variance."""
def sum(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
"""Return sum of values."""
def min(self, axis=None, skipna=True, level=None, numeric_only=None):
"""Return minimum value."""
def max(self, axis=None, skipna=True, level=None, numeric_only=None):
"""Return maximum value."""
def count(self, level=None):
"""Count non-missing values."""
def to_dict(self, into=dict):
"""Convert Series to dictionary."""
def to_list(self):
"""Convert Series to list."""
def to_numpy(self, dtype=None, copy=False, na_value=None):
"""Convert Series to NumPy array."""

Immutable sequence used for indexing and alignment in pandas data structures.
class Index:
def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True):
"""
Immutable sequence used for indexing and alignment.
Parameters:
- data: array-like, sequence of labels
- dtype: data type for the index
- copy: bool, copy input data
- name: name for the Index
"""
def shape(self):
"""Return tuple of shape."""
def size(self):
"""Return number of elements."""
def dtype(self):
"""Data type of Index."""
def name(self):
"""Name of Index."""
def names(self):
"""Names of levels (for MultiIndex)."""
def values(self):
"""NumPy representation of Index."""
def empty(self):
"""True if Index is empty."""
def copy(self, name=None, deep=False):
"""Make a copy of Index."""
def astype(self, dtype, copy=True):
"""Cast Index to specified dtype."""
def sort_values(self, return_indexer=False, ascending=True, na_position='last', key=None):
"""Sort Index values."""
def drop(self, labels, errors='raise'):
"""Drop specified labels from Index."""
def drop_duplicates(self, keep='first'):
"""Remove duplicate values."""
def dropna(self, how='any'):
"""Remove missing values."""
def fillna(self, value=None, downcast=None):
"""Fill missing values."""
def unique(self, level=None):
"""Return unique values."""
def nunique(self, dropna=True):
"""Count number of unique values."""
def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
"""Count unique values."""
def to_list(self):
"""Convert Index to list."""
def to_numpy(self, dtype=None, copy=False, na_value=None):
"""Convert Index to NumPy array."""
def to_series(self, index=None, name=None):
"""Convert Index to Series."""

class RangeIndex(Index):
"""Immutable Index implementing a monotonic integer range."""
def __init__(self, start=None, stop=None, step=None, dtype=None, copy=False, name=None): ...
class CategoricalIndex(Index):
"""Index based on an underlying Categorical."""
def __init__(self, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None): ...
class MultiIndex(Index):
"""Multi-level or hierarchical index object."""
def __init__(self, levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True): ...
class IntervalIndex(Index):
"""Index for intervals that are closed on the same side."""
def __init__(self, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): ...
class DatetimeIndex(Index):
"""Index for datetime64 data."""
def __init__(self, data=None, freq=None, tz=None, normalize=False, closed=None, ambiguous='raise', dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None): ...
class TimedeltaIndex(Index):
"""Index for timedelta64 data."""
def __init__(self, data=None, unit=None, freq=None, closed=None, dtype=None, copy=False, name=None): ...
class PeriodIndex(Index):
"""Index for Period data."""
def __init__(self, data=None, ordinal=None, freq=None, dtype=None, copy=False, name=None): ...

# Index slicing helper
IndexSlice: object # Slicing helper for MultiIndex
# Grouper for groupby operations
class Grouper:
def __init__(self, key=None, level=None, freq=None, axis=0, sort=False, closed=None, label=None, how='mean', fill_method=None, limit=None, group_keys=True, origin='start_day', offset=None, dropna=True): ...
# Named aggregation helper
class NamedAgg:
def __init__(self, column, aggfunc): ...
# Flags for pandas objects
class Flags:
allows_duplicate_labels: bool

Install with Tessl CLI
npx tessl i tessl/pypi-pandas