# Core Data Structures

This section covers the fundamental data structures that form the foundation of pandas: DataFrame, Series, and the various Index types. These structures are the building blocks for all data manipulation operations.

## Core Imports

```python
7
import pandas as pd
8
from pandas import DataFrame, Series, Index
9
```

## Capabilities

### DataFrame

A two-dimensional labeled data structure with potentially heterogeneous columns, similar to a spreadsheet or SQL table. It is the primary pandas data structure for most use cases.

```python { .api }
18
class DataFrame:
    """Two-dimensional, size-mutable, potentially heterogeneous tabular data.

    Attribute-style members (``shape``, ``size``, ``columns``, ``index``,
    ``dtypes``, ``values`` and ``empty``) are declared as properties because
    they are read without call parentheses, e.g. ``df.shape``.
    """

    def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):
        """
        Create a DataFrame.

        Parameters:
        - data: dict, list, ndarray, Series, or DataFrame
        - index: Index or array-like, row labels
        - columns: Index or array-like, column labels
        - dtype: data type to force
        - copy: bool, copy data from inputs
        """

    # ------------------------------------------------------------------
    # Inspection
    # ------------------------------------------------------------------

    def head(self, n=5):
        """Return the first n rows."""

    def tail(self, n=5):
        """Return the last n rows."""

    # NOTE(review): `null_counts` appears to be the older spelling of
    # `show_counts` -- confirm it still exists in the targeted pandas version.
    def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, show_counts=None, null_counts=None):
        """Print concise summary of DataFrame."""

    def describe(self, percentiles=None, include=None, exclude=None):
        """Generate descriptive statistics."""

    # ------------------------------------------------------------------
    # Attributes (accessed without parentheses)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Return tuple of (rows, columns)."""

    @property
    def size(self):
        """Return number of elements."""

    @property
    def columns(self):
        """Column labels."""

    @property
    def index(self):
        """Row labels."""

    @property
    def dtypes(self):
        """Data types of columns."""

    @property
    def values(self):
        """NumPy representation of DataFrame."""

    @property
    def empty(self):
        """True if DataFrame is empty."""

    # ------------------------------------------------------------------
    # Copying and type conversion
    # ------------------------------------------------------------------

    def copy(self, deep=True):
        """Make a copy of DataFrame."""

    def select_dtypes(self, include=None, exclude=None):
        """Select columns based on data types."""

    def astype(self, dtype, copy=True, errors='raise'):
        """Cast DataFrame to specified dtype."""

    # ------------------------------------------------------------------
    # Sorting
    # ------------------------------------------------------------------

    def sort_values(
        self,
        by,
        axis=0,
        ascending=True,
        inplace=False,
        kind='quicksort',
        na_position='last',
        ignore_index=False,
        key=None,
    ):
        """Sort by values along axis."""

    def sort_index(
        self,
        axis=0,
        level=None,
        ascending=True,
        inplace=False,
        kind='quicksort',
        na_position='last',
        sort_remaining=True,
        ignore_index=False,
        key=None,
    ):
        """Sort by labels along axis."""

    # ------------------------------------------------------------------
    # Dropping labels, duplicates and missing data
    # ------------------------------------------------------------------

    def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
        """Drop specified labels from rows or columns."""

    def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False):
        """Remove duplicate rows."""

    def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
        """Remove missing values."""

    def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Grouping and function application
    # ------------------------------------------------------------------

    # NOTE(review): `squeeze` is believed to be absent from newer pandas
    # releases -- confirm against the targeted pandas version.
    def groupby(
        self,
        by=None,
        axis=0,
        level=None,
        as_index=True,
        sort=True,
        group_keys=True,
        squeeze=False,
        observed=False,
        dropna=True,
    ):
        """Group DataFrame by one or more columns."""

    def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs):
        """Apply function along axis."""

    def applymap(self, func, na_action=None, **kwargs):
        """Apply function element-wise."""

    def aggregate(self, func, axis=0, *args, **kwargs):
        """Aggregate using one or more operations."""

    def transform(self, func, axis=0, *args, **kwargs):
        """Transform using one or more operations."""

    # ------------------------------------------------------------------
    # Index manipulation
    # ------------------------------------------------------------------

    def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False):
        """Set DataFrame index using existing columns."""

    def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):
        """Reset index to default integer index."""

    def reindex(
        self,
        labels=None,
        index=None,
        columns=None,
        axis=None,
        method=None,
        copy=True,
        level=None,
        fill_value=None,
        limit=None,
        tolerance=None,
    ):
        """Conform DataFrame to new index."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    # NOTE(review): `line_terminator` is believed to have been renamed
    # `lineterminator` in newer pandas -- confirm against the targeted version.
    def to_csv(
        self,
        path_or_buf=None,
        sep=',',
        na_rep='',
        float_format=None,
        columns=None,
        header=True,
        index=True,
        index_label=None,
        mode='w',
        encoding=None,
        compression='infer',
        quoting=None,
        quotechar='"',
        line_terminator=None,
        chunksize=None,
        date_format=None,
        doublequote=True,
        escapechar=None,
        decimal='.',
        errors='strict',
        storage_options=None,
    ):
        """Write DataFrame to CSV file."""

    def to_excel(
        self,
        excel_writer,
        sheet_name='Sheet1',
        na_rep='',
        float_format=None,
        columns=None,
        header=True,
        index=True,
        index_label=None,
        startrow=0,
        startcol=0,
        engine=None,
        merge_cells=True,
        encoding=None,
        inf_rep='inf',
        verbose=None,
        freeze_panes=None,
        storage_options=None,
    ):
        """Write DataFrame to Excel file."""

    def to_json(
        self,
        path_or_buf=None,
        orient=None,
        date_format=None,
        double_precision=10,
        force_ascii=True,
        date_unit='ms',
        default_handler=None,
        lines=False,
        compression='infer',
        index=True,
        indent=None,
        storage_options=None,
    ):
        """Write DataFrame to JSON."""

    def to_dict(self, orient='dict', into=dict):
        """Convert DataFrame to dictionary."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert DataFrame to NumPy array."""
129
```

### Series

A one-dimensional labeled array capable of holding any data type. It is the basic building block of pandas data structures.

```python { .api }
136
class Series:
    """One-dimensional ndarray with axis labels.

    Attribute-style members (``shape``, ``size``, ``index``, ``values``,
    ``dtype``, ``name`` and ``empty``) are declared as properties because
    they are read without call parentheses, e.g. ``s.dtype``.
    """

    def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False):
        """
        Create a Series.

        Parameters:
        - data: array-like, dict, or scalar value
        - index: array-like or Index, labels for the data
        - dtype: data type for the series
        - name: name for the Series
        - copy: bool, copy input data
        - fastpath: not described in the original listing; presumably an
          internal flag -- TODO confirm
        """

    # ------------------------------------------------------------------
    # Inspection
    # ------------------------------------------------------------------

    def head(self, n=5):
        """Return the first n values."""

    def tail(self, n=5):
        """Return the last n values."""

    def describe(self, percentiles=None, include=None, exclude=None):
        """Generate descriptive statistics."""

    # ------------------------------------------------------------------
    # Attributes (accessed without parentheses)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Return tuple of shape."""

    @property
    def size(self):
        """Return number of elements."""

    @property
    def index(self):
        """Series index (labels)."""

    @property
    def values(self):
        """NumPy representation of Series."""

    @property
    def dtype(self):
        """Data type of Series."""

    @property
    def name(self):
        """Name of Series."""

    @property
    def empty(self):
        """True if Series is empty."""

    # ------------------------------------------------------------------
    # Copying and type conversion
    # ------------------------------------------------------------------

    def copy(self, deep=True):
        """Make a copy of Series."""

    def astype(self, dtype, copy=True, errors='raise'):
        """Cast Series to specified dtype."""

    # ------------------------------------------------------------------
    # Sorting
    # ------------------------------------------------------------------

    def sort_values(
        self,
        axis=0,
        ascending=True,
        inplace=False,
        kind='quicksort',
        na_position='last',
        ignore_index=False,
        key=None,
    ):
        """Sort by values."""

    def sort_index(
        self,
        axis=0,
        level=None,
        ascending=True,
        inplace=False,
        kind='quicksort',
        na_position='last',
        sort_remaining=True,
        ignore_index=False,
        key=None,
    ):
        """Sort by index labels."""

    # ------------------------------------------------------------------
    # Dropping labels, duplicates and missing data
    # ------------------------------------------------------------------

    def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
        """Drop specified labels."""

    def drop_duplicates(self, keep='first', inplace=False):
        """Remove duplicate values."""

    def dropna(self, axis=0, inplace=False, how=None):
        """Remove missing values."""

    def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Function application and grouping
    # ------------------------------------------------------------------

    def apply(self, func, convert_dtype=True, args=(), **kwargs):
        """Apply function to Series values."""

    def map(self, arg, na_action=None):
        """Map values using input mapping or function."""

    # NOTE(review): `squeeze` is believed to be absent from newer pandas
    # releases -- confirm against the targeted pandas version.
    def groupby(
        self,
        by=None,
        axis=0,
        level=None,
        as_index=True,
        sort=True,
        group_keys=True,
        squeeze=False,
        observed=False,
        dropna=True,
    ):
        """Group Series by values."""

    # ------------------------------------------------------------------
    # Unique values and counts
    # ------------------------------------------------------------------

    def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        """Count unique values."""

    def unique(self):
        """Return unique values."""

    def nunique(self, dropna=True):
        """Count number of unique values."""

    # ------------------------------------------------------------------
    # Reductions
    # ------------------------------------------------------------------

    def mean(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return mean of values."""

    def median(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return median of values."""

    def std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
        """Return standard deviation."""

    def var(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
        """Return variance."""

    def sum(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
        """Return sum of values."""

    def min(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return minimum value."""

    def max(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return maximum value."""

    def count(self, level=None):
        """Count non-missing values."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def to_dict(self, into=dict):
        """Convert Series to dictionary."""

    def to_list(self):
        """Convert Series to list."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert Series to NumPy array."""
253
```

### Index

An immutable sequence used for indexing and alignment in pandas data structures.

```python { .api }
260
class Index:
    """Immutable sequence used for indexing and alignment.

    Attribute-style members (``shape``, ``size``, ``dtype``, ``name``,
    ``names``, ``values`` and ``empty``) are declared as properties because
    they are read without call parentheses, e.g. ``idx.name``.
    """

    def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True):
        """
        Create an Index.

        Parameters:
        - data: array-like, sequence of labels
        - dtype: data type for the index
        - copy: bool, copy input data
        - name: name for the Index
        - tupleize_cols: not described in the original listing; presumably
          controls whether a list of tuples becomes a MultiIndex -- TODO confirm
        """

    # ------------------------------------------------------------------
    # Attributes (accessed without parentheses)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Return tuple of shape."""

    @property
    def size(self):
        """Return number of elements."""

    @property
    def dtype(self):
        """Data type of Index."""

    @property
    def name(self):
        """Name of Index."""

    @property
    def names(self):
        """Names of levels (for MultiIndex)."""

    @property
    def values(self):
        """NumPy representation of Index."""

    @property
    def empty(self):
        """True if Index is empty."""

    # ------------------------------------------------------------------
    # Copying, conversion and sorting
    # ------------------------------------------------------------------

    def copy(self, name=None, deep=False):
        """Make a copy of Index."""

    def astype(self, dtype, copy=True):
        """Cast Index to specified dtype."""

    def sort_values(self, return_indexer=False, ascending=True, na_position='last', key=None):
        """Sort Index values."""

    # ------------------------------------------------------------------
    # Dropping labels, duplicates and missing data
    # ------------------------------------------------------------------

    def drop(self, labels, errors='raise'):
        """Drop specified labels from Index."""

    def drop_duplicates(self, keep='first'):
        """Remove duplicate values."""

    def dropna(self, how='any'):
        """Remove missing values."""

    def fillna(self, value=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Unique values and counts
    # ------------------------------------------------------------------

    def unique(self, level=None):
        """Return unique values."""

    def nunique(self, dropna=True):
        """Count number of unique values."""

    def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        """Count unique values."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def to_list(self):
        """Convert Index to list."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert Index to NumPy array."""

    def to_series(self, index=None, name=None):
        """Convert Index to Series."""
331
```
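
### Specialized Index Types

Index subclasses specialized for particular kinds of labels: integer ranges, categoricals, hierarchical levels, intervals, datetimes, timedeltas, and periods.
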
```python { .api }
336
class RangeIndex(Index):
    """Immutable Index implementing a monotonic integer range."""

    def __init__(self, start=None, stop=None, step=None, dtype=None, copy=False, name=None):
        """Create a RangeIndex; every argument is optional (see signature defaults)."""
339
340
class CategoricalIndex(Index):
    """Index based on an underlying Categorical."""

    def __init__(self, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None):
        """Create a CategoricalIndex; every argument is optional (see signature defaults)."""
343
344
class MultiIndex(Index):
    """Multi-level or hierarchical index object."""

    def __init__(
        self,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy=False,
        name=None,
        verify_integrity=True,
    ):
        """Create a MultiIndex; every argument is optional (see signature defaults)."""
347
348
class IntervalIndex(Index):
    """Index for intervals that are closed on the same side."""

    def __init__(self, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True):
        """Create an IntervalIndex; ``data`` is the only required argument."""
351
352
class DatetimeIndex(Index):
    """Index for datetime64 data."""

    def __init__(
        self,
        data=None,
        freq=None,
        tz=None,
        normalize=False,
        closed=None,
        ambiguous='raise',
        dayfirst=False,
        yearfirst=False,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Create a DatetimeIndex; every argument is optional (see signature defaults)."""
355
356
class TimedeltaIndex(Index):
    """Index for timedelta64 data."""

    def __init__(self, data=None, unit=None, freq=None, closed=None, dtype=None, copy=False, name=None):
        """Create a TimedeltaIndex; every argument is optional (see signature defaults)."""
359
360
class PeriodIndex(Index):
    """Index for Period data."""

    def __init__(self, data=None, ordinal=None, freq=None, dtype=None, copy=False, name=None):
        """Create a PeriodIndex; every argument is optional (see signature defaults)."""
363
```

## Types

Helper types used alongside the core data structures.

```python { .api }
368
# Index slicing helper (declaration only; no value is bound here)
IndexSlice: object  # Slicing helper for MultiIndex
370
371
# Grouper for groupby operations
class Grouper:
    """Grouping specification passed to groupby operations."""

    def __init__(
        self,
        key=None,
        level=None,
        freq=None,
        axis=0,
        sort=False,
        closed=None,
        label=None,
        how='mean',
        fill_method=None,
        limit=None,
        group_keys=True,
        origin='start_day',
        offset=None,
        dropna=True,
    ):
        """Create a Grouper; every argument is optional (see signature defaults)."""
374
375
# Named aggregation helper
class NamedAgg:
    """Pairs a column with the aggregation function to apply to it."""

    def __init__(self, column, aggfunc):
        """Create a NamedAgg from ``column`` and ``aggfunc`` (both required)."""
378
379
# Flags for pandas objects
class Flags:
    """Flags that apply to pandas objects."""

    # Declared as a bool; presumably whether the object may carry
    # duplicate labels -- TODO confirm
    allows_duplicate_labels: bool
382
```