or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

api-types.md configuration.md core-data-structures.md data-io.md data-manipulation.md data-types.md errors.md index.md plotting.md statistics-math.md time-series.md

docs/core-data-structures.md

0

# Core Data Structures

1

2

The fundamental data structures that form the foundation of pandas: DataFrame, Series, and various Index types. These structures provide the building blocks for all data manipulation operations.

3

4

## Core Imports

5

6

```python

7

import pandas as pd

8

from pandas import DataFrame, Series, Index

9

```

10

11

## Capabilities

12

13

### DataFrame

14

15

Two-dimensional labeled data structure with heterogeneous columns, similar to a spreadsheet or SQL table. The primary pandas data structure for most use cases.

16

17

```python { .api }

18

class DataFrame:

19

def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):

20

"""

21

Two-dimensional, size-mutable, potentially heterogeneous tabular data.

22

23

Parameters:

24

- data: dict, list, ndarray, Series, or DataFrame

25

- index: Index or array-like, row labels

26

- columns: Index or array-like, column labels

27

- dtype: data type to force

28

- copy: bool, copy data from inputs

29

"""

30

31

def head(self, n=5):

32

"""Return the first n rows."""

33

34

def tail(self, n=5):

35

"""Return the last n rows."""

36

37

def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, show_counts=None, null_counts=None):

38

"""Print concise summary of DataFrame."""

39

40

def describe(self, percentiles=None, include=None, exclude=None):

41

"""Generate descriptive statistics."""

42

43

@property
def shape(self):

44

"""Return tuple of (rows, columns)."""

45

46

@property
def size(self):

47

"""Return number of elements."""

48

49

@property
def columns(self):

50

"""Column labels."""

51

52

@property
def index(self):

53

"""Row labels."""

54

55

@property
def dtypes(self):

56

"""Data types of columns."""

57

58

@property
def values(self):

59

"""NumPy representation of DataFrame."""

60

61

@property
def empty(self):

62

"""True if DataFrame is empty."""

63

64

def copy(self, deep=True):

65

"""Make a copy of DataFrame."""

66

67

def select_dtypes(self, include=None, exclude=None):

68

"""Select columns based on data types."""

69

70

def astype(self, dtype, copy=True, errors='raise'):

71

"""Cast DataFrame to specified dtype."""

72

73

def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):

74

"""Sort by values along axis."""

75

76

def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):

77

"""Sort by labels along axis."""

78

79

def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):

80

"""Drop specified labels from rows or columns."""

81

82

def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False):

83

"""Remove duplicate rows."""

84

85

def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):

86

"""Remove missing values."""

87

88

def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):

89

"""Fill missing values."""

90

91

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):

92

"""Group DataFrame by one or more columns."""

93

94

def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs):

95

"""Apply function along axis."""

96

97

def applymap(self, func, na_action=None, **kwargs):

98

"""Apply function element-wise."""

99

100

def aggregate(self, func, axis=0, *args, **kwargs):

101

"""Aggregate using one or more operations."""

102

103

def transform(self, func, axis=0, *args, **kwargs):

104

"""Transform using one or more operations."""

105

106

def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False):

107

"""Set DataFrame index using existing columns."""

108

109

def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):

110

"""Reset index to default integer index."""

111

112

def reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=None, limit=None, tolerance=None):

113

"""Conform DataFrame to new index."""

114

115

def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator=None, chunksize=None, date_format=None, doublequote=True, escapechar=None, decimal='.', errors='strict', storage_options=None):

116

"""Write DataFrame to CSV file."""

117

118

def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=None, freeze_panes=None, storage_options=None):

119

"""Write DataFrame to Excel file."""

120

121

def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True, indent=None, storage_options=None):

122

"""Write DataFrame to JSON."""

123

124

def to_dict(self, orient='dict', into=dict):

125

"""Convert DataFrame to dictionary."""

126

127

def to_numpy(self, dtype=None, copy=False, na_value=None):

128

"""Convert DataFrame to NumPy array."""

129

```

130

131

### Series

132

133

One-dimensional labeled array capable of holding any data type. The basic building block of pandas data structures.

134

135

```python { .api }

136

class Series:

137

def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False):

138

"""

139

One-dimensional ndarray with axis labels.

140

141

Parameters:

142

- data: array-like, dict, or scalar value

143

- index: array-like or Index, labels for the data

144

- dtype: data type for the series

145

- name: name for the Series

146

- copy: bool, copy input data

147

"""

148

149

def head(self, n=5):

150

"""Return the first n values."""

151

152

def tail(self, n=5):

153

"""Return the last n values."""

154

155

def describe(self, percentiles=None, include=None, exclude=None):

156

"""Generate descriptive statistics."""

157

158

@property
def shape(self):

159

"""Return tuple of shape."""

160

161

@property
def size(self):

162

"""Return number of elements."""

163

164

@property
def index(self):

165

"""Series index (labels)."""

166

167

@property
def values(self):

168

"""NumPy representation of Series."""

169

170

@property
def dtype(self):

171

"""Data type of Series."""

172

173

@property
def name(self):

174

"""Name of Series."""

175

176

@property
def empty(self):

177

"""True if Series is empty."""

178

179

def copy(self, deep=True):

180

"""Make a copy of Series."""

181

182

def astype(self, dtype, copy=True, errors='raise'):

183

"""Cast Series to specified dtype."""

184

185

def sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):

186

"""Sort by values."""

187

188

def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):

189

"""Sort by index labels."""

190

191

def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):

192

"""Drop specified labels."""

193

194

def drop_duplicates(self, keep='first', inplace=False):

195

"""Remove duplicate values."""

196

197

def dropna(self, axis=0, inplace=False, how=None):

198

"""Remove missing values."""

199

200

def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):

201

"""Fill missing values."""

202

203

def apply(self, func, convert_dtype=True, args=(), **kwargs):

204

"""Apply function to Series values."""

205

206

def map(self, arg, na_action=None):

207

"""Map values using input mapping or function."""

208

209

def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):

210

"""Group Series by values."""

211

212

def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):

213

"""Count unique values."""

214

215

def unique(self):

216

"""Return unique values."""

217

218

def nunique(self, dropna=True):

219

"""Count number of unique values."""

220

221

def mean(self, axis=None, skipna=True, level=None, numeric_only=None):

222

"""Return mean of values."""

223

224

def median(self, axis=None, skipna=True, level=None, numeric_only=None):

225

"""Return median of values."""

226

227

def std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):

228

"""Return standard deviation."""

229

230

def var(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):

231

"""Return variance."""

232

233

def sum(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):

234

"""Return sum of values."""

235

236

def min(self, axis=None, skipna=True, level=None, numeric_only=None):

237

"""Return minimum value."""

238

239

def max(self, axis=None, skipna=True, level=None, numeric_only=None):

240

"""Return maximum value."""

241

242

def count(self, level=None):

243

"""Count non-missing values."""

244

245

def to_dict(self, into=dict):

246

"""Convert Series to dictionary."""

247

248

def to_list(self):

249

"""Convert Series to list."""

250

251

def to_numpy(self, dtype=None, copy=False, na_value=None):

252

"""Convert Series to NumPy array."""

253

```

254

255

### Index

256

257

Immutable sequence used for indexing and alignment in pandas data structures.

258

259

```python { .api }

260

class Index:

261

def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True):

262

"""

263

Immutable sequence used for indexing and alignment.

264

265

Parameters:

266

- data: array-like, sequence of labels

267

- dtype: data type for the index

268

- copy: bool, copy input data

269

- name: name for the Index

270

"""

271

272

@property
def shape(self):

273

"""Return tuple of shape."""

274

275

@property
def size(self):

276

"""Return number of elements."""

277

278

@property
def dtype(self):

279

"""Data type of Index."""

280

281

@property
def name(self):

282

"""Name of Index."""

283

284

@property
def names(self):

285

"""Names of levels (for MultiIndex)."""

286

287

@property
def values(self):

288

"""NumPy representation of Index."""

289

290

@property
def empty(self):

291

"""True if Index is empty."""

292

293

def copy(self, name=None, deep=False):

294

"""Make a copy of Index."""

295

296

def astype(self, dtype, copy=True):

297

"""Cast Index to specified dtype."""

298

299

def sort_values(self, return_indexer=False, ascending=True, na_position='last', key=None):

300

"""Sort Index values."""

301

302

def drop(self, labels, errors='raise'):

303

"""Drop specified labels from Index."""

304

305

def drop_duplicates(self, keep='first'):

306

"""Remove duplicate values."""

307

308

def dropna(self, how='any'):

309

"""Remove missing values."""

310

311

def fillna(self, value=None, downcast=None):

312

"""Fill missing values."""

313

314

def unique(self, level=None):

315

"""Return unique values."""

316

317

def nunique(self, dropna=True):

318

"""Count number of unique values."""

319

320

def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):

321

"""Count unique values."""

322

323

def to_list(self):

324

"""Convert Index to list."""

325

326

def to_numpy(self, dtype=None, copy=False, na_value=None):

327

"""Convert Index to NumPy array."""

328

329

def to_series(self, index=None, name=None):

330

"""Convert Index to Series."""

331

```

332

333

### Specialized Index Types

334

335

```python { .api }

336

class RangeIndex(Index):

337

"""Immutable Index implementing a monotonic integer range."""

338

def __init__(self, start=None, stop=None, step=None, dtype=None, copy=False, name=None): ...

339

340

class CategoricalIndex(Index):

341

"""Index based on an underlying Categorical."""

342

def __init__(self, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None): ...

343

344

class MultiIndex(Index):

345

"""Multi-level or hierarchical index object."""

346

def __init__(self, levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True): ...

347

348

class IntervalIndex(Index):

349

"""Index for intervals that are closed on the same side."""

350

def __init__(self, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): ...

351

352

class DatetimeIndex(Index):

353

"""Index for datetime64 data."""

354

def __init__(self, data=None, freq=None, tz=None, normalize=False, closed=None, ambiguous='raise', dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None): ...

355

356

class TimedeltaIndex(Index):

357

"""Index for timedelta64 data."""

358

def __init__(self, data=None, unit=None, freq=None, closed=None, dtype=None, copy=False, name=None): ...

359

360

class PeriodIndex(Index):

361

"""Index for Period data."""

362

def __init__(self, data=None, ordinal=None, freq=None, dtype=None, copy=False, name=None): ...

363

```

364

365

## Types

366

367

```python { .api }

368

# Index slicing helper

369

IndexSlice: object # Slicing helper for MultiIndex

370

371

# Grouper for groupby operations

372

class Grouper:

373

def __init__(self, key=None, level=None, freq=None, axis=0, sort=False, closed=None, label=None, how='mean', fill_method=None, limit=None, group_keys=True, origin='start_day', offset=None, dropna=True): ...

374

375

# Named aggregation helper

376

class NamedAgg:

377

def __init__(self, column, aggfunc): ...

378

379

# Flags for pandas objects

380

class Flags:

381

allows_duplicate_labels: bool

382

```