# Data Input/Output

Comprehensive I/O capabilities for reading and writing data in various formats including CSV, Excel, JSON, SQL databases, HDF5, Parquet, and many statistical file formats.

## Core Imports

```python
import pandas as pd
from pandas import read_csv, read_excel, read_json, read_sql
```

## Capabilities

### CSV and Text Files

Read and write comma-separated values and other delimited text files.

```python { .api }
def read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, date_format=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors='strict', dialect=None, on_bad_lines='error', delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None, storage_options=None, dtype_backend='numpy_nullable'):
    """
    Read a comma-separated values (CSV) file into DataFrame.

    Parameters:
    - filepath_or_buffer: str, path object, or file-like object
    - sep: str, delimiter to use
    - header: int, list of int, default 'infer', row(s) to use as column names
    - names: array-like, list of column names to use
    - index_col: int, str, sequence of int/str, or False, column(s) to use as row labels
    - usecols: list-like or callable, return subset of columns
    - dtype: type name or dict of column -> type, data type for data or columns
    - na_values: scalar, str, list-like, or dict, additional strings to recognize as NA/NaN
    - parse_dates: bool, list of int or names, list of lists, or dict
    - chunksize: int, return TextFileReader object for iteration

    Returns:
    DataFrame or TextFileReader
    """

def read_table(filepath_or_buffer, sep='\t', delimiter=None, header='infer', names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, date_format=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors='strict', dialect=None, on_bad_lines='error', delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None, storage_options=None, dtype_backend='numpy_nullable'):
    """
    Read general delimited file into DataFrame.

    Similar to read_csv but with tab delimiter by default.
    """

def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, infer_nrows=100, dtype_backend='numpy_nullable', iterator=False, chunksize=None, **kwargs):
    """
    Read a table of fixed-width formatted lines into DataFrame.

    Parameters:
    - filepath_or_buffer: str, path object, or file-like object
    - colspecs: list of tuple (int, int) or 'infer', column specifications
    - widths: list of int, width of each field
    - infer_nrows: int, number of rows to consider when letting the parser determine colspecs

    Returns:
    DataFrame or TextFileReader
    """
```
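
A brief usage sketch (the file name and column names are illustrative, not part of the API):

```python
import pandas as pd

# Read with explicit dtypes and date parsing (hypothetical file/columns)
df = pd.read_csv("events.csv", dtype={"id": "int64"}, parse_dates=["created_at"])

# Stream a large file: chunksize returns a TextFileReader of DataFrames
row_count = 0
with pd.read_csv("events.csv", chunksize=10_000) as reader:
    for chunk in reader:
        row_count += len(chunk)
```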

### Excel Files

Read and write Microsoft Excel files (.xlsx, .xls).

```python { .api }
def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, parse_dates=False, date_parser=None, date_format=None, thousands=None, decimal='.', comment=None, skipfooter=0, storage_options=None, dtype_backend='numpy_nullable', engine_kwargs=None):
    """
    Read an Excel file into a pandas DataFrame.

    Parameters:
    - io: str, bytes, ExcelFile, xlrd.Book, path object, or file-like object
    - sheet_name: str, int, list, or None, names of sheets or sheet positions to read
    - header: int, list of int, default 0, row(s) to use as column names
    - names: array-like, list of column names to use
    - index_col: int, str, list of int, default None, column(s) to use as row labels
    - usecols: str, list-like, or callable, return subset of columns
    - dtype: type name or dict of column -> type, data type for data or columns
    - skiprows: list-like, rows to skip at the beginning
    - nrows: int, number of rows to parse

    Returns:
    DataFrame or dict of DataFrames
    """

class ExcelFile:
    def __init__(self, path_or_buffer, engine=None, storage_options=None, engine_kwargs=None):
        """
        Class for parsing tabular Excel sheets into DataFrame objects.

        Parameters:
        - path_or_buffer: str, bytes, path object, or file-like object
        - engine: str, engine to use for reading ('openpyxl', 'xlrd', 'odf', 'pyxlsb')
        """

    def parse(self, sheet_name=0, header=0, names=None, index_col=None, usecols=None, converters=None, dtype=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, verbose=False, parse_dates=False, date_parser=None, thousands=None, comment=None, skipfooter=0, convert_float=None, mangle_dupe_cols=True, dtype_backend='numpy_nullable', **kwds):
        """Parse specified sheet(s) into DataFrame."""

    @property
    def sheet_names(self):
        """Return list of sheet names."""

class ExcelWriter:
    def __init__(self, path, engine=None, date_format=None, datetime_format=None, mode='w', storage_options=None, if_sheet_exists=None, engine_kwargs=None):
        """
        Class for writing DataFrame objects into Excel sheets.

        Parameters:
        - path: str or file-like object, file path or existing ExcelWriter
        - engine: str, engine to use for writing ('openpyxl', 'xlsxwriter')
        - mode: str, file mode to use ('w' for write, 'a' for append)
        """

    def close(self):
        """Close the contained workbook."""

    def save(self):
        """Save workbook to disk."""
```
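
A round-trip sketch, assuming the openpyxl engine is installed; file and sheet names are illustrative:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})

# ExcelWriter as a context manager writes multiple sheets to one workbook
with pd.ExcelWriter("out.xlsx", engine="openpyxl") as writer:
    df.to_excel(writer, sheet_name="raw", index=False)
    df.describe().to_excel(writer, sheet_name="summary")

# sheet_name=None reads every sheet into a dict of DataFrames
sheets = pd.read_excel("out.xlsx", sheet_name=None)
```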

### JSON Files

Read and write JavaScript Object Notation (JSON) format.

```python { .api }
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, convert_axes=None, convert_dates=True, keep_default_dates=True, precise_float=False, date_unit=None, encoding=None, encoding_errors='strict', lines=False, chunksize=None, compression='infer', nrows=None, storage_options=None, dtype_backend='numpy_nullable', engine='ujson'):
    """
    Convert a JSON string to pandas object.

    Parameters:
    - path_or_buf: str, path object, file-like object, or None
    - orient: str, indication of expected JSON string format
    - typ: str, type of object to recover ('frame' or 'series')
    - dtype: bool or dict, data type for data or columns
    - convert_dates: bool or list of str, dates to parse
    - lines: bool, read file as one JSON object per line
    - chunksize: int, return JsonReader object for iteration

    Returns:
    Series, DataFrame, or JsonReader
    """

def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, errors='raise', sep='.', max_level=None):
    """
    Normalize semi-structured JSON data into a flat table.

    Parameters:
    - data: dict or list of dicts, unserialized JSON objects
    - record_path: str or list of str, path in each object to list of records
    - meta: list of str, fields to use as metadata for each record
    - sep: str, nested record separator
    - max_level: int, max number of levels to normalize

    Returns:
    DataFrame
    """
```
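
A short sketch of both entry points (the data is inline, so it runs as-is):

```python
import pandas as pd
from io import StringIO

# orient='records': the JSON is a list with one object per row
raw = '[{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]'
df = pd.read_json(StringIO(raw), orient="records")

# json_normalize flattens nested records: each item of "orders" becomes
# a row, and the top-level "user" field is carried along as metadata
data = [{"user": "u1", "orders": [{"sku": "x"}, {"sku": "y"}]}]
flat = pd.json_normalize(data, record_path="orders", meta=["user"])
```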

### SQL Databases

Read and write data from SQL databases.

```python { .api }
def read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None, dtype_backend='numpy_nullable', dtype=None):
    """
    Read SQL query or database table into a DataFrame.

    Parameters:
    - sql: str or SQLAlchemy Selectable, SQL query or table name
    - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection
    - index_col: str or list of str, column(s) to use as row labels
    - coerce_float: bool, attempts to convert values of non-string, non-numeric objects to floating point
    - params: list, tuple, mapping or None, parameters to pass to execute method
    - parse_dates: list or dict, columns to parse as dates
    - chunksize: int, number of rows to include in each chunk

    Returns:
    DataFrame or Iterator[DataFrame]
    """

def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, chunksize=None, dtype=None, dtype_backend='numpy_nullable'):
    """
    Read SQL query into a DataFrame.

    Parameters:
    - sql: str or SQLAlchemy Selectable, SQL query to be executed
    - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection

    Returns:
    DataFrame or Iterator[DataFrame]
    """

def read_sql_table(table_name, con, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None, dtype_backend='numpy_nullable'):
    """
    Read SQL database table into a DataFrame.

    Parameters:
    - table_name: str, name of SQL table in database
    - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection
    - schema: str, name of SQL schema in database to query
    - columns: list, list of column names to select from SQL table

    Returns:
    DataFrame or Iterator[DataFrame]
    """
```
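
A self-contained sketch using an in-memory SQLite database (table and column names are illustrative):

```python
import sqlite3
import pandas as pd

con = sqlite3.connect(":memory:")
pd.DataFrame({"id": [1, 2], "val": [10.0, 20.0]}).to_sql("t", con, index=False)

# Parameterized query: params keeps user input out of the SQL string
one = pd.read_sql_query("SELECT * FROM t WHERE id = ?", con, params=(1,))

# read_sql dispatches to read_sql_query for query strings
everything = pd.read_sql("SELECT * FROM t", con, index_col="id")
con.close()
```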

### Binary Formats

Read and write binary file formats for efficient storage.

```python { .api }
def read_pickle(filepath_or_buffer, compression='infer', storage_options=None):
    """
    Load pickled pandas object (or any object) from file.

    Parameters:
    - filepath_or_buffer: str, path object, or file-like object
    - compression: str or dict, compression type and options

    Returns:
    unpickled object
    """

def to_pickle(obj, filepath_or_buffer, compression='infer', protocol=5, storage_options=None):
    """
    Pickle (serialize) object to file.

    Parameters:
    - obj: any object, object to pickle
    - filepath_or_buffer: str, path object, or file-like object
    - compression: str or dict, compression type and options
    - protocol: int, pickle protocol to use
    """

def read_hdf(path_or_buf, key=None, mode='r', errors='strict', where=None, start=None, stop=None, columns=None, iterator=False, chunksize=None, dtype_backend='numpy_nullable', **kwargs):
    """
    Read from the store, close it if we opened it.

    Parameters:
    - path_or_buf: str, path object, pandas.HDFStore, or file-like object
    - key: str, identifier for group in store
    - mode: str, mode to open file
    - where: list of Term, criteria to select
    - start: int, row number to start selection
    - stop: int, row number to stop selection
    - columns: list, list of columns to return

    Returns:
    DataFrame or Series
    """

class HDFStore:
    def __init__(self, path, mode='r', complevel=None, complib=None, fletcher32=False, **kwargs):
        """
        Dict-like IO interface for storing pandas objects in PyTables.

        Parameters:
        - path: str, file path to HDF5 file
        - mode: str, mode to open file
        - complevel: int, compression level (0-9)
        - complib: str, compression library
        """

    def put(self, key, value, format=None, index=True, append=False, complib=None, complevel=None, min_itemsize=None, nan_rep=None, data_columns=None, encoding=None, errors='strict', track_times=True, dropna=False):
        """Store object in HDFStore."""

    def get(self, key):
        """Retrieve pandas object stored in file."""

    def keys(self):
        """Return list of keys in the store."""

    def close(self):
        """Close the store."""

def read_parquet(path, engine='auto', columns=None, storage_options=None, use_nullable_dtypes=False, dtype_backend='numpy_nullable', filesystem=None, filters=None, **kwargs):
    """
    Load a parquet object, returning a DataFrame.

    Parameters:
    - path: str, path object, or file-like object
    - engine: str, parquet library to use ('auto', 'pyarrow', 'fastparquet')
    - columns: list, columns to read
    - filters: list of tuples, row group filters

    Returns:
    DataFrame
    """

def read_feather(path, columns=None, use_threads=True, storage_options=None, dtype_backend='numpy_nullable'):
    """
    Load a feather-format object into a DataFrame.

    Parameters:
    - path: str, path object, or file-like object
    - columns: sequence, columns to read
    - use_threads: bool, whether to parallelize reading

    Returns:
    DataFrame
    """

def read_orc(path, columns=None, dtype_backend='numpy_nullable', filesystem=None, **kwargs):
    """
    Load an ORC object, returning a DataFrame.

    Parameters:
    - path: str, path object, or file-like object
    - columns: list, columns to read

    Returns:
    DataFrame
    """
```
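
A round-trip sketch; the Parquet calls assume pyarrow or fastparquet is installed:

```python
import pandas as pd

df = pd.DataFrame({"x": range(5), "y": list("abcde")})

# Parquet round trip with column pruning on read
df.to_parquet("frame.parquet")
subset = pd.read_parquet("frame.parquet", columns=["x"])

# Pickle round trip preserves the exact pandas object;
# compression='infer' picks gzip from the .gz suffix
df.to_pickle("frame.pkl.gz")
back = pd.read_pickle("frame.pkl.gz")
```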

### Web and Markup

Read data from web sources and markup formats.

```python { .api }
def read_html(io, match='.+', header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, thousands=',', encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True, displayed_only=True, extract_links=None, dtype_backend='numpy_nullable', storage_options=None):
    """
    Read HTML tables into a list of DataFrame objects.

    Parameters:
    - io: str, path object, file-like object, or raw string containing HTML
    - match: str or compiled regex, set of table attributes to match
    - header: int or list-like, row(s) to use to make column headers
    - index_col: int or list-like, column(s) to use to make row index
    - skiprows: int, list-like or slice, rows to skip
    - attrs: dict, attributes to match in table tag

    Returns:
    list of DataFrames
    """

def read_xml(path_or_buffer, xpath='./*', namespaces=None, elems_only=False, attrs_only=False, names=None, dtype=None, converters=None, parse_dates=None, encoding='utf-8', parser='lxml', tree_builder=None, stylesheet=None, iterparse=None, compression='infer', storage_options=None, dtype_backend='numpy_nullable'):
    """
    Read XML document into a DataFrame object.

    Parameters:
    - path_or_buffer: str, path object, or file-like object
    - xpath: str, XPath expression to parse desired element(s)
    - namespaces: dict, namespace prefixes and URIs
    - elems_only: bool, parse child elements only
    - attrs_only: bool, parse attributes only
    - encoding: str, encoding of document

    Returns:
    DataFrame
    """

def read_clipboard(sep='\\s+', dtype_backend='numpy_nullable', **kwargs):
    """
    Read text from clipboard and pass to read_csv.

    Parameters:
    - sep: str, delimiter for splitting clipboard contents

    Returns:
    DataFrame
    """
```
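
An inline sketch; read_html needs an HTML parser installed (lxml, or bs4 with html5lib) and read_xml defaults to lxml:

```python
import pandas as pd
from io import StringIO

html = """
<table>
  <tr><th>city</th><th>pop</th></tr>
  <tr><td>Oslo</td><td>0.7</td></tr>
</table>
"""
# read_html returns one DataFrame per matching <table>
tables = pd.read_html(StringIO(html))
cities = tables[0]

xml = '<rows><row id="1" name="a"/><row id="2" name="b"/></rows>'
# Each matched <row> element's attributes become columns
records = pd.read_xml(StringIO(xml), xpath=".//row")
```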

### Statistical Package Formats

Read data from statistical software packages.

```python { .api }
def read_stata(filepath_or_buffer, convert_dates=True, convert_categoricals=True, encoding=None, index_col=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, chunksize=None, iterator=False, compression='infer', storage_options=None, dtype_backend='numpy_nullable'):
    """
    Read Stata file into DataFrame.

    Parameters:
    - filepath_or_buffer: str, path object, or file-like object
    - convert_dates: bool, convert date variables to pandas datetime
    - convert_categoricals: bool, convert categorical variables to pandas Categorical
    - encoding: str, encoding used to decode text strings
    - preserve_dtypes: bool, preserve Stata data types

    Returns:
    DataFrame or StataReader
    """

def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, chunksize=None, iterator=False, compression='infer', storage_options=None, dtype_backend='numpy_nullable'):
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters:
    - filepath_or_buffer: str, path object, or file-like object
    - format: str, file format ('xport' or 'sas7bdat')
    - encoding: str, encoding for text data
    - chunksize: int, rows to read at a time

    Returns:
    DataFrame or SAS Reader
    """

def read_spss(path, usecols=None, convert_categoricals=True, dtype_backend='numpy_nullable', storage_options=None):
    """
    Load an SPSS file from the file path, returning a DataFrame.

    Parameters:
    - path: str, path object, or file-like object
    - usecols: list-like, return subset of columns
    - convert_categoricals: bool, convert categorical variables to pandas Categorical

    Returns:
    DataFrame
    """
```
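
A Stata round-trip sketch (DataFrame.to_stata writes the input file here; SAS and SPSS reads require existing files produced by those packages):

```python
import pandas as pd

df = pd.DataFrame({"score": [1.5, 2.5], "group": ["a", "b"]})
df.to_stata("scores.dta", write_index=False)

# Full read; value labels become Categorical by default
back = pd.read_stata("scores.dta")

# Chunked read: a StataReader that yields DataFrames
with pd.read_stata("scores.dta", chunksize=1_000) as reader:
    for chunk in reader:
        pass  # process each chunk
```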

### Google BigQuery

Read data from Google BigQuery.

```python { .api }
def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, auth_local_webserver=True, dialect=None, location=None, configuration=None, credentials=None, use_bqstorage_api=None, max_results=None, progress_bar_type=None, dtype_backend='numpy_nullable'):
    """
    Load data from Google BigQuery.

    Parameters:
    - query: str, SQL-like query to return data values
    - project_id: str, Google BigQuery Account project ID
    - index_col: str, name of result column to use for index
    - col_order: list of str, list of BigQuery column names in desired order
    - reauth: bool, force Google BigQuery to re-authenticate user
    - dialect: str, SQL dialect for BigQuery ('legacy' or 'standard')

    Returns:
    DataFrame
    """
```
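
A sketch only: it assumes the pandas-gbq package is installed and Google credentials are configured, and recent pandas releases deprecate this wrapper in favor of pandas-gbq's own read_gbq. The project ID is a placeholder; the table is a BigQuery public dataset:

```python
import pandas as pd

# "my-project" is a placeholder; authentication is handled by pandas-gbq
df = pd.read_gbq(
    "SELECT name, state FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 10",
    project_id="my-project",
    dialect="standard",
)
```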

## Types

```python { .api }
# File reader classes for chunked reading
class TextFileReader:
    """Iterator for reading CSV files in chunks."""
    def __iter__(self): ...
    def __next__(self): ...
    def get_chunk(self, size=None): ...
    def close(self): ...

class JsonReader:
    """Iterator for reading JSON files in chunks."""
    def __iter__(self): ...
    def __next__(self): ...
    def close(self): ...

# Storage format constants
class ExcelWriterMode:
    WRITE = 'w'
    APPEND = 'a'

class JSONOrient:
    SPLIT = 'split'
    RECORDS = 'records'
    INDEX = 'index'
    COLUMNS = 'columns'
    VALUES = 'values'
    TABLE = 'table'
```