or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

archive-reading.md cli-tools.md http-capture.md http-headers.md index.md stream-processing.md time-utilities.md warc-writing.md

docs/time-utilities.md

0

# Time Utilities

1

2

Time-handling functions for web archive timestamps: they parse and format multiple date formats (ISO 8601 dates, HTTP dates, and 14-digit timestamps), optionally return timezone-aware datetimes, and convert between the timestamp representations used in WARC files.

3

4

## Capabilities

5

6

### Date and Time Conversion Functions

7

8

Complete set of functions for converting between different date and time formats commonly used in web archiving.

9

10

```python { .api }

11

def iso_date_to_datetime(string, tz_aware=False):

12

"""

13

Parse ISO 8601 date string to datetime object.

14

15

Args:

16

string (str): ISO 8601 date string (e.g., '2021-01-01T12:00:00Z')

17

tz_aware (bool): Whether to return timezone-aware datetime

18

19

Returns:

20

datetime: Parsed datetime object

21

"""

22

23

def http_date_to_datetime(string, tz_aware=False):

24

"""

25

Parse HTTP date string to datetime object.

26

27

Args:

28

string (str): HTTP date string (e.g., 'Fri, 01 Jan 2021 12:00:00 GMT')

29

tz_aware (bool): Whether to return timezone-aware datetime

30

31

Returns:

32

datetime: Parsed datetime object

33

"""

34

35

def datetime_to_http_date(the_datetime):

36

"""

37

Convert datetime to HTTP date string format.

38

39

Args:

40

the_datetime (datetime): Datetime object to convert

41

42

Returns:

43

str: HTTP date string (e.g., 'Fri, 01 Jan 2021 12:00:00 GMT')

44

"""

45

46

def datetime_to_iso_date(the_datetime, use_micros=False):

47

"""

48

Convert datetime to ISO 8601 date string.

49

50

Args:

51

the_datetime (datetime): Datetime object to convert

52

use_micros (bool): Whether to include microseconds in output

53

54

Returns:

55

str: ISO 8601 date string

56

"""

57

58

def datetime_to_timestamp(the_datetime):

59

"""

60

Convert datetime to 14-digit timestamp format.

61

62

Args:

63

the_datetime (datetime): Datetime object to convert

64

65

Returns:

66

str: 14-digit timestamp (YYYYMMDDHHMMSS)

67

"""

68

```

69

70

### Timestamp Functions

71

72

Functions for working with numeric timestamp formats used in web archiving.

73

74

```python { .api }

75

def timestamp_now():

76

"""

77

Get current timestamp in 14-digit format.

78

79

Returns:

80

str: Current timestamp (YYYYMMDDHHMMSS)

81

"""

82

83

def timestamp20_now():

84

"""

85

Get current timestamp in 20-digit format with microseconds.

86

87

Returns:

88

str: Current timestamp (YYYYMMDDHHMMSSNNNNNN)

89

"""

90

91

def iso_date_to_timestamp(string):

92

"""

93

Convert ISO 8601 date string to 14-digit timestamp.

94

95

Args:

96

string (str): ISO 8601 date string

97

98

Returns:

99

str: 14-digit timestamp

100

"""

101

102

def timestamp_to_iso_date(string):

103

"""

104

Convert 14-digit timestamp to ISO 8601 date string.

105

106

Args:

107

string (str): 14-digit timestamp

108

109

Returns:

110

str: ISO 8601 date string

111

"""

112

113

def http_date_to_timestamp(string):

114

"""

115

Convert HTTP date string to 14-digit timestamp.

116

117

Args:

118

string (str): HTTP date string

119

120

Returns:

121

str: 14-digit timestamp

122

"""

123

124

def timestamp_to_http_date(string):

125

"""

126

Convert 14-digit timestamp to HTTP date string.

127

128

Args:

129

string (str): 14-digit timestamp

130

131

Returns:

132

str: HTTP date string

133

"""

134

135

def timestamp_to_datetime(string, tz_aware=False):

136

"""

137

Parse 14-digit timestamp to datetime object.

138

139

Args:

140

string (str): 14-digit timestamp

141

tz_aware (bool): Whether to return timezone-aware datetime

142

143

Returns:

144

datetime: Parsed datetime object

145

"""

146

147

def timestamp_to_sec(string):

148

"""

149

Convert 14-digit timestamp to seconds since Unix epoch.

150

151

Args:

152

string (str): 14-digit timestamp

153

154

Returns:

155

float: Seconds since Unix epoch

156

"""

157

158

def sec_to_timestamp(secs):

159

"""

160

Convert seconds since Unix epoch to 14-digit timestamp.

161

162

Args:

163

secs (float): Seconds since Unix epoch

164

165

Returns:

166

str: 14-digit timestamp

167

"""

168

```

169

170

### Timestamp Formatting and Padding

171

172

Functions for formatting and padding timestamps to specific lengths.

173

174

```python { .api }

175

def pad_timestamp(string, pad_str=PAD_6_UP):

176

"""

177

Pad timestamp to specified length using padding string.

178

179

Args:

180

string (str): Timestamp to pad

181

pad_str (str): Padding string pattern to use

182

183

Returns:

184

str: Padded timestamp

185

"""

186

```

187

188

## Usage Examples

189

190

### Basic Date Conversions

191

192

```python

193

from warcio.timeutils import (

194

iso_date_to_datetime, datetime_to_iso_date,

195

http_date_to_datetime, datetime_to_http_date,

196

datetime_to_timestamp, timestamp_to_datetime

197

)

198

from datetime import datetime

199

200

# Parse ISO date

201

iso_string = "2021-01-01T12:00:00Z"

202

dt = iso_date_to_datetime(iso_string)

203

print(f"Parsed datetime: {dt}")

204

205

# Convert back to ISO

206

iso_back = datetime_to_iso_date(dt)

207

print(f"Back to ISO: {iso_back}")

208

209

# Parse HTTP date

210

http_string = "Fri, 01 Jan 2021 12:00:00 GMT"

211

dt_http = http_date_to_datetime(http_string)

212

print(f"HTTP datetime: {dt_http}")

213

214

# Convert to HTTP format

215

http_back = datetime_to_http_date(dt_http)

216

print(f"Back to HTTP: {http_back}")

217

218

# Convert to 14-digit timestamp

219

timestamp = datetime_to_timestamp(dt)

220

print(f"14-digit timestamp: {timestamp}")

221

222

# Parse timestamp back to datetime

223

dt_from_ts = timestamp_to_datetime(timestamp)

224

print(f"From timestamp: {dt_from_ts}")

225

```

226

227

### Current Timestamps

228

229

```python

230

from warcio.timeutils import timestamp_now, timestamp20_now

231

232

# Get current timestamp in different formats

233

current_14 = timestamp_now()

234

current_20 = timestamp20_now()

235

236

print(f"Current 14-digit: {current_14}")

237

print(f"Current 20-digit: {current_20}")

238

239

# Example outputs:

240

# Current 14-digit: 20210101120000

241

# Current 20-digit: 20210101120000123456

242

```

243

244

### Cross-Format Conversions

245

246

```python

247

from warcio.timeutils import (

248

iso_date_to_timestamp, timestamp_to_iso_date,

249

http_date_to_timestamp, timestamp_to_http_date

250

)

251

252

# Direct conversions without intermediate datetime objects

253

iso_date = "2021-01-01T12:00:00Z"

254

timestamp = iso_date_to_timestamp(iso_date)

255

print(f"ISO to timestamp: {iso_date} -> {timestamp}")

256

257

# Convert timestamp back to ISO

258

iso_back = timestamp_to_iso_date(timestamp)

259

print(f"Timestamp to ISO: {timestamp} -> {iso_back}")

260

261

# HTTP date to timestamp

262

http_date = "Fri, 01 Jan 2021 12:00:00 GMT"

263

timestamp_from_http = http_date_to_timestamp(http_date)

264

print(f"HTTP to timestamp: {http_date} -> {timestamp_from_http}")

265

266

# Timestamp to HTTP date

267

http_back = timestamp_to_http_date(timestamp_from_http)

268

print(f"Timestamp to HTTP: {timestamp_from_http} -> {http_back}")

269

```

270

271

### Unix Epoch Conversions

272

273

```python

274

from warcio.timeutils import timestamp_to_sec, sec_to_timestamp

275

import time

276

277

# Convert 14-digit timestamp to Unix seconds

278

timestamp = "20210101120000"

279

unix_seconds = timestamp_to_sec(timestamp)

280

print(f"Timestamp to seconds: {timestamp} -> {unix_seconds}")

281

282

# Convert Unix seconds back to timestamp

283

timestamp_back = sec_to_timestamp(unix_seconds)

284

print(f"Seconds to timestamp: {unix_seconds} -> {timestamp_back}")

285

286

# Work with current Unix time

287

current_time = time.time()

288

current_timestamp = sec_to_timestamp(current_time)

289

print(f"Current Unix time: {current_time}")

290

print(f"As timestamp: {current_timestamp}")

291

```

292

293

### Timezone Handling

294

295

```python

296

from warcio.timeutils import (

297

iso_date_to_datetime, http_date_to_datetime,

298

timestamp_to_datetime

299

)

300

301

# Parse with timezone awareness

302

iso_with_tz = "2021-01-01T12:00:00+05:00"

303

dt_tz_aware = iso_date_to_datetime(iso_with_tz, tz_aware=True)

304

print(f"Timezone-aware datetime: {dt_tz_aware}")

305

print(f"Timezone info: {dt_tz_aware.tzinfo}")

306

307

# Parse without timezone awareness (default)

308

dt_naive = iso_date_to_datetime(iso_with_tz, tz_aware=False)

309

print(f"Naive datetime: {dt_naive}")

310

print(f"Timezone info: {dt_naive.tzinfo}")

311

312

# Same applies to other parsing functions

313

http_date = "Fri, 01 Jan 2021 12:00:00 GMT"

314

http_tz_aware = http_date_to_datetime(http_date, tz_aware=True)

315

print(f"HTTP timezone-aware: {http_tz_aware}")

316

317

timestamp = "20210101120000"

318

ts_tz_aware = timestamp_to_datetime(timestamp, tz_aware=True)

319

print(f"Timestamp timezone-aware: {ts_tz_aware}")

320

```

321

322

### Microsecond Precision

323

324

```python

325

from warcio.timeutils import datetime_to_iso_date, timestamp20_now

326

from datetime import datetime

327

328

# Create datetime with microseconds

329

dt_with_micros = datetime(2021, 1, 1, 12, 0, 0, 123456)

330

331

# Convert to ISO with microseconds

332

iso_with_micros = datetime_to_iso_date(dt_with_micros, use_micros=True)

333

print(f"ISO with microseconds: {iso_with_micros}")

334

335

# Convert without microseconds (default)

336

iso_without_micros = datetime_to_iso_date(dt_with_micros, use_micros=False)

337

print(f"ISO without microseconds: {iso_without_micros}")

338

339

# 20-digit timestamp includes microseconds

340

timestamp_20 = timestamp20_now()

341

print(f"20-digit timestamp: {timestamp_20}")

342

print(f" Date part: {timestamp_20[:8]}")

343

print(f" Time part: {timestamp_20[8:14]}")

344

print(f" Microsec part: {timestamp_20[14:]}")

345

```

346

347

### Timestamp Padding

348

349

```python

350

from warcio.timeutils import pad_timestamp

351

352

# Example of timestamp padding (actual padding constants depend on implementation)

353

short_timestamp = "202101" # Partial timestamp

354

padded = pad_timestamp(short_timestamp)

355

print(f"Padded timestamp: {short_timestamp} -> {padded}")

356

357

# This function is typically used internally for normalizing timestamps

358

# to consistent lengths for sorting and comparison

359

```

360

361

### WARC Date Creation

362

363

```python

364

from warcio.timeutils import datetime_to_iso_date, timestamp_now

365

from datetime import datetime

366

367

# Create WARC-Date header value (ISO format)

368

current_time = datetime.utcnow()

369

warc_date = datetime_to_iso_date(current_time)

370

print(f"WARC-Date: {warc_date}")

371

372

# Alternative using timestamp function

373

warc_date_alt = timestamp_now()

374

print(f"WARC-Date (timestamp): {warc_date_alt}")

375

376

# For the WARC-Date header itself, use the ISO 8601 form (a 14-digit timestamp is not a valid WARC-Date value):

377

# WARC-Date: 2021-01-01T12:00:00Z

378

```

379

380

### Batch Date Processing

381

382

```python

383

from warcio.timeutils import iso_date_to_timestamp, timestamp_to_http_date

384

385

# Process multiple dates

386

dates = [

387

"2021-01-01T12:00:00Z",

388

"2021-06-15T14:30:45Z",

389

"2021-12-31T23:59:59Z"

390

]

391

392

# Convert to timestamps for sorting

393

timestamps = [iso_date_to_timestamp(date) for date in dates]

394

print("Timestamps for sorting:")

395

for orig, ts in zip(dates, timestamps):

396

print(f" {orig} -> {ts}")

397

398

# Convert timestamps to HTTP dates for headers

399

http_dates = [timestamp_to_http_date(ts) for ts in timestamps]

400

print("\nHTTP date headers:")

401

for ts, http in zip(timestamps, http_dates):

402

print(f" {ts} -> {http}")

403

```