or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

autorest-processing.md configuration.md document-processing.md index.md message-handling.md

document-processing.mddocs/

0

# Document Processing

1

2

Utilities for processing OpenAPI specifications, literate configuration documents, and various input formats with automatic type detection and conversion. AutoRest Core provides comprehensive document processing capabilities for handling different specification formats and configurations.

3

4

## Capabilities

5

6

### Document Type Identification

7

8

Automatic detection of document types based on content analysis.

9

10

```typescript { .api }

11

/**

12

* Determines the document type based on content

13

* Analyzes document structure to classify the type

14

* @param content - Document content to analyze

15

* @returns Promise resolving to DocumentType classification

16

*/

17

function IdentifyDocument(content: string): Promise<DocumentType>;

18

19

/**
 * Document type enumeration
 * Classifies different types of documents AutoRest can process
 */
enum DocumentType {
  /** OpenAPI 2.0 specification (Swagger) */
  OpenAPI2 = "OpenAPI2",

  /** OpenAPI 3.0 specification */
  OpenAPI3 = "OpenAPI3",

  /** Literate configuration document with AutoRest directives */
  LiterateConfiguration = "LiterateConfiguration",

  /** Unknown or unsupported document type */
  Unknown = "Unknown"
}

36

```

37

38

### Document Format Support

39

40

Support for multiple input and output formats.

41

42

```typescript { .api }

43

/**
 * Document format enumeration
 * Defines supported document formats
 */
enum DocumentFormat {
  /** Markdown format for literate documents */
  Markdown = "markdown",

  /** YAML format for specifications */
  Yaml = "yaml",

  /** JSON format for specifications */
  Json = "json",

  /** Unknown or unsupported format */
  Unknown = "unknown"
}

60

61

/**

62

* Document extension mappings

63

* Maps file extensions to document formats

64

*/

65

const DocumentExtension: {

66

yaml: DocumentFormat.Yaml;

67

yml: DocumentFormat.Yaml;

68

json: DocumentFormat.Json;

69

md: DocumentFormat.Markdown;

70

markdown: DocumentFormat.Markdown;

71

};

72

73

/**

74

* Document pattern definitions

75

* Glob patterns for different document types

76

*/

77

const DocumentPatterns: {

78

yaml: string[];

79

json: string[];

80

markdown: string[];

81

all: string[];

82

};

83

```

84

85

### Literate Document Processing

86

87

Conversion utilities for literate documents containing embedded specifications.

88

89

```typescript { .api }

90

/**

91

* Processes a literate document and returns JSON representation

92

* Converts literate markdown/YAML documents to JSON format

93

* @param content - Literate document content

94

* @returns Promise resolving to the JSON string representation of the document

95

*/

96

function LiterateToJson(content: string): Promise<string>;

97

98

/**

99

* Checks if document is a literate configuration document

100

* Detects AutoRest magic string in document content

101

* @param content - Document content to check

102

* @returns Promise resolving to boolean indicating if it's a configuration document

103

*/

104

function IsConfigurationDocument(content: string): Promise<boolean>;

105

106

/**

107

* Checks if file extension indicates a configuration document

108

* Validates file extensions for configuration file types

109

* @param extension - File extension to check (without leading dot)

110

* @returns Promise resolving to boolean indicating if extension is valid for configuration

111

*/

112

function IsConfigurationExtension(extension: string): Promise<boolean>;

113

```

114

115

### OpenAPI Document Validation

116

117

Validation utilities specifically for OpenAPI specifications.

118

119

```typescript { .api }

120

/**

121

* Checks if document is an OpenAPI specification

122

* Validates document structure for OpenAPI compliance

123

* @param content - Document content to validate

124

* @returns Promise resolving to boolean indicating OpenAPI compliance

125

*/

126

function IsOpenApiDocument(content: string): Promise<boolean>;

127

128

/**

129

* Checks if file extension indicates an OpenAPI document

130

* Validates file extensions for OpenAPI specification types

131

* @param extension - File extension to check (without leading dot)

132

* @returns Promise resolving to boolean indicating if extension is valid for OpenAPI

133

*/

134

function IsOpenApiExtension(extension: string): Promise<boolean>;

135

```

136

137

138

**Usage Examples:**

139

140

```typescript

141

import {

142

IdentifyDocument,

143

LiterateToJson,

144

IsOpenApiDocument,

145

IsConfigurationDocument,

146

DocumentType,

147

DocumentFormat,

148

DocumentExtension

149

} from "@microsoft.azure/autorest-core";

150

151

// Document type identification

152

const swaggerContent = JSON.stringify({

153

swagger: "2.0",

154

info: { title: "Pet Store API", version: "1.0" },

155

paths: {}

156

});

157

158

const docType = await IdentifyDocument(swaggerContent);

159

console.log("Document type:", docType); // DocumentType.OpenAPI2

160

161

// OpenAPI 3.0 detection

162

const openapi3Content = JSON.stringify({

163

openapi: "3.0.0",

164

info: { title: "Pet Store API", version: "1.0" },

165

paths: {}

166

});

167

168

const openapi3Type = await IdentifyDocument(openapi3Content);

169

console.log("Document type:", openapi3Type); // DocumentType.OpenAPI3

170

171

// Configuration document detection

172

const configContent = `

173

# AutoRest Configuration

174

175

> see https://aka.ms/autorest

176

177

\`\`\`yaml

178

input-file: swagger.json

179

output-folder: ./generated

180

\`\`\`

181

`;

182

183

const isConfig = await IsConfigurationDocument(configContent);

184

console.log("Is configuration:", isConfig); // true

185

186

const configType = await IdentifyDocument(configContent);

187

console.log("Config document type:", configType); // DocumentType.LiterateConfiguration

188

```

189

190

### Literate Document Conversion

191

192

```typescript

193

// Convert literate document to JSON

194

// A literate (Markdown) document whose fenced YAML block holds the actual
// Swagger 2.0 specification. The YAML must keep its nesting: `title` and
// `version` belong to `info`, and the operation belongs to the `/pets` path.
const literateSwagger = `
# Pet Store API

This is a sample Pet Store API specification.

\`\`\`yaml
swagger: "2.0"
info:
  title: Pet Store API
  version: "1.0"
host: petstore.swagger.io
basePath: /v2
paths:
  /pets:
    get:
      summary: List pets
      responses:
        200:
          description: A list of pets
\`\`\`
`;

try {
  // LiterateToJson resolves to a JSON string, so it still needs parsing.
  const jsonContent = await LiterateToJson(literateSwagger);
  const parsedSpec = JSON.parse(jsonContent);
  console.log("Converted specification:", parsedSpec);
} catch (error) {
  // Covers both a rejected conversion and output that is not valid JSON.
  console.error("Conversion failed:", error);
}

223

```

224

225

### File Extension Validation

226

227

```typescript

228

// Validate file extensions

229

// Sample file names: one per extension listed in DocumentExtension, plus an
// unsupported one ("data.txt").
const files = [
  "api.json",
  "swagger.yaml",
  "readme.md",
  "config.markdown",
  "spec.yml",
  "data.txt"
];

for (const file of files) {
  // Take the text after the last dot; a name without any dot has no extension
  // (split('.').pop() would have returned the whole file name instead).
  const dotIndex = file.lastIndexOf('.');
  const extension = dotIndex >= 0 ? file.slice(dotIndex + 1) : '';

  const isOpenApi = await IsOpenApiExtension(extension);
  const isConfig = await IsConfigurationExtension(extension);

  // DocumentExtension only has a fixed set of keys, so check for an own key
  // before indexing. This keeps the lookup type-safe and avoids picking up
  // inherited members such as "constructor".
  const format = Object.prototype.hasOwnProperty.call(DocumentExtension, extension)
    ? DocumentExtension[extension as keyof typeof DocumentExtension]
    : 'unknown';

  console.log(`${file}:`);
  console.log(` OpenAPI: ${isOpenApi}`);
  console.log(` Configuration: ${isConfig}`);
  console.log(` Format: ${format}`);
}

249

```

250

251

### Document Processing Pipeline

252

253

```typescript

254

// Complete document processing workflow

255

/**
 * Identifies a document's type and logs a short, type-specific summary.
 *
 * - OpenAPI 2.0 / 3.0: parses `content` as JSON and logs `info.title` and
 *   `info.version`.
 * - Literate configuration: converts the document with `LiterateToJson` and
 *   logs `info.title` of the converted result.
 * - Unknown: logs that the document is skipped.
 *
 * @param content - Raw document text
 * @param filename - Name of the document; used only in log output
 * @returns Promise that resolves once the summary has been logged
 * @throws SyntaxError when an OpenAPI document is not valid JSON, because
 *         `JSON.parse` is not guarded on that path
 */
async function processDocument(content: string, filename: string): Promise<void> {
  // Identify document type
  const docType = await IdentifyDocument(content);
  console.log(`Processing ${filename} as ${docType}`);

  // Each case body is wrapped in braces so its `const` declarations stay local
  // to that case instead of sharing one switch-wide scope.
  switch (docType) {
    case DocumentType.OpenAPI2:
    case DocumentType.OpenAPI3: {
      // Both OpenAPI versions are summarised identically; only the label differs.
      const versionLabel = docType === DocumentType.OpenAPI2 ? "2.0" : "3.0";
      console.log(`Processing OpenAPI ${versionLabel} specification`);
      const spec = JSON.parse(content);
      console.log(`API Title: ${spec.info.title}`);
      console.log(`API Version: ${spec.info.version}`);
      break;
    }

    case DocumentType.LiterateConfiguration: {
      console.log("Processing literate configuration");
      try {
        const jsonContent = await LiterateToJson(content);
        // Skip parsing when the conversion produced an empty result.
        if (jsonContent) {
          const extractedSpec = JSON.parse(jsonContent);
          console.log("Extracted specification from literate document");
          console.log(`Title: ${extractedSpec.info?.title}`);
        }
      } catch (error) {
        // Best-effort: a failed conversion or unparsable output is reported
        // as "nothing to extract" rather than aborting the caller.
        console.log("No extractable specification found");
      }
      break;
    }

    case DocumentType.Unknown: {
      console.log("Unknown document type - skipping");
      break;
    }
  }
}

294

295

// Example usage

296

const documents = [

297

{ content: swaggerContent, filename: "swagger.json" },

298

{ content: openapi3Content, filename: "openapi3.json" },

299

{ content: configContent, filename: "readme.md" }

300

];

301

302

for (const doc of documents) {

303

await processDocument(doc.content, doc.filename);

304

}

305

```

306

307

### Batch Document Processing

308

309

```typescript

310

// Process multiple documents with type detection

311

/**
 * Identifies the type of every document, logs one line per document, and then
 * logs a summary with the number of documents seen per type.
 *
 * @param documents - Documents to classify; `uri` is used only in log output
 * @returns Promise that resolves once the summary has been logged
 */
async function batchProcessDocuments(
  documents: Array<{ content: string; uri: string }>
): Promise<void> {
  // Running tally per detected document type.
  const results = {
    openapi2: 0,
    openapi3: 0,
    configurations: 0,
    unknown: 0
  };

  // Documents are processed one at a time so the log lines keep input order.
  for (const doc of documents) {
    const docType = await IdentifyDocument(doc.content);

    switch (docType) {
      case DocumentType.OpenAPI2:
        results.openapi2++;
        console.log(`📄 OpenAPI 2.0: ${doc.uri}`);
        break;
      case DocumentType.OpenAPI3:
        results.openapi3++;
        console.log(`📄 OpenAPI 3.0: ${doc.uri}`);
        break;
      case DocumentType.LiterateConfiguration:
        results.configurations++;
        console.log(`⚙️ Configuration: ${doc.uri}`);
        break;
      default:
        // Anything that is not one of the three known types counts as unknown.
        results.unknown++;
        console.log(`❓ Unknown: ${doc.uri}`);
        break;
    }
  }

  console.log("\nDocument Processing Summary:");
  console.log(`OpenAPI 2.0 specifications: ${results.openapi2}`);
  console.log(`OpenAPI 3.0 specifications: ${results.openapi3}`);
  console.log(`Configuration documents: ${results.configurations}`);
  console.log(`Unknown documents: ${results.unknown}`);
}

350

```

351

352

### Document Format Detection

353

354

```typescript

355

// Detect format from content

356

/**
 * Guesses the format of raw document text with lightweight content heuristics.
 *
 * Checks run in this order and the first match wins:
 *  1. the text parses as JSON                               -> Json
 *  2. the text contains a fenced code block (three backticks) -> Markdown
 *  3. the text contains `swagger:`, `openapi:` or `---`     -> Yaml
 *  4. the text contains `#`                                 -> Markdown
 *  5. otherwise                                             -> Unknown
 *
 * The fenced-block check comes before the YAML check so that a literate
 * Markdown document embedding a YAML specification is reported as Markdown
 * rather than as YAML.
 *
 * @param content - Document text to inspect
 * @returns The detected format, or `DocumentFormat.Unknown` when nothing matches
 */
function detectDocumentFormat(content: string): DocumentFormat {
  try {
    JSON.parse(content);
    return DocumentFormat.Json;
  } catch {
    // Not JSON; fall through to the text heuristics below.
  }

  // A code fence only occurs in Markdown, so it outranks YAML-looking keys
  // that may appear inside the fenced block.
  if (content.includes('```')) {
    return DocumentFormat.Markdown;
  }

  // YAML patterns: OpenAPI root keys or a document separator.
  if (content.includes('swagger:') || content.includes('openapi:') || content.includes('---')) {
    return DocumentFormat.Yaml;
  }

  // Remaining markdown pattern: a heading marker.
  if (content.includes('#')) {
    return DocumentFormat.Markdown;
  }

  return DocumentFormat.Unknown;
}

374

375

// Use format detection

376

// Minimal YAML specification; `title` and `version` are nested under `info`.
const testContent = `
swagger: "2.0"
info:
  title: API
  version: "1.0"
`;

const format = detectDocumentFormat(testContent);
console.log("Detected format:", format); // DocumentFormat.Yaml

385

```

386

387

### Advanced Document Analysis

388

389

```typescript

390

// Comprehensive document analysis

391

/**
 * Runs every detector over a document and gathers the results in one report.
 *
 * For documents identified as OpenAPI 2.0 or 3.0, the content is additionally
 * parsed as JSON to extract `metadata` (title, version, host and path count).
 * Metadata is best-effort: when the content is not valid JSON, `metadata` is
 * left undefined and no error is raised.
 *
 * @param content - Document text to analyze
 * @returns Promise resolving to the detected type and format, the two boolean
 *          detector results, and the optional metadata
 */
async function analyzeDocument(content: string): Promise<{
  type: DocumentType;
  format: DocumentFormat;
  isOpenApi: boolean;
  isConfiguration: boolean;
  metadata?: any;
}> {
  // The three detectors are independent of each other, so run them concurrently.
  const [type, isOpenApi, isConfiguration] = await Promise.all([
    IdentifyDocument(content),
    IsOpenApiDocument(content),
    IsConfigurationDocument(content)
  ]);

  const format = detectDocumentFormat(content);

  let metadata;
  if (type === DocumentType.OpenAPI2 || type === DocumentType.OpenAPI3) {
    try {
      // Inside this branch the type can only be OpenAPI2 or OpenAPI3, so the
      // content is parsed directly; no literate conversion applies here.
      const spec = JSON.parse(content);
      metadata = {
        title: spec.info?.title,
        version: spec.info?.version,
        // `host` is the OpenAPI 2.0 field; 3.0 documents list servers instead.
        host: spec.host || spec.servers?.[0]?.url,
        pathCount: Object.keys(spec.paths || {}).length
      };
    } catch {
      // Ignore parsing errors for metadata
    }
  }

  return {
    type,
    format,
    isOpenApi,
    isConfiguration,
    metadata
  };
}

431

432

// Example usage

433

const analysis = await analyzeDocument(swaggerContent);

434

console.log("Document analysis:", analysis);

435

```

436

437