0
# Schema-Based Data Generation
1
2
High-level tools for schema-driven generation of structured data. A schema function describes a single record; `Schema` then produces records in bulk for testing and development and can export them to CSV, JSON, or pickle files.
3
4
## Capabilities
5
6
### Field Generation
7
8
```python { .api }
9
class Field:
10
"""Single field data generation with provider method access."""
11
12
def __init__(self, locale: Locale = Locale.DEFAULT):
13
"""Initialize Field with locale."""
14
15
def __call__(self, provider_method: str, *args, **kwargs) -> Any:
16
"""
17
Generate data using provider method.
18
19
Parameters:
20
- provider_method (str): Method path like 'person.full_name'
21
- *args, **kwargs: Arguments for the provider method
22
23
Returns:
24
Generated data value
25
26
Usage:
27
field = Field(Locale.EN)
name = field('person.full_name')
email = field('internet.email')
age = field('person.age', minimum=18, maximum=65)
33
"""
34
35
def register_handler(self, field_name: str, field_handler: callable) -> None:
36
"""Register custom field handler."""
37
38
def handle(self, field_name: str = None):
39
"""Decorator for registering field handlers."""
40
41
def reseed(self, seed: int) -> None:
42
"""Reseed random generator."""
43
```
44
45
### Fieldset Generation
46
47
```python { .api }
48
class Fieldset(Field):
49
"""Multiple field data generation."""
50
51
def __call__(self, provider_method: str, *args, **kwargs) -> list[Any]:
52
"""Generate list of values using provider method."""
53
```
54
55
### Schema Generation and Export
56
57
```python { .api }
58
class Schema:
59
"""Schema-based bulk data generation with export capabilities."""
60
61
def __init__(self, schema: callable, iterations: int = 1):
62
"""
63
Initialize Schema with schema function.
64
65
Parameters:
66
- schema (callable): Zero-argument function that returns the template for one record (typically a dict of `field(...)` values)
67
- iterations (int): Number of records to generate
68
"""
69
70
def create(self) -> list[dict]:
71
"""
72
Generate list of schema instances.
73
74
Returns:
75
list[dict]: Generated data records
76
"""
77
78
def to_csv(self, file_path: str, **kwargs) -> None:
79
"""
80
Export generated data to CSV file.
81
82
Parameters:
83
- file_path (str): Output CSV file path
84
- **kwargs: Additional CSV writer arguments
85
"""
86
87
def to_json(self, file_path: str, **kwargs) -> None:
88
"""
89
Export generated data to JSON file.
90
91
Parameters:
92
- file_path (str): Output JSON file path
93
- **kwargs: Additional JSON arguments
94
"""
95
96
def to_pickle(self, file_path: str, **kwargs) -> None:
97
"""
98
Export generated data to pickle file.
99
100
Parameters:
101
- file_path (str): Output pickle file path
102
- **kwargs: Additional pickle arguments
103
"""
104
105
def __iter__(self):
106
"""Iterator support for schema instances."""
107
108
def __next__(self):
109
"""Next method for iterator."""
110
```
111
112
## Usage Examples
113
114
### Basic Field Usage
115
116
```python
117
from mimesis import Field
118
from mimesis.locales import Locale
119
120
field = Field(Locale.EN)
121
122
# Generate individual fields
123
name = field('person.full_name')
124
email = field('internet.email')
125
age = field('person.age', minimum=21, maximum=65)
126
city = field('address.city')
127
```
128
129
### Schema Definition and Generation
130
131
```python
132
from mimesis import Field, Schema
133
from mimesis.locales import Locale
134
135
# Define schema function
136
field = Field(Locale.EN)
137
138
def user_schema():
139
return {
140
'id': field('increment'),
141
'name': field('person.full_name'),
142
'email': field('internet.email'),
143
'age': field('person.age', minimum=18, maximum=65),
144
'address': {
145
'street': field('address.address'),
146
'city': field('address.city'),
147
'state': field('address.state'),
148
'postal_code': field('address.postal_code')
149
},
150
'profile': {
151
'occupation': field('person.occupation'),
152
'company': field('finance.company'),
153
'salary': field('finance.price', minimum=30000, maximum=150000)
154
}
155
}
156
157
# Create schema and generate data
158
schema = Schema(schema=user_schema, iterations=100)
159
users = schema.create() # List of 100 user records
160
```
161
162
### Data Export
163
164
```python
165
from mimesis import Field, Schema
166
167
field = Field()
168
schema = Schema(
169
schema=lambda: {
170
'name': field('person.full_name'),
171
'email': field('internet.email'),
172
'company': field('finance.company')
173
},
174
iterations=1000
175
)
176
177
# Export to different formats
178
schema.to_csv('users.csv')
179
schema.to_json('users.json', indent=2)
180
schema.to_pickle('users.pkl')
181
```
182
183
### Custom Field Handlers
184
185
```python
186
from mimesis import Field
187
import random
188
189
field = Field()
190
191
# Register custom handler
192
@field.handle('custom_score')
193
def custom_score_handler(random, **kwargs):  # mimesis passes its random instance as the first argument
194
return random.randint(0, 100)
195
196
# Alternative registration
197
def status_handler(random, **kwargs):  # mimesis passes its random instance as the first argument
198
return random.choice(['active', 'inactive', 'pending'])
199
200
field.register_handler('status', status_handler)
201
202
# Use custom handlers in schema
203
def record_schema():
204
return {
205
'name': field('person.full_name'),
206
'score': field('custom_score'),
207
'status': field('status')
208
}
209
```
210
211
### Complex Nested Schema
212
213
```python
214
from mimesis import Field, Schema
215
from mimesis.locales import Locale
216
217
field = Field(Locale.EN)
218
219
def complex_schema():
220
return {
221
'user': {
222
'id': field('increment'),
223
'personal': {
224
'name': field('person.full_name'),
225
'email': field('internet.email'),
226
'birthdate': field('datetime.date', start=1980, end=2000),
227
'contacts': [
228
field('person.phone_number') for _ in range(2)
229
]
230
},
231
'address': {
232
'primary': {
233
'street': field('address.address'),
234
'city': field('address.city'),
235
'coordinates': field('address.coordinates')
236
},
237
'billing': {
238
'street': field('address.address'),
239
'city': field('address.city')
240
}
241
},
242
'preferences': {
243
'languages': [field('person.language') for _ in range(3)],
244
'timezone': field('datetime.timezone')
245
}
246
},
247
'metadata': {
248
'created_at': field('datetime.datetime'),
249
'user_agent': field('internet.user_agent'),
250
'ip_address': field('internet.ip_v4')
251
}
252
}
253
254
schema = Schema(schema=complex_schema, iterations=50)
255
complex_data = schema.create()
256
```
257
258
### Fieldset for Multiple Values
259
260
```python
261
from mimesis import Fieldset
262
263
# Generate multiple values at once
264
fieldset = Fieldset()
265
names = fieldset('person.full_name', i=10) # List of 10 names
266
emails = fieldset('internet.email', i=5) # List of 5 emails
267
```
268
269
### Iterator Pattern
270
271
```python
272
from mimesis import Field, Schema
273
274
field = Field()
275
schema = Schema(
276
schema=lambda: {
277
'id': field('increment'),
278
'name': field('person.full_name')
279
},
280
iterations=1000
281
)
282
283
# Use as iterator for memory efficiency
284
for record in schema:
285
# Process one record at a time
286
print(record['name'])
287
if record['id'] > 10:
288
break
289
```