# Moderations

Check content against OpenAI's usage policies to detect potentially harmful content across multiple categories including hate speech, violence, sexual content, and self-harm. Supports both text and image inputs for multi-modal moderation.

## Capabilities

### Create Moderation

Classify text and/or image content for policy violations.

```python { .api }
def create(
    self,
    *,
    input: str | list[str] | list[ModerationMultiModalInputParam],
    model: str | ModerationModel | Omit = omit,
    extra_headers: dict[str, str] | None = None,
    extra_query: dict[str, object] | None = None,
    extra_body: dict[str, object] | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ModerationCreateResponse:
    """
    Classify text and/or image inputs against OpenAI's usage policies.

    Args:
        input: Content to classify. Can be:
            - Single string: "Text to moderate"
            - List of strings: ["Text 1", "Text 2"]
            - List of multi-modal inputs: [{"type": "text", "text": "..."},
              {"type": "image_url", "image_url": {"url": "..."}}]
            Maximum 32,768 characters per text input.

        model: Moderation model to use. Options:
            - "text-moderation-latest": Latest text model, automatically updated
            - "text-moderation-stable": Stable text model, less frequent updates
            - "omni-moderation-latest": Latest multi-modal model (supports text + images, default)
            - "omni-moderation-2024-09-26": Specific omni model version

        extra_headers: Additional HTTP headers.
        extra_query: Additional query parameters.
        extra_body: Additional JSON fields.
        timeout: Request timeout in seconds.

    Returns:
        ModerationCreateResponse: Contains flagged status and category scores
        for each input.

    Raises:
        BadRequestError: Input exceeds maximum length
        AuthenticationError: Invalid API key
    """
```

Usage examples:

```python
from openai import OpenAI

client = OpenAI()

# Check single text
response = client.moderations.create(
    input="I want to hurt someone"
)

result = response.results[0]
print(f"Flagged: {result.flagged}")

if result.flagged:
    print("Violated categories:")
    for category, flagged in result.categories.model_dump().items():
        if flagged:
            score = getattr(result.category_scores, category)
            print(f"  {category}: {score:.4f}")

# Check multiple texts
texts = [
    "Hello, how are you?",
    "This is inappropriate content",
    "What's the weather like today?"
]

response = client.moderations.create(input=texts)

for i, result in enumerate(response.results):
    print(f"Text {i + 1}: {'Flagged' if result.flagged else 'Safe'}")

# Use latest omni model
response = client.moderations.create(
    model="omni-moderation-latest",
    input="Check this message for violations"
)

# Use stable model for consistent behavior
response = client.moderations.create(
    model="text-moderation-stable",
    input="Testing moderation"
)

# Multi-modal moderation with text and images
response = client.moderations.create(
    model="omni-moderation-latest",
    input=[
        {"type": "text", "text": "Check this message"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.jpg"}
        }
    ]
)

# Moderate image from base64
import base64

with open("image.jpg", "rb") as f:
    image_data = base64.b64encode(f.read()).decode()

response = client.moderations.create(
    model="omni-moderation-latest",
    input=[
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}
        }
    ]
)

# Detailed category analysis
response = client.moderations.create(
    input="Potentially problematic text"
)

result = response.results[0]

# All categories and scores
categories = result.categories
scores = result.category_scores

print("Category Analysis:")
print(f"  Hate: {scores.hate:.4f} (flagged: {categories.hate})")
print(f"  Hate/Threatening: {scores.hate_threatening:.4f} (flagged: {categories.hate_threatening})")
print(f"  Harassment: {scores.harassment:.4f} (flagged: {categories.harassment})")
print(f"  Harassment/Threatening: {scores.harassment_threatening:.4f} (flagged: {categories.harassment_threatening})")
print(f"  Self-Harm: {scores.self_harm:.4f} (flagged: {categories.self_harm})")
print(f"  Self-Harm/Intent: {scores.self_harm_intent:.4f} (flagged: {categories.self_harm_intent})")
print(f"  Self-Harm/Instructions: {scores.self_harm_instructions:.4f} (flagged: {categories.self_harm_instructions})")
print(f"  Sexual: {scores.sexual:.4f} (flagged: {categories.sexual})")
print(f"  Sexual/Minors: {scores.sexual_minors:.4f} (flagged: {categories.sexual_minors})")
print(f"  Violence: {scores.violence:.4f} (flagged: {categories.violence})")
print(f"  Violence/Graphic: {scores.violence_graphic:.4f} (flagged: {categories.violence_graphic})")

# Filter user content example
def is_safe_content(text: str) -> tuple[bool, list[str]]:
    """
    Check if content is safe to use.
    Returns (is_safe, violated_categories)
    """
    response = client.moderations.create(input=text)
    result = response.results[0]

    if not result.flagged:
        return True, []

    violated = [
        category for category, flagged in result.categories.model_dump().items()
        if flagged
    ]

    return False, violated

# Use in application
user_input = "Some user-generated content"
is_safe, violations = is_safe_content(user_input)

if is_safe:
    print("Content approved")
else:
    print(f"Content rejected. Violations: {', '.join(violations)}")
```

## Types

```python { .api }
from typing import Literal, Union
from typing_extensions import TypedDict
from pydantic import BaseModel

class ModerationCreateResponse(BaseModel):
    """Moderation response."""
    id: str
    model: str
    results: list[ModerationResult]

class ModerationResult(BaseModel):
    """Single moderation result."""
    flagged: bool
    categories: ModerationCategories
    category_scores: ModerationCategoryScores
    category_applied_input_types: ModerationCategoryAppliedInputTypes

class ModerationCategories(BaseModel):
    """Category flags (true if violated)."""
    hate: bool
    hate_threatening: bool
    harassment: bool
    harassment_threatening: bool
    self_harm: bool
    self_harm_intent: bool
    self_harm_instructions: bool
    sexual: bool
    sexual_minors: bool
    violence: bool
    violence_graphic: bool
    illicit: bool
    illicit_violent: bool

class ModerationCategoryScores(BaseModel):
    """Confidence scores (0-1) for each category."""
    hate: float
    hate_threatening: float
    harassment: float
    harassment_threatening: float
    self_harm: float
    self_harm_intent: float
    self_harm_instructions: float
    sexual: float
    sexual_minors: float
    violence: float
    violence_graphic: float
    illicit: float
    illicit_violent: float

class ModerationCategoryAppliedInputTypes(BaseModel):
    """Input types that triggered each category."""
    hate: list[str]
    hate_threatening: list[str]
    harassment: list[str]
    harassment_threatening: list[str]
    self_harm: list[str]
    self_harm_intent: list[str]
    self_harm_instructions: list[str]
    sexual: list[str]
    sexual_minors: list[str]
    violence: list[str]
    violence_graphic: list[str]
    illicit: list[str]
    illicit_violent: list[str]

# Model type
ModerationModel = Literal[
    "text-moderation-latest",
    "text-moderation-stable",
    "omni-moderation-latest",
    "omni-moderation-2024-09-26"
]

# Multi-modal input types
class ModerationTextInputParam(TypedDict):
    """Text input for moderation."""
    text: str  # Required: Text content to moderate
    type: Literal["text"]  # Required: Always "text"

class ImageURL(TypedDict):
    """Image URL or base64 data."""
    url: str  # Required: URL or data:image/...;base64,... string

class ModerationImageURLInputParam(TypedDict):
    """Image input for moderation."""
    image_url: ImageURL  # Required: Image URL or base64 data
    type: Literal["image_url"]  # Required: Always "image_url"

# Union type for multi-modal inputs
ModerationMultiModalInputParam = Union[
    ModerationTextInputParam,
    ModerationImageURLInputParam
]
```

## Category Descriptions

| Category | Description |
|----------|-------------|
| hate | Content expressing, inciting, or promoting hate based on protected characteristics |
| hate/threatening | Hateful content that also includes violence or serious harm |
| harassment | Content harassing, bullying, or abusing an individual |
| harassment/threatening | Harassing content that also includes violence or serious harm |
| self-harm | Content promoting, encouraging, or depicting acts of self-harm |
| self-harm/intent | Content indicating intent to engage in self-harm |
| self-harm/instructions | Content providing instructions or advice for self-harm |
| sexual | Content meant to arouse sexual excitement |
| sexual/minors | Sexual content involving individuals under 18 |
| violence | Content depicting death, violence, or physical injury |
| violence/graphic | Graphic violent content with extreme detail |
| illicit | Content promoting illicit substances or illegal activities |
| illicit/violent | Illicit content involving violence |

## Best Practices

```python
from openai import OpenAI

client = OpenAI()

# 1. Moderate user-generated content before processing
def moderate_before_processing(user_text: str):
    response = client.moderations.create(input=user_text)

    if response.results[0].flagged:
        return None, "Content violates policies"

    # Safe to process
    return process_safe_content(user_text), None

# 2. Batch moderation for efficiency
user_messages = ["msg1", "msg2", "msg3"]
response = client.moderations.create(input=user_messages)

safe_messages = [
    msg for msg, result in zip(user_messages, response.results)
    if not result.flagged
]

# 3. Log violations for analysis
for i, result in enumerate(response.results):
    if result.flagged:
        violated_categories = [
            cat for cat, flagged in result.categories.model_dump().items()
            if flagged
        ]
        log_violation(user_messages[i], violated_categories)

# 4. Use thresholds for borderline content
def is_definitely_safe(text: str, threshold: float = 0.5) -> bool:
    response = client.moderations.create(input=text)
    result = response.results[0]

    # Check if any score exceeds threshold
    scores = result.category_scores.model_dump()
    return all(score < threshold for score in scores.values())
```

## Async Usage

```python
import asyncio
from openai import AsyncOpenAI

async def moderate_content(text: str):
    client = AsyncOpenAI()

    response = await client.moderations.create(input=text)
    return response.results[0].flagged

# Run async
is_flagged = asyncio.run(moderate_content("Check this text"))
```