Virtual file system operations within the worker environment for managing files, data, and resources. The worker provides a complete file system interface for Python code execution.
Write files to the worker's virtual file system with support for text and binary data.
/**
* Write a file to the worker's filesystem
* @param path - File path in the worker filesystem (Unix-style paths)
* @param data - File data as string or binary data
* @param opts - Optional file writing options
* @returns Promise that resolves when file is written
*/
writeFile(path: string, data: string | ArrayBufferView, opts?: Record<string, unknown>): Promise<void>;
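For example, a minimal sketch (assuming `worker` is an already-initialized WorkerProxy; paths and contents are illustrative):

// Write a text file and a binary file (illustrative paths)
await worker.writeFile("/app/hello.txt", "Hello from the main thread");
await worker.writeFile("/app/raw.bin", new Uint8Array([1, 2, 3, 4]));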
Rename or move files within the worker's virtual file system.
/**
* Rename a file in the worker's filesystem
* @param oldPath - Current file path
* @param newPath - New file path
* @returns Promise that resolves when file is renamed
*/
renameFile(oldPath: string, newPath: string): Promise<void>;
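Continuing the sketch above:

// Move the text file to a new path
await worker.renameFile("/app/hello.txt", "/app/hello_renamed.txt");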
Delete files from the worker's virtual file system.
/**
* Delete a file from the worker's filesystem
* @param path - Path to the file to delete
* @returns Promise that resolves when file is deleted
*/
unlink(path: string): Promise<void>;
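Continuing the sketch above:

// Remove the file once it is no longer needed
await worker.unlink("/app/hello_renamed.txt");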
Usage Examples:
import { WorkerProxy } from "@gradio/wasm";

const worker = new WorkerProxy(options);

worker.addEventListener("initialization-completed", async () => {
  // Write text file
  await worker.writeFile("/app/config.json", JSON.stringify({
    model: "gpt-3.5-turbo",
    temperature: 0.7,
    max_tokens: 1000
  }));

  // Write binary file (e.g., image data)
  const imageData = new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10]); // PNG header
  await worker.writeFile("/app/assets/image.png", imageData);

  // Write CSV data
  const csvData = "name,age,city\nAlice,25,New York\nBob,30,San Francisco";
  await worker.writeFile("/app/data/users.csv", csvData, { encoding: "utf8" });

  // Create directory structure by writing files
  await worker.writeFile("/app/models/trained_model.pkl", "model_data_here");
  await worker.writeFile("/app/logs/training.log", "Training started...\n");

  // Rename files
  await worker.renameFile("/app/config.json", "/app/config.backup.json");
  await worker.renameFile("/app/models/trained_model.pkl", "/app/models/v1.0/model.pkl");

  // Delete temporary files
  await worker.unlink("/app/temp/processing.tmp");
  await worker.unlink("/app/logs/old_training.log");

  // Use files in Python code
  await worker.runPythonCode(`
import json
import pandas as pd
import os

# Read configuration
with open('/app/config.backup.json', 'r') as f:
    config = json.load(f)
print(f"Model: {config['model']}")

# Read CSV data
df = pd.read_csv('/app/data/users.csv')
print(f"Loaded {len(df)} users")
print(df.head())

# List directory contents
print("App directory contents:")
for root, dirs, files in os.walk('/app'):
    for file in files:
        print(os.path.join(root, file))

# Write processing results
results = {"processed_users": len(df), "timestamp": "2024-01-01"}
with open('/app/results.json', 'w') as f:
    json.dump(results, f)
`);
});

Files can be provided during worker initialization for immediate availability:
import { WorkerProxy } from "@gradio/wasm";

// Initialize worker with pre-loaded files
const worker = new WorkerProxy({
  gradioWheelUrl: "https://example.com/gradio.whl",
  gradioClientWheelUrl: "https://example.com/gradio_client.whl",
  files: {
    // Text file with inline data
    "dataset.csv": {
      data: "feature1,feature2,label\n1.0,2.0,A\n2.0,3.0,B\n3.0,4.0,C",
      opts: { encoding: "utf8" }
    },
    // Binary file with inline data
    "model_weights.bin": {
      data: new Uint8Array([0x12, 0x34, 0x56, 0x78]),
      opts: { mode: "wb" }
    },
    // File loaded from URL
    "external_data.json": {
      url: "https://api.example.com/data.json",
      opts: { encoding: "utf8" }
    },
    // Configuration file
    "app_config.yaml": {
      data: `
model:
  type: transformer
  layers: 12
  attention_heads: 8
dataset:
  batch_size: 32
  max_length: 512
`,
      opts: { encoding: "utf8" }
    }
  },
  requirements: ["pandas", "numpy", "pyyaml"],
  sharedWorkerMode: false
});

worker.addEventListener("initialization-completed", async () => {
  // Files are immediately available in Python
  await worker.runPythonCode(`
import pandas as pd
import yaml
import os

# List all available files
print("Available files:")
for root, dirs, files in os.walk('/'):
    for file in files:
        full_path = os.path.join(root, file)
        if not full_path.startswith('/dev') and not full_path.startswith('/proc'):
            print(full_path)

# Load dataset
df = pd.read_csv('dataset.csv')
print(f"Dataset shape: {df.shape}")

# Load configuration
with open('app_config.yaml', 'r') as f:
    config = yaml.safe_load(f)
print(f"Model type: {config['model']['type']}")

# Check binary file
with open('model_weights.bin', 'rb') as f:
    weights = f.read()
print(f"Model weights size: {len(weights)} bytes")
`);
});

Example of file-based data processing workflows:
// WorkerProxyOptions is assumed to be exported alongside WorkerProxy
import { WorkerProxy, type WorkerProxyOptions } from "@gradio/wasm";

class DataProcessingWorkflow {
  private worker: WorkerProxy;

  constructor(options: WorkerProxyOptions) {
    this.worker = new WorkerProxy({
      ...options,
      requirements: [...options.requirements, "pandas", "numpy", "matplotlib", "scikit-learn"]
    });
    this.worker.addEventListener("initialization-completed", () => {
      this.setupWorkflow();
    });
  }

  private async setupWorkflow() {
    // Create directory structure
    await this.createDirectoryStructure();

    // Set up Python processing environment
    await this.worker.runPythonCode(`
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import json
import pickle
import os

def setup_directories():
    os.makedirs('/workflow/input', exist_ok=True)
    os.makedirs('/workflow/processed', exist_ok=True)
    os.makedirs('/workflow/models', exist_ok=True)
    os.makedirs('/workflow/output', exist_ok=True)
    print("Directory structure created")

setup_directories()
`);
  }

  private async createDirectoryStructure() {
    // Create directories by writing placeholder files
    await this.worker.writeFile("/workflow/input/.keep", "");
    await this.worker.writeFile("/workflow/processed/.keep", "");
    await this.worker.writeFile("/workflow/models/.keep", "");
    await this.worker.writeFile("/workflow/output/.keep", "");
  }

  async processDataset(csvData: string, filename: string) {
    // Write input data
    await this.worker.writeFile(`/workflow/input/${filename}`, csvData);
    console.log(`Processing dataset: ${filename}`);

    // Process data in Python
    await this.worker.runPythonCode(`
# Load and process dataset
df = pd.read_csv('/workflow/input/${filename}')
print(f"Loaded dataset with {len(df)} rows and {len(df.columns)} columns")

# Basic preprocessing
df_processed = df.dropna()
df_processed = df_processed.reset_index(drop=True)

# Save processed data
df_processed.to_csv('/workflow/processed/cleaned_${filename}', index=False)
print(f"Saved cleaned dataset with {len(df_processed)} rows")

# Generate summary statistics
summary = {
    "original_rows": len(df),
    "cleaned_rows": len(df_processed),
    "columns": list(df_processed.columns),
    "summary_stats": df_processed.describe().to_dict()
}
with open('/workflow/processed/summary_${filename.replace('.csv', '.json')}', 'w') as f:
    json.dump(summary, f, indent=2)
print("Processing completed")
`);
  }

  async trainModel(datasetFilename: string, targetColumn: string) {
    console.log(`Training model on ${datasetFilename} with target: ${targetColumn}`);

    await this.worker.runPythonCode(`
# Load processed dataset
df = pd.read_csv('/workflow/processed/cleaned_${datasetFilename}')

# Prepare features and target
X = df.drop('${targetColumn}', axis=1)
y = df['${targetColumn}']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training accuracy: {train_score:.3f}")
print(f"Test accuracy: {test_score:.3f}")

# Save model
with open('/workflow/models/trained_model.pkl', 'wb') as f:
    pickle.dump(model, f)

# Save model metadata
model_metadata = {
    "model_type": "RandomForestClassifier",
    "train_accuracy": train_score,
    "test_accuracy": test_score,
    "feature_names": list(X.columns),
    "target_column": "${targetColumn}",
    "dataset": "${datasetFilename}"
}
with open('/workflow/models/model_metadata.json', 'w') as f:
    json.dump(model_metadata, f, indent=2)
print("Model training completed and saved")
`);
  }

  async generateReport() {
    console.log("Generating processing report...");

    await this.worker.runPythonCode(`
import json
import os
import pandas as pd

# Collect all processing results
report = {
    "workflow_summary": {
        "timestamp": pd.Timestamp.now().isoformat(),
        "files_processed": []
    }
}

# Scan processed files
for filename in os.listdir('/workflow/processed'):
    if filename.endswith('.json'):
        with open(f'/workflow/processed/{filename}', 'r') as f:
            file_summary = json.load(f)
        report["workflow_summary"]["files_processed"].append({
            "filename": filename,
            "summary": file_summary
        })

# Add model information if available
if os.path.exists('/workflow/models/model_metadata.json'):
    with open('/workflow/models/model_metadata.json', 'r') as f:
        report["model_info"] = json.load(f)

# Save comprehensive report
with open('/workflow/output/processing_report.json', 'w') as f:
    json.dump(report, f, indent=2)
print("Processing report generated")
print("Report location: /workflow/output/processing_report.json")
`);
  }

  async getResults(): Promise<any> {
    // Read the final report
    await this.worker.runPythonCode(`
with open('/workflow/output/processing_report.json', 'r') as f:
    report_content = f.read()
print("REPORT_CONTENT_START")
print(report_content)
print("REPORT_CONTENT_END")
`);
    // In a real implementation, you'd capture the stdout and parse the report
    return { message: "Check worker stdout for report content" };
  }

  async cleanup() {
    // Clean up temporary files
    await this.worker.unlink("/workflow/input/.keep");
    await this.worker.unlink("/workflow/processed/.keep");
    await this.worker.unlink("/workflow/models/.keep");
    await this.worker.unlink("/workflow/output/.keep");
    this.worker.terminate();
  }
}

// Usage
const workflow = new DataProcessingWorkflow({
  gradioWheelUrl: "https://example.com/gradio.whl",
  gradioClientWheelUrl: "https://example.com/gradio_client.whl",
  files: {},
  requirements: [],
  sharedWorkerMode: false
});

// Process data
const csvData = "feature1,feature2,target\n1,2,A\n2,3,B\n3,4,A\n4,5,B";
await workflow.processDataset(csvData, "sample_data.csv");
await workflow.trainModel("sample_data.csv", "target");
await workflow.generateReport();
const results = await workflow.getResults();
console.log("Workflow completed:", results);Important considerations when working with the virtual file system:
// Best practices for file operations
import { WorkerProxy } from "@gradio/wasm";

class FileSystemManager {
  private worker: WorkerProxy;

  constructor(worker: WorkerProxy) {
    this.worker = worker;
  }

  async safeWriteFile(path: string, data: string | ArrayBufferView, retries = 3) {
    for (let i = 0; i < retries; i++) {
      try {
        await this.worker.writeFile(path, data);
        return;
      } catch (error) {
        console.warn(`Write attempt ${i + 1} failed for ${path}:`, error);
        if (i === retries - 1) throw error;
        await new Promise(resolve => setTimeout(resolve, 100 * (i + 1)));
      }
    }
  }

  async ensureDirectoryExists(dirPath: string) {
    // Create directory by writing a temporary file
    const keepFile = `${dirPath}/.keep`;
    await this.safeWriteFile(keepFile, "");
    // Optionally remove the .keep file
    // await this.worker.unlink(keepFile);
  }

  async batchWriteFiles(files: Record<string, string | ArrayBufferView>) {
    const promises = Object.entries(files).map(([path, data]) =>
      this.safeWriteFile(path, data)
    );
    await Promise.all(promises);
    console.log(`Successfully wrote ${Object.keys(files).length} files`);
  }
}
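A possible usage sketch (the worker construction mirrors the earlier examples; the wheel URLs, paths, and file contents are placeholders):

// Hypothetical usage of FileSystemManager (illustrative paths and contents)
const fsWorker = new WorkerProxy({
  gradioWheelUrl: "https://example.com/gradio.whl",
  gradioClientWheelUrl: "https://example.com/gradio_client.whl",
  files: {},
  requirements: [],
  sharedWorkerMode: false
});

fsWorker.addEventListener("initialization-completed", async () => {
  const manager = new FileSystemManager(fsWorker);

  // Make sure the target directory exists before writing into it
  await manager.ensureDirectoryExists("/project/data");

  // Write several files concurrently, retrying individual failures
  await manager.batchWriteFiles({
    "/project/data/train.csv": "x,y\n1,2\n3,4",
    "/project/data/notes.txt": "written via FileSystemManager"
  });
});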