Agent skills for iOS, iPadOS, Swift, SwiftUI, and modern Apple framework development.
71
89%
Does it follow best practices?
Impact
—
No eval scenarios have been run
Advisory
Suggest reviewing before use
Complete reference for optimizing Core ML models: quantization, palettization, pruning, performance tuning, and profiling.
| Technique | Size Reduction | Accuracy Impact | Best Compute Unit | Min OS |
|---|---|---|---|---|
| INT8 per-channel | ~4x | Low | CPU/GPU | iOS 16 |
| INT4 per-block | ~8x | Medium | GPU | iOS 18 |
| Palettization 4-bit | ~8x | Low-Medium | Neural Engine | iOS 16 |
| Palettization 2-bit | ~16x | Medium-High | Neural Engine | iOS 16 |
| W8A8 (weights+activations) | ~4x | Low | ANE (A17 Pro/M4+) | iOS 17 |
| Pruning 50% | ~2x | Low | CPU/ANE | iOS 16 |
| Pruning 75% | ~4x | Medium | CPU/ANE | iOS 16 |
import coremltools as ct
import coremltools.optimize as cto
model = ct.models.MLModel("model.mlpackage")
op_config = cto.coreml.OpLinearQuantizerConfig(
mode="linear_symmetric", # or "linear" (asymmetric with zero-point)
weight_threshold=512, # only quantize tensors with > N elements
)
config = cto.coreml.OptimizationConfig(global_config=op_config)
compressed = cto.coreml.linear_quantize_weights(model, config=config)
compressed.save("model_int8.mlpackage")

import coremltools.optimize as cto
config = cto.torch.quantization.PostTrainingQuantizerConfig.from_dict({
"global_config": {
"weight_dtype": "int4",
"granularity": "per_block",
"block_size": 128,
}
})
quantizer = cto.torch.quantization.PostTrainingQuantizer(model, config)
quantized_model = quantizer.compress()

config = cto.torch.layerwise_compression.LayerwiseCompressorConfig.from_dict({
"global_config": {
"algorithm": "gptq",
"weight_dtype": 4,
"granularity": "per_block",
"block_size": 128,
},
"calibration_nsamples": 16,
})
compressor = cto.torch.layerwise_compression.LayerwiseCompressor(model, config)
compressed_model = compressor.compress(calibration_dataloader)

Palettization is especially effective on the Neural Engine. 4-bit palettization typically preserves accuracy better than 4-bit linear quantization.
op_config = cto.coreml.OpPalettizerConfig(
mode="kmeans", # "kmeans" or "uniform"
nbits=4, # {1, 2, 3, 4, 6, 8}
granularity="per_grouped_channel", # iOS 18+ for grouped
group_size=16,
)
config = cto.coreml.OptimizationConfig(global_config=op_config)
palettized = cto.coreml.palettize_weights(model, config=config)

| Bits | Unique Values | Size Reduction | Typical Quality |
|---|---|---|---|
| 8 | 256 | ~4x | Excellent |
| 6 | 64 | ~5.3x | Very good |
| 4 | 16 | ~8x | Good |
| 3 | 8 | ~10.7x | Moderate |
| 2 | 4 | ~16x | Fair |
| 1 | 2 | ~32x | Poor (binary) |
config = cto.coreml.OptimizationConfig(
global_config=cto.coreml.OpMagnitudePrunerConfig(
target_sparsity=0.75,
weight_threshold=2048,
)
)
pruned = cto.coreml.prune_weights(model, config=config)

config = cto.coreml.OptimizationConfig(
global_config=cto.coreml.OpThresholdPrunerConfig(
threshold=1e-12,
minimum_sparsity_percentile=0.5,
)
)
pruned = cto.coreml.prune_weights(model, config=config)

Apply multiple compression techniques in sequence:
# Palettize first, then prune on top
palettized = cto.coreml.palettize_weights(model, pal_config)
final = cto.coreml.prune_weights(
palettized, prune_config, joint_compression=True
)

Fine-grained control over which operations get compressed:
config = cto.coreml.OptimizationConfig(
global_config=global_op_config,
op_type_configs={
"linear": linear_config,
"conv": conv_config,
},
op_name_configs={
"embedding_layer": None, # None = skip compression
},
)

Train with quantization in the loop for best accuracy:
from coremltools.optimize.torch.quantization import (
LinearQuantizer, LinearQuantizerConfig, ModuleLinearQuantizerConfig
)
config = LinearQuantizerConfig(
global_config=ModuleLinearQuantizerConfig(
quantization_scheme="symmetric",
milestones=[0, 1000, 1000, 0],
)
)
quantizer = LinearQuantizer(model, config)
quantizer.prepare(example_inputs=[1, 3, 224, 224], inplace=True)
# Training loop
for inputs, labels in data:
output = model(inputs)
loss = loss_fn(output, labels)
loss.backward()
optimizer.step()
quantizer.step()
model = quantizer.finalize(inplace=True)

// From Xcode-compiled model (auto-generated class)
let model = try MyImageClassifier(configuration: MLModelConfiguration())
// From URL at runtime
let config = MLModelConfiguration()
config.computeUnits = .all
let model = try MLModel(contentsOf: modelURL, configuration: config)
// From pre-compiled model (.mlmodelc) for faster loading
let compiledURL = try MLModel.compileModel(at: sourceModelURL)
let model = try MLModel(contentsOf: compiledURL)

let config = MLModelConfiguration()
config.computeUnits = .all
config.allowLowPrecisionAccumulationOnGPU = true
// config.functionName = "adapter_1" // For multifunction models (iOS 18+)

let input = MyModelInput(image: pixelBuffer)
let output = try model.prediction(input: input)
let label = output.classLabel

let output = try await model.prediction(input: input)

Async prediction is thread-safe, supports Task cancellation, and integrates with Swift concurrency. It is ~60% faster than synchronous prediction for batch workloads.
let batchInputs: [MyModelInput] = images.map { MyModelInput(image: $0) }
let batchOutputs = try model.predictions(inputs: batchInputs)

let features = try MLDictionaryFeatureProvider(dictionary: [
"input": MLFeatureValue(pixelBuffer: pixelBuffer),
"threshold": MLFeatureValue(double: 0.5),
])
let output = try model.prediction(from: features)

import Vision
import CoreML
let vnModel = try VNCoreMLModel(for: MyDetector().model)
let request = VNCoreMLRequest(model: vnModel) { request, error in
guard let results = request.results as? [VNClassificationObservation] else { return }
let topResult = results.first
print("\(topResult?.identifier ?? ""): \(topResult?.confidence ?? 0)")
}
let handler = VNImageRequestHandler(cgImage: image)
try handler.perform([request])

import NaturalLanguage
let nlModel = try NLModel(mlModel: SentimentClassifier().model)
let sentiment = nlModel.predictedLabel(for: "Great product!")

MLTensor is a Swift type for multidimensional array operations:
import CoreML
let tensor = MLTensor([1.0, 2.0, 3.0, 4.0])
let reshaped = tensor.reshaped(to: [2, 2])
let result = tensor.softmax()
let matmulResult = tensorA.matmul(tensorB)

- `.mlmodelc` for instant loading after first compilation
- `MLModel.compileModel(at:)`
- `bisect_model()` for very large models that are slow to load
- `MLComputePlan` (iOS 17+) for programmatic profiling

model = ct.models.MLModel("model.mlpackage",
optimization_hints={
"reshapeFrequency": ct.ReshapeFrequency.Infrequent
})

`.all` is correct for production. `.cpuOnly` is for debugging only.

skills
accessorysetupkit
references
activitykit
references
adattributionkit
references
alarmkit
references
app-clips
app-intents
references
app-store-optimization
app-store-review
apple-on-device-ai
appmigrationkit
references
audioaccessorykit
references
authentication
references
avkit
references
background-processing
references
browserenginekit
references
callkit
references
carplay
references
cloudkit
references
contacts-framework
references
core-bluetooth
references
core-data
core-motion
references
core-nfc
references
coreml
references
cryptokit
references
cryptotokenkit
references
debugging-instruments
device-integrity
references
dockkit
references
energykit
references
eventkit
references
financekit
references
focus-engine
gamekit
references
healthkit
references
homekit
references
ios-accessibility
ios-localization
ios-networking
ios-simulator
references
mapkit
metrickit
references
musickit
references
natural-language
references
paperkit
references
passkit
references
pdfkit
references
pencilkit
references
permissionkit
references
photokit
push-notifications
realitykit
references
relevancekit
references
scenekit
references
sensorkit
references
speech-recognition
spritekit
references
storekit
swift-api-design-guidelines
swift-architecture
swift-charts
references
swift-codable
swift-concurrency
swift-formatstyle
swift-language
swift-security
references
swift-testing
swiftdata
swiftlint
swiftui-animation
swiftui-gestures
references
swiftui-layout-components
swiftui-liquid-glass
references
swiftui-patterns
swiftui-performance
swiftui-uikit-interop
swiftui-webkit
tabletopkit
references
tipkit
references
vision-framework
weatherkit
references
widgetkit
references