Evaluation
A type that defines an evaluation.
Declaration
protocol Evaluation : Sendable
Mentioned in
Overview
Implement this protocol to create custom evaluations. The evaluation runs your system under test against a dataset and applies evaluators to measure performance.
/// An example evaluation that scores a fixed answer against each sample's expected text.
struct MyEvaluation: Evaluation {
    /// The "Match" metric whose mean is requested in `aggregateMetrics(using:)`.
    let metric = Metric("Match")

    /// An in-memory dataset containing a single prompt and its expected answer.
    let dataset = ArrayLoader(
        samples: [
            ModelSample(prompt: "One plus one is...", expected: "Two.")
        ]
    )

    /// Produces the subject to be scored for `sample`.
    ///
    /// This example does not consult `sample`; it always yields the value "Two.".
    func subject(from sample: ModelSample<String>) async throws -> ModelSubject<String> {
        return ModelSubject(value: "Two.")
    }

    /// A single evaluator that compares the subject's value with the sample's expected value.
    var evaluators: Evaluators {
        Evaluator { sample, candidate in
            // The closure builds its own "Match" metric instead of reading the stored property.
            let matchMetric = Metric("Match")

            // A sample without an expected value is ignored rather than scored.
            guard let expectedAnswer = sample.expected else {
                return matchMetric.ignore()
            }

            if candidate.value == expectedAnswer {
                return matchMetric.passing()
            }
            return matchMetric.failing()
        }
    }

    /// Asks `aggregator` to compute the mean of the stored "Match" metric.
    func aggregateMetrics(using aggregator: inout MetricsAggregator) {
        aggregator.computeMean(of: metric)
    }
}