Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7c4dbde
[AIC-2664] Impl trackers (first pass)
mattrmc1 Jun 22, 2026
a0c8784
fix: default tracker version to 1 and remove version clamp from token…
mattrmc1 Jun 23, 2026
1a7e1f6
feat: add Runner, RunnerResult, Judge, and Evaluator
mattrmc1 Jun 23, 2026
bed4ca2
guard against null AIMetrics
mattrmc1 Jun 23, 2026
2b47c86
fix: guard against blank metricKey and infinite/invalid score
mattrmc1 Jun 23, 2026
4ef3de2
fix: MAX_TOKEN_BYTES -> MAX_TOKEN_LENGTH
mattrmc1 Jun 23, 2026
1be0a1e
fix: guard against empty runId and configKey
mattrmc1 Jun 23, 2026
8e81ea0
fix: Add warning comment to createTracker public call
mattrmc1 Jun 23, 2026
e81e2f5
fix: use trim + isEmpty to support java 8
mattrmc1 Jun 23, 2026
c21fdd7
fix: stop trackMetricsOf clock before running metrics extractor
mattrmc1 Jun 23, 2026
4c96dca
fix: record operation duration when trackMetricsOf extractor throws
mattrmc1 Jun 23, 2026
4da5478
fix: downgrade null-arg track logs from warn to debug per spec
mattrmc1 Jun 23, 2026
a94b2bf
Merge branch 'mmccarthy/AIC-2664/ai-config-tracker-overhaul' of githu…
mattrmc1 Jun 23, 2026
394a044
fix: remove unnecessary NoOpAIConfigTracker
mattrmc1 Jun 24, 2026
6c80aed
Merge branch 'mmccarthy/AIC-2664/ai-config-tracker-overhaul' of githu…
mattrmc1 Jun 24, 2026
5381bf4
fix: remove resumption-token length cap
mattrmc1 Jun 24, 2026
1355033
Merge branch 'mmccarthy/AIC-2664/ai-config-tracker-overhaul' of githu…
mattrmc1 Jun 24, 2026
add48f9
fix: guard against NaN scores
mattrmc1 Jun 24, 2026
1bd6777
fix: defensively copy judges map in Evaluator constructor
mattrmc1 Jun 24, 2026
9a8143e
fix: use Java 8-compatible map/list construction in Judge
mattrmc1 Jun 24, 2026
3aa5d08
fix: Add security note to LDAIConfigTracker.getResumptionToken()
mattrmc1 Jun 24, 2026
121b140
fix: Add security note to MetricSummary.getResumptionToken()
mattrmc1 Jun 24, 2026
faa4981
Merge branch 'mmccarthy/AIC-2664/ai-config-tracker-overhaul' of githu…
mattrmc1 Jun 24, 2026
f42de0b
fix: remove reasoning from Judge schema required fields
mattrmc1 Jun 24, 2026
59835e3
Merge branch 'main' of github.com:launchdarkly/java-core into mmccart…
mattrmc1 Jun 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ public final class AIAgentConfig extends AIConfig {
String instructions,
JudgeConfiguration judgeConfiguration,
Map<String, Tool> tools,
Supplier<LDAIConfigTracker> trackerFactory) {
super(key, enabled, Mode.AGENT, model, provider, trackerFactory);
Supplier<LDAIConfigTracker> trackerFactory,
Evaluator evaluator) {
super(key, enabled, Mode.AGENT, model, provider, trackerFactory, evaluator);
this.instructions = instructions;
this.judgeConfiguration = judgeConfiguration;
this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ public final class AICompletionConfig extends AIConfig {
List<Message> messages,
JudgeConfiguration judgeConfiguration,
Map<String, Tool> tools,
Supplier<LDAIConfigTracker> trackerFactory) {
super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory);
Supplier<LDAIConfigTracker> trackerFactory,
Evaluator evaluator) {
super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory, evaluator);
this.messages = messages == null ? null : Collections.unmodifiableList(messages);
this.judgeConfiguration = judgeConfiguration;
this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,23 @@ public abstract class AIConfig {
private final Model model;
private final Provider provider;
private final Supplier<LDAIConfigTracker> trackerFactory;
private final Evaluator evaluator;

AIConfig(
String key,
boolean enabled,
Mode mode,
Model model,
Provider provider,
Supplier<LDAIConfigTracker> trackerFactory) {
Supplier<LDAIConfigTracker> trackerFactory,
Evaluator evaluator) {
this.key = key;
this.enabled = enabled;
this.mode = mode;
this.model = model;
this.provider = provider;
this.trackerFactory = Objects.requireNonNull(trackerFactory, "trackerFactory");
this.evaluator = Objects.requireNonNull(evaluator, "evaluator");
}

/**
Expand Down Expand Up @@ -102,4 +105,17 @@ public Provider getProvider() {
public LDAIConfigTracker createTracker() {
// Delegates to the tracker factory supplied at construction; the supplier is invoked on every call.
return trackerFactory.get();
}

/**
* Returns the evaluator that coordinates judge execution for this configuration.
* <p>
* For {@link AIJudgeConfig} this is always {@link Evaluator#noop()}. For
* {@link AICompletionConfig} and {@link AIAgentConfig} it is the evaluator supplied at
* construction time (also {@link Evaluator#noop()} unless a custom one is wired in).
* <p>
* The constructor rejects a {@code null} evaluator, so callers do not need a null check.
*
* @return the evaluator, never {@code null}
*/
public Evaluator getEvaluator() {
return evaluator;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public final class AIJudgeConfig extends AIConfig {
List<Message> messages,
String evaluationMetricKey,
Supplier<LDAIConfigTracker> trackerFactory) {
super(key, enabled, Mode.JUDGE, model, provider, trackerFactory);
super(key, enabled, Mode.JUDGE, model, provider, trackerFactory, Evaluator.noop());
this.messages = messages == null ? null : Collections.unmodifiableList(messages);
this.evaluationMetricKey = evaluationMetricKey;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package com.launchdarkly.sdk.server.ai;

import com.launchdarkly.logging.LDLogger;
import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.JudgeConfiguration;
import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;

/**
* Coordinates evaluation of an AI Config output by running a set of {@link Judge} instances.
* <p>
* An {@code Evaluator} is attached to an {@link AICompletionConfig} or {@link AIAgentConfig} and
* invoked by managed AI types (plan 4). In v1.0, the evaluator returned by the config retrieval
* methods is always a noop that returns an empty list immediately.
* <p>
* Instances are immutable and thread-safe.
*/
public final class Evaluator {
private static final Evaluator NOOP = new Evaluator();

private final Map<String, Judge> judges;
private final JudgeConfiguration judgeConfiguration;
private final LDLogger logger;
private final boolean isNoop;

private Evaluator() {
this.judges = Collections.emptyMap();
this.judgeConfiguration = null;
this.logger = null;
this.isNoop = true;
}

/**
* Constructs an evaluator with the given judges and configuration.
*
* @param judges a map from judge config key to {@link Judge} instance; must not be {@code null}
* @param judgeConfiguration the judge configuration listing which judges to run and their sampling
* rates; must not be {@code null}
* @param logger the logger; must not be {@code null}
*/
public Evaluator(Map<String, Judge> judges, JudgeConfiguration judgeConfiguration, LDLogger logger) {
this.judges = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(judges, "judges")));
this.judgeConfiguration = Objects.requireNonNull(judgeConfiguration, "judgeConfiguration");
this.logger = Objects.requireNonNull(logger, "logger");
this.isNoop = false;
Comment thread
cursor[bot] marked this conversation as resolved.
}

/**
* Returns the shared noop evaluator, which immediately returns an empty result list without
* logging any warnings.
*
* @return the noop singleton, never {@code null}
*/
public static Evaluator noop() {
return NOOP;
}

/**
* Runs all configured judges against the given input/output pair and returns their results.
* <p>
* When this is the noop evaluator, returns a completed future holding an empty list immediately.
* Otherwise, judges are run sequentially in the order specified by the {@link JudgeConfiguration}.
* Judges referenced in the configuration but absent from the judges map are skipped with a
* warning; this is not an error.
* <p>
* This method does NOT call {@code trackJudgeResult} — that is the caller's responsibility.
*
* @param input the message history or prompt that was sent to the model
* @param output the model's response to evaluate
* @return a completed future holding the list of judge results; never {@code null}
*/
public CompletableFuture<List<JudgeResult>> evaluate(String input, String output) {
if (isNoop) {
return CompletableFuture.completedFuture(Collections.emptyList());
}

List<JudgeResult> results = new ArrayList<>();
for (JudgeConfiguration.Judge entry : judgeConfiguration.getJudges()) {
Judge judge = judges.get(entry.getKey());
if (judge == null) {
logger.warn("Evaluator: no judge found for key '{}', skipping", entry.getKey());
continue;
}
results.add(judge.evaluate(input, output, entry.getSamplingRate()));
}
return CompletableFuture.completedFuture(results);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package com.launchdarkly.sdk.server.ai;

import com.launchdarkly.logging.LDLogger;
import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Collectors;

/**
* Evaluates an AI model output against a judge prompt, returning a scored {@link JudgeResult}.
* <p>
* A {@code Judge} wraps an {@link AIJudgeConfig} and a {@link Runner}. Each call to
* {@link #evaluate} or {@link #evaluateMessages} invokes the runner with a formatted evaluation
* prompt and parses the structured {@code {score, reasoning}} response. Evaluation can be sampled
* to reduce cost: pass a {@code samplingRate} of {@code 0.0} to always skip, or {@code 1.0} to
* always run.
* <p>
* Instances are immutable and thread-safe.
*/
public final class Judge {
/**
* JSON-Schema fragment sent to the runner as the {@code outputType}, requesting structured
* {@code {score, reasoning}} output.
*/
private static final Map<String, Object> EVALUATION_SCHEMA;
static {
Map<String, Object> scoreSchema = new HashMap<>();
scoreSchema.put("type", "number");

Map<String, Object> reasoningSchema = new HashMap<>();
reasoningSchema.put("type", "string");

Map<String, Object> properties = new HashMap<>();
properties.put("score", Collections.unmodifiableMap(scoreSchema));
properties.put("reasoning", Collections.unmodifiableMap(reasoningSchema));

Map<String, Object> schema = new HashMap<>();
schema.put("type", "object");
schema.put("properties", Collections.unmodifiableMap(properties));
schema.put("required", Collections.singletonList("score"));

EVALUATION_SCHEMA = Collections.unmodifiableMap(schema);
}

private final AIJudgeConfig config;
private final Runner runner;
private final LDLogger logger;

/**
* Constructs a judge.
*
* @param config the judge AI Config; must not be {@code null}
* @param runner the runner to invoke; must not be {@code null}
* @param logger the logger; must not be {@code null}
*/
public Judge(AIJudgeConfig config, Runner runner, LDLogger logger) {
this.config = Objects.requireNonNull(config, "config");
this.runner = Objects.requireNonNull(runner, "runner");
this.logger = Objects.requireNonNull(logger, "logger");
}

/**
* Evaluates the given input/output pair, always running (sampling rate {@code 1.0}).
*
* @param input the message history or prompt that was sent to the model
* @param output the model's response to evaluate
* @return the evaluation result; never {@code null}
*/
public JudgeResult evaluate(String input, String output) {
return evaluate(input, output, 1.0);
}

/**
* Evaluates the given input/output pair, subject to the given sampling rate.
*
* @param input the message history or prompt that was sent to the model
* @param output the model's response to evaluate
* @param samplingRate the fraction of evaluations to actually run; {@code 0.0} always skips,
* {@code 1.0} always runs
* @return the evaluation result; never {@code null}
*/
public JudgeResult evaluate(String input, String output, double samplingRate) {
if (ThreadLocalRandom.current().nextDouble() >= samplingRate) {
return JudgeResult.builder()
.sampled(false)
.success(false)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.build();
}

String formatted = "MESSAGE HISTORY:\n" + input + "\n\nRESPONSE TO EVALUATE:\n" + output;
LDAIConfigTracker tracker = config.createTracker();

RunnerResult result;
try {
result = tracker.trackMetricsOf(RunnerResult::getMetrics, () -> runner.run(formatted, EVALUATION_SCHEMA));
} catch (Exception ex) {
return JudgeResult.builder()
.sampled(true)
.success(false)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.errorMessage(ex.getMessage())
.build();
}

Map<String, Object> parsed = result.getParsed();
if (parsed == null) {
logger.warn("Judge {}: runner returned null parsed output", config.getKey());
return JudgeResult.builder()
.sampled(true)
.success(false)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.build();
}

Object scoreRaw = parsed.get("score");
if (!(scoreRaw instanceof Number)) {
logger.warn("Judge {}: parsed output missing numeric score", config.getKey());
return JudgeResult.builder()
.sampled(true)
.success(false)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.build();
}
double score = ((Number) scoreRaw).doubleValue();
if (!Double.isFinite(score) || score < 0.0 || score > 1.0) {
logger.warn("Judge {}: score {} is outside [0.0, 1.0]", config.getKey(), score);
return JudgeResult.builder()
.sampled(true)
.success(false)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.build();
}

JudgeResult.Builder resultBuilder = JudgeResult.builder()
.sampled(true)
.success(true)
.judgeConfigKey(config.getKey())
.metricKey(config.getEvaluationMetricKey())
.score(score);
Comment thread
cursor[bot] marked this conversation as resolved.

Object reasoningRaw = parsed.get("reasoning");
if (reasoningRaw instanceof String) {
resultBuilder.reasoning((String) reasoningRaw);
} else if (reasoningRaw != null) {
logger.warn("Judge {}: reasoning is not a string, ignoring", config.getKey());
}

return resultBuilder.build();
}

/**
* Evaluates a message list and runner response, always running (sampling rate {@code 1.0}).
* <p>
* Messages are formatted as {@code role: content} lines, joined by newlines.
*
* @param messages the messages that were sent to the model
* @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
* @return the evaluation result; never {@code null}
*/
public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response) {
return evaluateMessages(messages, response, 1.0);
}

/**
* Evaluates a message list and runner response, subject to the given sampling rate.
* <p>
* Messages are formatted as {@code role: content} lines, joined by newlines.
*
* @param messages the messages that were sent to the model
* @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
* @param samplingRate the fraction of evaluations to actually run
* @return the evaluation result; never {@code null}
*/
public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response, double samplingRate) {
String formattedMessages = messages == null ? "" : messages.stream()
.map(m -> m.getRole().getWireValue() + ": " + m.getContent())
.collect(Collectors.joining("\n"));
return evaluate(formattedMessages, response == null ? "" : response.getContent(), samplingRate);
}

/**
* Returns the judge AI Config this instance was constructed with.
*
* @return the judge config, never {@code null}
*/
public AIJudgeConfig getConfig() {
return config;
}

/**
* Returns the runner this instance was constructed with.
*
* @return the runner, never {@code null}
*/
public Runner getRunner() {
return runner;
}
}
Loading
Loading