// ClassificationOverride.java
package org.egothor.methodatlas;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.egothor.methodatlas.ai.AiClassSuggestion;
import org.egothor.methodatlas.ai.AiMethodSuggestion;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
/**
* Applies human-authored classification overrides to AI-generated (or absent)
* security classification results.
*
* <h2>Purpose</h2>
*
* <p>
* AI classification is a best-effort semantic analysis. In practice, a provider
* may under-classify a method that is clearly security-relevant, over-classify a
* utility method, assign incorrect tags, or produce a rationale that does not
* meet audit requirements. {@code ClassificationOverride} allows a team or
* individual to record corrections in a persistent YAML file so that re-running
* MethodAtlas does not lose those decisions.
* </p>
*
* <p>
* Overrides are also the only mechanism for adding AI-style enrichment columns
* to methods that are not going through live AI or the manual workflow — for
* example, when running in static inventory mode with a trusted set of
* hand-reviewed classifications.
* </p>
*
* <h2>Override File Format</h2>
*
* <p>
* The override file is a YAML document with a top-level {@code overrides} list.
* Each entry targets either a single method (when {@code method} is present) or
* every method in a class (when {@code method} is absent). Only the fields you
* specify are overridden; unspecified fields retain their AI-derived or
* default values.
* </p>
*
* <pre>
* overrides:
*
*   # Correct a false positive: AI classified this as security-relevant but it is not.
*   - fqcn: com.acme.util.DateFormatterTest
*     method: format_returnsIso8601
*     securityRelevant: false
*     reason: "Date formatting only — no security property tested"
*     note: "Reviewed 2026-04-24 by alice"
*
*   # Correct a false negative: AI missed this security-critical test.
*   - fqcn: com.acme.crypto.AesGcmTest
*     method: roundTrip_encryptDecrypt
*     securityRelevant: true
*     tags: [security, crypto]
*     displayName: "SECURITY: crypto — AES-GCM round-trip"
*     reason: "Verifies ciphertext integrity under AES-GCM — critical crypto test"
*     note: "Confirmed by security team 2026-04-20"
*
*   # Classify all methods in a class (no 'method' field = class-level override).
*   - fqcn: com.acme.auth.Oauth2FlowTest
*     securityRelevant: true
*     tags: [security, auth]
*     note: "Entire class covers OAuth 2.0 flow — AI taxonomy too narrow"
* </pre>
*
* <h2>Field Reference</h2>
*
* <ul>
* <li>{@code fqcn} — fully qualified class name; required; must match the
* {@code fqcn} column in MethodAtlas output</li>
* <li>{@code method} — method name; optional; when absent the override applies
* to all methods in the class; a method-level override takes precedence over a
* class-level override for the same class and replaces it entirely for that
* method (the two entries are not merged field by field)</li>
* <li>{@code securityRelevant} — {@code true} or {@code false}; optional; when
* absent the AI decision (or default {@code false}) is kept</li>
* <li>{@code tags} — YAML list of security taxonomy tags; optional; when absent
* the AI tags (or an empty list) are kept</li>
* <li>{@code displayName} — suggested {@code @DisplayName} value; optional;
* when absent the AI-suggested name (or {@code null}) is kept</li>
* <li>{@code reason} — human-readable rationale for the classification;
* optional; when absent the AI rationale (or {@code null}) is kept</li>
* <li>{@code note} — free-text annotation for human use only; never appears in
* any MethodAtlas output; useful for recording reviewer identity, date, and
* decision context</li>
* </ul>
*
* <h2>Confidence Behaviour</h2>
*
* <p>
* When an override entry applies to a method — whether it is a method-level or a
* class-level entry, and regardless of which fields it sets — the output
* confidence value is set to {@code 1.0} if the method ends up classified as
* security-relevant, or {@code 0.0} otherwise. This reflects the fact that a
* human review provides higher certainty than any AI score and ensures that
* confidence-based filters (such as {@code --min-confidence}) do not suppress
* human-verified results.
* </p>
*
* <h2>Integration Points</h2>
*
* <p>
* {@code ClassificationOverride} works in all MethodAtlas operating modes:
* </p>
*
* <ul>
* <li><b>Live AI mode</b> ({@code -ai}) — AI result is obtained first;
* overrides are applied on top.</li>
* <li><b>Manual AI workflow</b> ({@code -manual-consume}) — operator-filled
* responses are loaded first; overrides are applied on top.</li>
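* <li><b>Static mode</b> (no {@code -ai}) — no AI result exists; when at least
* one override targets a class, an {@link AiMethodSuggestion} is synthesized
* for every parsed method of that class from the override fields alone
* (methods without a matching override receive a neutral,
* non-security-relevant record).</li>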
* </ul>
*
* <h2>Unknown Methods</h2>
*
* <p>
* Override entries that reference a method name not found in the parsed source
* are silently ignored. This means old entries remain harmless after methods are
* renamed or deleted, and the file does not need to be pruned after refactoring.
* </p>
*
* @see AiClassSuggestion
* @see AiMethodSuggestion
*/
public final class ClassificationOverride {

    private static final Logger LOG = Logger.getLogger(ClassificationOverride.class.getName());

    /**
     * Shared instance used when no override file is configured, or when a
     * loaded file yields no usable entries. Its index is empty, so every call
     * to {@link #apply} returns the supplied suggestion unchanged.
     */
    private static final ClassificationOverride EMPTY = new ClassificationOverride(Map.of());

    /**
     * Override entries grouped by fully qualified class name. Entries keep the
     * order in which they appear in the override file; {@link #apply} relies on
     * that order for its "last entry wins" rule.
     */
    private final Map<String, List<Entry>> byClass;

    private ClassificationOverride(Map<String, List<Entry>> byClass) {
        this.byClass = byClass;
    }

    /**
     * Returns an empty override set that leaves all classifications unchanged.
     *
     * <p>
     * Use this when no override file is configured.
     * </p>
     *
     * @return shared empty instance
     */
    public static ClassificationOverride empty() {
        return EMPTY;
    }

    /**
     * Loads an override file from the given path.
     *
     * <p>
     * The file must be a YAML document with a top-level {@code overrides} list.
     * Unknown YAML fields are ignored, so the file can carry additional
     * human-readable metadata without causing parse errors.
     * </p>
     *
     * <p>
     * The following inputs are tolerated rather than rejected:
     * </p>
     * <ul>
     * <li>a document whose root is {@code null}, or whose {@code overrides} key
     * is missing or empty — the shared empty instance is returned;</li>
     * <li>a list item that is {@code null} or has a missing or blank
     * {@code fqcn} — the item is skipped and a warning is logged.</li>
     * </ul>
     *
     * @param path path to the YAML override file; may belong to any
     *             {@link java.nio.file.FileSystem}
     * @return loaded override set; never {@code null}
     * @throws IOException if the file cannot be read or contains invalid YAML
     */
    @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
    public static ClassificationOverride load(Path path) throws IOException {
        ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

        OverrideFile file;
        // Read through java.nio instead of Path.toFile(): toFile() throws
        // UnsupportedOperationException for paths of non-default file systems.
        try (InputStream in = Files.newInputStream(path)) {
            file = mapper.readValue(in, OverrideFile.class);
        }

        // The root object is null when the document is an explicit YAML null
        // (for example a file containing only "~").
        if (file == null || file.overrides == null || file.overrides.isEmpty()) {
            return EMPTY;
        }

        Map<String, List<Entry>> byClass = new HashMap<>();
        for (EntryDto dto : file.overrides) {
            // A blank list item ("- ") deserializes to a null element.
            if (dto == null || dto.fqcn == null || dto.fqcn.isBlank()) {
                if (LOG.isLoggable(Level.WARNING)) {
                    LOG.warning("Override entry without fqcn skipped");
                }
                continue;
            }
            byClass.computeIfAbsent(dto.fqcn, k -> new ArrayList<>())
                    .add(new Entry(dto.fqcn, dto.method, dto.securityRelevant,
                            dto.tags, dto.displayName, dto.reason, dto.note));
        }

        // Every entry may have been skipped; reuse the shared empty instance then.
        return byClass.isEmpty() ? EMPTY : new ClassificationOverride(byClass);
    }

    /**
     * Returns {@code true} if at least one override entry targets the given class.
     *
     * <p>
     * This can be used to decide whether {@link #apply} should be called even
     * when no AI suggestion was produced (e.g. in static mode), avoiding
     * unnecessary processing for classes that have no overrides.
     * </p>
     *
     * @param fqcn fully qualified class name to check
     * @return {@code true} if overrides exist for {@code fqcn}
     */
    public boolean hasOverridesFor(String fqcn) {
        return byClass.containsKey(fqcn);
    }

    /**
     * Applies override entries to an existing AI classification result.
     *
     * <p>
     * When no override targets {@code fqcn}, {@code suggestion} is returned
     * as-is (including {@code null}), so the absence of AI data is preserved.
     * </p>
     *
     * <p>
     * Otherwise a new {@link AiClassSuggestion} is built with exactly one method
     * record per element of {@code methodNames}, in that order:
     * </p>
     * <ul>
     * <li>for each method, a method-level entry with the same name is used if
     * one exists; otherwise the class-level entry (if any) is used — the two are
     * never combined field by field;</li>
     * <li>when several class-level entries, or several method-level entries for
     * the same method, exist, the last one in file order wins;</li>
     * <li>methods with neither an applicable entry nor an AI record receive a
     * neutral record ({@code securityRelevant=false}, no tags, no reason);</li>
     * <li>AI records and override entries for method names that are not in
     * {@code methodNames} do not appear in the result.</li>
     * </ul>
     *
     * <p>
     * Class-level fields of the result (name, relevance, tags, reason) are
     * copied from {@code suggestion}; when {@code suggestion} is {@code null}
     * the class name is {@code fqcn} and the remaining fields are {@code null}.
     * </p>
     *
     * @param fqcn        fully qualified class name of the class being processed
     * @param suggestion  AI classification result to modify; may be {@code null}
     * @param methodNames names of all test methods found by the parser in this
     *                    class, in discovery order; must not be {@code null}
     * @return modified or synthesized classification; {@code null} only when both
     *         {@code suggestion} is {@code null} and no overrides target
     *         {@code fqcn}
     */
    @SuppressWarnings("PMD.NPathComplexity")
    public AiClassSuggestion apply(String fqcn, AiClassSuggestion suggestion, List<String> methodNames) {
        List<Entry> entries = byClass.get(fqcn);
        if (entries == null || entries.isEmpty()) {
            return suggestion;
        }

        // Split entries into the class-level one (no method name) and the
        // method-level ones. Later entries overwrite earlier ones in both cases.
        Entry classLevel = null;
        Map<String, Entry> methodLevel = new HashMap<>();
        for (Entry e : entries) {
            if (e.method() == null) {
                classLevel = e;
            } else {
                methodLevel.put(e.method(), e);
            }
        }

        // Index the existing AI records by method name.
        List<AiMethodSuggestion> existingMethods = (suggestion != null && suggestion.methods() != null)
                ? suggestion.methods() : List.of();
        Map<String, AiMethodSuggestion> existingByName = new HashMap<>();
        for (AiMethodSuggestion m : existingMethods) {
            // Tolerate null elements in the supplied list instead of failing.
            if (m != null) {
                existingByName.put(m.methodName(), m);
            }
        }

        // The parser's method list drives the output: one record per name.
        List<AiMethodSuggestion> merged = new ArrayList<>(methodNames.size());
        for (String name : methodNames) {
            AiMethodSuggestion base = existingByName.get(name);
            Entry effective = methodLevel.getOrDefault(name, classLevel);
            merged.add(mergeMethod(name, base, effective));
        }

        // Class-level fields are carried through from the original suggestion,
        // or left null when no AI suggestion was available.
        return new AiClassSuggestion(
                suggestion != null ? suggestion.className() : fqcn,
                suggestion != null ? suggestion.classSecurityRelevant() : null,
                suggestion != null ? suggestion.classTags() : null,
                suggestion != null ? suggestion.classReason() : null,
                merged);
    }

    /**
     * Merges a single method's base classification with the applicable override
     * entry.
     *
     * <p>
     * Without an override the base record is returned unchanged, or a neutral
     * record is synthesized when there is no base either. With an override,
     * every non-{@code null} override field replaces the corresponding base
     * value, and the remaining fields keep their base value (or a default when
     * there is no base).
     * </p>
     *
     * <p>
     * Whenever an override entry is supplied — regardless of which of its fields
     * are set — the confidence is set to {@code 1.0} for a security-relevant
     * result and {@code 0.0} otherwise. The interaction score is taken from the
     * base record, or {@code 0.0} when there is none.
     * </p>
     *
     * @param name     method name
     * @param base     existing AI suggestion for this method; may be {@code null}
     * @param override override entry to apply; may be {@code null} (no-op)
     * @return resulting method suggestion; never {@code null}
     */
    @SuppressWarnings("PMD.NPathComplexity")
    private static AiMethodSuggestion mergeMethod(String name, AiMethodSuggestion base, Entry override) {
        if (override == null) {
            // No override — keep the AI record, or synthesize a neutral one.
            if (base != null) {
                return base;
            }
            return new AiMethodSuggestion(name, false, null, List.of(), null, 0.0, 0.0);
        }

        // Start from the base values (or defaults) ...
        boolean securityRelevant = base != null && base.securityRelevant();
        List<String> tags = (base != null && base.tags() != null) ? base.tags() : List.of();
        String displayName = base != null ? base.displayName() : null;
        String reason = base != null ? base.reason() : null;

        // ... then let every field present in the override replace them.
        if (override.securityRelevant() != null) {
            securityRelevant = override.securityRelevant();
        }
        if (override.tags() != null) {
            // Entry already holds an unmodifiable, null-free snapshot.
            tags = override.tags();
        }
        if (override.displayName() != null) {
            displayName = override.displayName();
        }
        if (override.reason() != null) {
            reason = override.reason();
        }

        // Human review supersedes any AI confidence score.
        // Interaction score is AI-generated and is preserved from the base suggestion.
        double confidence = securityRelevant ? 1.0 : 0.0;
        double interactionScore = base != null ? base.interactionScore() : 0.0;
        return new AiMethodSuggestion(name, securityRelevant, displayName, tags, reason, confidence, interactionScore);
    }

    // -------------------------------------------------------------------------
    // Public immutable entry type
    // -------------------------------------------------------------------------

    /**
     * A single override entry as stored in the in-memory index.
     *
     * <p>
     * All fields except {@link #fqcn} are optional and carry {@code null} to
     * indicate "not overridden". The {@link #note} field is not read by the
     * merge logic and exists solely for human documentation.
     * </p>
     *
     * <p>
     * A non-{@code null} {@code tags} list is stored as an unmodifiable snapshot
     * with {@code null} elements removed, so later changes to the caller's list
     * do not affect the entry.
     * </p>
     *
     * @param fqcn             fully qualified class name targeted by this entry
     * @param method           method name targeted; {@code null} for a class-level
     *                         override that applies to all methods in the class
     * @param securityRelevant override value for security relevance; {@code null}
     *                         means "keep existing"
     * @param tags             override value for taxonomy tags; {@code null} means
     *                         "keep existing"
     * @param displayName      override value for the suggested display name;
     *                         {@code null} means "keep existing"
     * @param reason           override value for the classification rationale;
     *                         {@code null} means "keep existing"
     * @param note             free-text annotation for human use; never emitted in
     *                         any output
     */
    public record Entry(String fqcn, String method, Boolean securityRelevant, List<String> tags,
            String displayName, String reason, String note) {

        /**
         * Takes a defensive, unmodifiable copy of {@code tags} and drops
         * {@code null} elements; a {@code null} list is kept as {@code null}.
         */
        public Entry {
            if (tags != null) {
                tags = tags.stream().filter(tag -> tag != null).toList();
            }
        }
    }

    // -------------------------------------------------------------------------
    // YAML deserialization POJOs
    // -------------------------------------------------------------------------

    /** Root YAML deserialization container holding the list of override entries. */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static final class OverrideFile {
        @JsonProperty("overrides")
        /* default */ List<EntryDto> overrides;
    }

    /** Data transfer object for a single override entry read from YAML. */
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static final class EntryDto {
        @JsonProperty("fqcn")
        /* default */ String fqcn;
        @JsonProperty("method")
        /* default */ String method;
        @JsonProperty("securityRelevant")
        /* default */ Boolean securityRelevant;
        @JsonProperty("tags")
        /* default */ List<String> tags;
        @JsonProperty("displayName")
        /* default */ String displayName;
        @JsonProperty("reason")
        /* default */ String reason;
        @JsonProperty("note")
        /* default */ String note;
    }
}