// AiResultCache.java

package org.egothor.methodatlas;

import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import org.egothor.methodatlas.ai.AiClassSuggestion;
import org.egothor.methodatlas.ai.AiMethodSuggestion;
import org.egothor.methodatlas.ai.CredentialTriageVerdict;
import org.egothor.methodatlas.api.ScanRecord;
import org.egothor.methodatlas.emit.DeltaReport;

/**
 * Lookup table of AI answers carried over from an earlier MethodAtlas run,
 * indexed by each class's {@code content_hash} fingerprint.
 *
 * <p>
 * Every {@link AiCacheEntry} stores the full AI answer for a single class (the
 * method classifications together with any credential-triage verdicts) along
 * with the prompt-catalogue signature under which that answer was produced. A
 * single cached answer can therefore back a classification-only run, a
 * credential run, or a combined run without asking the model again, as long as
 * the prompt signature is unchanged. Answers recorded under a different prompt
 * catalogue are not reused by the signature-aware accessors.
 * </p>
 *
 * <p>
 * {@link #load(Path)} understands two inputs: the unified JSON-Lines cache (the
 * format MethodAtlas writes today) and the legacy per-method scan CSV written by
 * older {@code -content-hash} runs. Entries built from the legacy CSV have
 * neither a prompt signature nor credential verdicts; their classifications are
 * still served by content hash, but a credential query is never answered from
 * them.
 * </p>
 *
 * <p>
 * Create instances with {@link #load(Path)} or the no-op {@link #empty()}. The
 * hit/miss counters are plain fields and are not thread-safe; the scan loop that
 * drives this class is single-threaded.
 * </p>
 *
 * @see AiCacheStore
 * @see MethodAtlasApp
 */
public final class AiResultCache {

    /** Cached entries keyed by class content hash. */
    private final Map<String, AiCacheEntry> entries;

    /** Number of lookups answered from the cache. */
    private int hitCount;

    /** Number of lookups that could not be answered from the cache. */
    private int missCount;

    private AiResultCache(Map<String, AiCacheEntry> entries) {
        this.entries = entries;
    }

    /** Returns a cache without any entries, so every lookup is a miss. */
    public static AiResultCache empty() {
        return new AiResultCache(Map.of());
    }

    /**
     * Reads a cache from disk. The file is treated as a unified JSON-Lines cache
     * when {@link AiCacheStore#looksLikeJsonLines(Path)} says so, and as a legacy
     * scan CSV otherwise.
     *
     * <p>
     * JSON-Lines entries without a content hash (or with an empty one) and entries
     * without a suggestion are skipped. When the same hash occurs more than once,
     * the entry read last replaces the earlier one.
     * </p>
     *
     * @param path path to a unified cache file or a legacy MethodAtlas CSV
     * @return loaded cache; never {@code null}
     * @throws IOException if the file cannot be read
     */
    public static AiResultCache load(Path path) throws IOException {
        Map<String, AiCacheEntry> loaded = new HashMap<>();
        if (!AiCacheStore.looksLikeJsonLines(path)) {
            loadLegacyCsv(path, loaded);
            return new AiResultCache(loaded);
        }
        for (AiCacheEntry candidate : AiCacheStore.read(path)) {
            if (!hasText(candidate.contentHash()) || candidate.suggestion() == null) {
                continue;
            }
            loaded.put(candidate.contentHash(), candidate);
        }
        return new AiResultCache(loaded);
    }

    /**
     * Fills {@code target} with entries reconstructed from a legacy per-method
     * scan CSV. A row takes part only when its {@code content_hash} is non-empty
     * and its {@code ai_security_relevant} column is non-{@code null} (AI was
     * enabled). Rows sharing a hash are combined into one entry whose prompt
     * signature is {@code null}.
     *
     * @param csvPath legacy CSV path
     * @param target  map to populate, keyed by content hash
     * @throws IOException if the file cannot be read
     */
    @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
    private static void loadLegacyCsv(Path csvPath, Map<String, AiCacheEntry> target) throws IOException {
        // Phase 1: bucket the eligible rows by the hash of the class they belong to.
        Map<String, List<ScanRecord>> rowsByHash = new HashMap<>();
        for (ScanRecord row : DeltaReport.loadRecords(csvPath)) {
            if (hasText(row.contentHash()) && row.aiSecurityRelevant() != null) {
                rowsByHash.computeIfAbsent(row.contentHash(), unused -> new ArrayList<>()).add(row);
            }
        }

        // Phase 2: turn each bucket into a single class-level cache entry.
        rowsByHash.forEach((hash, rows) -> {
            List<AiMethodSuggestion> methods = new ArrayList<>(rows.size());
            for (ScanRecord row : rows) {
                methods.add(toMethodSuggestion(row));
            }
            AiClassSuggestion classSuggestion = new AiClassSuggestion(null, null, null, null, methods);
            target.put(hash, new AiCacheEntry(hash, null, classSuggestion));
        });
    }

    /**
     * Converts one legacy CSV row into a method suggestion. Missing tags become an
     * empty list and missing numeric scores become {@code 0.0}.
     *
     * @param row legacy scan record
     * @return method suggestion carrying the row's AI columns
     */
    private static AiMethodSuggestion toMethodSuggestion(ScanRecord row) {
        return new AiMethodSuggestion(
                row.method(),
                Boolean.TRUE.equals(row.aiSecurityRelevant()),
                row.aiDisplayName(),
                row.aiTags() == null ? List.of() : row.aiTags(),
                row.aiReason(),
                row.aiConfidence() == null ? 0.0 : row.aiConfidence(),
                row.aiInteractionScore() == null ? 0.0 : row.aiInteractionScore());
    }

    /** Returns {@code true} when {@code value} is neither {@code null} nor empty. */
    private static boolean hasText(String value) {
        return value != null && !value.isEmpty();
    }

    /**
     * Fetches the entry stored for a hash without touching the counters.
     *
     * @param contentHash content hash to look up, or {@code null}
     * @return the stored entry, or {@code null} when the hash is {@code null} or unknown
     */
    private AiCacheEntry find(String contentHash) {
        // The null check comes first so the backing map is never queried with a null key.
        return contentHash == null ? null : entries.get(contentHash);
    }

    /**
     * Looks up the cached AI answer for a class purely by content hash; the prompt
     * signature plays no role. Updates the hit/miss counters.
     *
     * @param contentHash SHA-256 fingerprint of the class source, or {@code null}
     * @return cached suggestion, or empty on a miss or {@code null} hash
     */
    public Optional<AiClassSuggestion> lookup(String contentHash) {
        AiCacheEntry cached = find(contentHash);
        if (cached == null) {
            missCount++;
            return Optional.empty();
        }
        hitCount++;
        return Optional.of(cached.suggestion());
    }

    /**
     * Looks up the cached classification for a class, honouring the prompt
     * catalogue of the current run.
     *
     * <p>
     * An entry that carries a {@code promptSignature} is served only when that
     * signature equals the supplied one. An entry without a signature (legacy
     * origin) is served on the content hash alone. Every call increments either
     * the hit or the miss counter.
     * </p>
     *
     * @param contentHash     SHA-256 fingerprint of the class source, or {@code null}
     * @param promptSignature signature of the current run's prompt catalogue
     * @return cached classification, or empty on a miss
     */
    public Optional<AiClassSuggestion> classification(String contentHash, String promptSignature) {
        AiCacheEntry cached = find(contentHash);
        boolean usable = cached != null
                && (cached.promptSignature() == null || cached.promptSignature().equals(promptSignature));
        if (!usable) {
            missCount++;
            return Optional.empty();
        }
        hitCount++;
        return Optional.of(cached.suggestion());
    }

    /**
     * Looks up the cached credential-triage verdicts for a class.
     *
     * <p>
     * A result is returned only when the hash is known, the entry has a
     * non-{@code null} prompt signature equal to the supplied one, and the cached
     * suggestion actually holds verdicts. Entries without a signature therefore
     * never answer this query. In contrast to
     * {@link #classification(String, String)}, the hit/miss counters are left
     * untouched.
     * </p>
     *
     * @param contentHash     SHA-256 fingerprint of the class source, or {@code null}
     * @param promptSignature signature of the current run's prompt catalogue
     * @return cached verdicts, or empty when none are cached for this signature
     */
    public Optional<List<CredentialTriageVerdict>> verdicts(String contentHash, String promptSignature) {
        AiCacheEntry cached = find(contentHash);
        if (cached == null) {
            return Optional.empty();
        }
        String cachedSignature = cached.promptSignature();
        if (cachedSignature == null || !cachedSignature.equals(promptSignature)) {
            return Optional.empty();
        }
        // A null verdict list means nothing was recorded, which maps to an empty result.
        return Optional.ofNullable(cached.suggestion().secrets());
    }

    /**
     * Tells whether the cache holds any entries at all.
     *
     * <p>When this returns {@code false}, callers can skip computing content hashes
     * for lookups, since every lookup would miss anyway.</p>
     *
     * @return {@code true} when the cache is non-empty
     */
    public boolean isActive() {
        return !entries.isEmpty();
    }

    /** Returns how many lookups have been answered from the cache so far. */
    public int hits() {
        return hitCount;
    }

    /** Returns how many lookups have missed the cache so far. */
    public int misses() {
        return missCount;
    }
}