// ScanOrchestrator.java

// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Egothor
// Copyright 2026 Accenture
package org.egothor.methodatlas.command;

import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import org.egothor.methodatlas.AiResultCache;
import org.egothor.methodatlas.emit.ClassificationOverride;
import org.egothor.methodatlas.CliConfig;
import org.egothor.methodatlas.emit.TestMethodSink;
import org.egothor.methodatlas.ai.AiClassSuggestion;
import org.egothor.methodatlas.ai.AiMethodSuggestion;
import org.egothor.methodatlas.ai.AiOptions;
import org.egothor.methodatlas.ai.AiSuggestionEngine;
import org.egothor.methodatlas.ai.AiSuggestionException;
import org.egothor.methodatlas.ai.PromptBuilder;
import org.egothor.methodatlas.ai.SuggestionLookup;
import org.egothor.methodatlas.api.DiscoveredMethod;
import org.egothor.methodatlas.api.TestDiscovery;
import org.egothor.methodatlas.api.TestDiscoveryConfig;

/**
 * Orchestrates the scan-and-emit loop that every command mode is built around.
 *
 * <p>
 * Each {@link Command} mode varies in three places — output format, per-record
 * sink behaviour, and whether records stream or buffer — but they all share
 * the same core sequence:
 * </p>
 * <ol>
 *   <li>Load all configured {@link TestDiscovery} providers.</li>
 *   <li>For each scan root, run every provider, merge their results, and
 *       group methods by class.</li>
 *   <li>For each class, optionally consult the AI engine through a layered
 *       cache + override lookup.</li>
 *   <li>Forward each method record to the supplied sink.</li>
 *   <li>Close all providers.</li>
 * </ol>
 *
 * <p>
 * This class owns that sequence. Commands compose it with a
 * {@link PluginLoader} (passed in at construction) and configure the
 * per-record sink, AI runtime, and content-hash policy at call time.
 * </p>
 *
 * <h2>API shape</h2>
 *
 * <p>
 * Two entry points serve the two common patterns:
 * </p>
 * <ul>
 *   <li>{@link #scan} manages the provider lifecycle internally; it is the
 *       right call for SARIF, JSON, and GitHub-annotation modes that buffer
 *       or emit unconditionally.</li>
 *   <li>{@link #runDiscovery} processes a single root against pre-loaded
 *       providers; it is the right call for CSV and plain-text modes that
 *       compute per-root metadata (such as the {@code source_root} column)
 *       before forwarding records.</li>
 * </ul>
 *
 * <p>
 * The apply-tags flow has its own shape: {@link #collectMethodsByFile}
 * groups discovered methods by source file (the caller owns the provider
 * lifecycle so it can read each provider's
 * {@link TestDiscovery#hadErrors()} afterwards), and
 * {@link #gatherAiSuggestionsForFile} resolves AI suggestions for one
 * file at a time.
 * </p>
 *
 * <h2>Thread safety</h2>
 *
 * <p>
 * This class is thread-safe. The injected {@link PluginLoader} is
 * thread-safe and {@link java.util.ServiceLoader} resolution is
 * idempotent; nothing else is shared between calls.
 * </p>
 *
 * @see PluginLoader
 * @see AiRuntime
 * @see Command
 * @since 1.0.0
 */
public final class ScanOrchestrator {

    /**
     * Class logger. Used at {@code INFO} when an AI query is skipped or
     * issued and at {@code WARNING} when an AI query fails.
     */
    private static final Logger LOG = Logger.getLogger(ScanOrchestrator.class.getName());

    /**
     * Loader that {@link #scan} uses to obtain the discovery providers and to
     * close them again once all roots have been processed.
     */
    private final PluginLoader pluginLoader;

    /**
     * Creates a new orchestrator that will resolve providers through
     * {@code pluginLoader}.
     *
     * <p>
     * The reference is stored as-is; no validation is performed here. Only
     * {@link #scan} dereferences it — the other public methods receive their
     * providers from the caller.
     * </p>
     *
     * @param pluginLoader plugin loader used by {@link #scan} to load and
     *                     close providers; must not be {@code null} if
     *                     {@link #scan} is going to be called
     */
    public ScanOrchestrator(PluginLoader pluginLoader) {
        this.pluginLoader = pluginLoader;
    }

    /**
     * Runs discovery over every entry of {@code roots} and hands each
     * discovered test method to {@code sink}.
     *
     * <p>
     * The {@link TestDiscovery} providers are loaded through the injected
     * {@link PluginLoader} before the first root is processed and are closed
     * in a {@code finally} block, so they are released even when a root
     * fails with an exception. Each root is delegated to
     * {@link #runDiscovery}; the AI options and the content-hash switch are
     * read from {@code cliConfig} for every root.
     * </p>
     *
     * @param roots           source roots to scan; must not be {@code null}
     * @param cliConfig       parsed CLI configuration; supplies the AI options
     *                        and the content-hash flag
     * @param discoveryConfig configuration handed to the plugin loader when
     *                        loading providers
     * @param aiEngine        AI engine providing suggestions; may be
     *                        {@code null} when AI is disabled
     * @param sink            receiver of discovered test method records
     * @param override        human classification overrides
     * @param aiCache         AI result cache
     * @return {@code 1} if {@link #runDiscovery} reported errors for at least
     *         one root, {@code 0} otherwise
     * @throws IOException if traversing a file tree fails
     */
    public int scan(List<Path> roots, CliConfig cliConfig, TestDiscoveryConfig discoveryConfig,
            AiSuggestionEngine aiEngine, TestMethodSink sink,
            ClassificationOverride override, AiResultCache aiCache) throws IOException {
        final List<TestDiscovery> loaded = pluginLoader.loadProviders(discoveryConfig);
        boolean anyRootFailed = false;
        try {
            for (Path scanRoot : roots) {
                final boolean rootFailed = runDiscovery(scanRoot, loaded, cliConfig.aiOptions(),
                        aiEngine, sink, cliConfig.contentHash(), override, aiCache);
                // Keep scanning the remaining roots; only the exit code remembers the failure.
                anyRootFailed |= rootFailed;
            }
        } finally {
            pluginLoader.closeAll(loaded);
        }
        if (anyRootFailed) {
            return 1;
        }
        return 0;
    }

    /**
     * Discovers the test methods below {@code root} with every supplied
     * provider, resolves suggestions class by class, and emits one record per
     * method to {@code sink}.
     *
     * <p>
     * Processing happens in three steps:
     * </p>
     * <ol>
     *   <li>All providers are run against {@code root}; their results are
     *       concatenated in provider order and each provider's
     *       {@link TestDiscovery#hadErrors()} flag is folded into the return
     *       value.</li>
     *   <li>The methods are grouped by fully qualified class name, keeping
     *       the order in which classes were first seen.</li>
     *   <li>For each class the source text is taken from the first method of
     *       the group, a content hash is computed when it is needed for
     *       output or for the cache lookup, suggestions are resolved, and
     *       every method of the class is forwarded to the sink.</li>
     * </ol>
     *
     * <p>
     * The providers are not closed here; the caller owns their lifecycle
     * (typically via {@link PluginLoader#closeAll(List)} in a
     * {@code finally} block) and may reuse the same list for several roots.
     * </p>
     *
     * @param root               directory to scan
     * @param providers          pre-loaded discovery providers
     * @param aiOptions          AI configuration for the current run
     * @param aiEngine           AI engine, or {@code null} when AI is disabled
     * @param sink               receiver of discovered test method records
     * @param contentHashEnabled whether the class content hash is passed to
     *                           the sink; when {@code false} the sink
     *                           receives {@code null} even if a hash was
     *                           computed for the cache lookup
     * @param override           human classification overrides
     * @param aiCache            AI result cache
     * @return {@code true} if any provider reported errors
     * @throws IOException if traversing the file tree fails
     */
    @SuppressWarnings("PMD.CloseResource") // providers are owned by the caller; this method does not close them
    public boolean runDiscovery(Path root, List<TestDiscovery> providers,
            AiOptions aiOptions, AiSuggestionEngine aiEngine, TestMethodSink sink,
            boolean contentHashEnabled, ClassificationOverride override,
            AiResultCache aiCache) throws IOException {

        boolean anyProviderFailed = false;
        final List<DiscoveredMethod> discovered = new ArrayList<>();
        for (TestDiscovery provider : providers) {
            provider.discover(root).forEach(discovered::add);
            anyProviderFailed |= provider.hadErrors();
        }

        // LinkedHashMap keeps the classes in the order they were first discovered.
        final Map<String, List<DiscoveredMethod>> methodsByClass = discovered.stream()
                .collect(Collectors.groupingBy(DiscoveredMethod::fqcn,
                        LinkedHashMap::new, Collectors.toList()));

        final AiRuntime runtime = new AiRuntime(aiOptions, aiEngine, override, aiCache);

        for (Map.Entry<String, List<DiscoveredMethod>> group : methodsByClass.entrySet()) {
            final List<DiscoveredMethod> members = group.getValue();
            // groupingBy never yields an empty group, so the first element always exists.
            final DiscoveredMethod first = members.get(0);

            final String source = first.sourceContent().get().orElse(null);

            // The hash serves two purposes: cache key and (optionally) output column.
            final boolean hashWanted = contentHashEnabled || aiCache.isActive();
            String lookupHash = null;
            if (hashWanted && source != null) {
                lookupHash = ContentHasher.hashClass(source);
            }
            String emittedHash = null;
            if (contentHashEnabled) {
                emittedHash = lookupHash;
            }

            final String stem = first.fileStem();
            final List<String> names = members.stream()
                    .map(DiscoveredMethod::method)
                    .toList();
            final List<PromptBuilder.TargetMethod> targets = members.stream()
                    .map(ScanOrchestrator::toTargetMethod)
                    .toList();

            final SuggestionLookup lookup = resolveSuggestionLookup(
                    stem, group.getKey(), source, names, targets, runtime, lookupHash);

            for (DiscoveredMethod method : members) {
                sink.record(method.fqcn(), method.method(), method.beginLine(), method.loc(),
                        emittedHash, method.tags(), method.displayName(),
                        lookup.find(method.method()).orElse(null));
            }
        }

        return anyProviderFailed;
    }

    /**
     * Runs every provider against every root and groups the discovered
     * methods by the source file they were found in.
     *
     * <p>
     * Roots are visited in list order and, within a root, providers in list
     * order. A method whose {@link DiscoveredMethod#filePath()} is
     * {@code null} cannot be attributed to a file and is dropped without
     * notice.
     * </p>
     *
     * <p>
     * The providers are neither loaded nor closed here: the caller keeps
     * ownership so it can inspect {@link TestDiscovery#hadErrors()} after the
     * call and close the providers when it is done.
     * </p>
     *
     * @param roots     scan roots; must not be {@code null}
     * @param providers already-loaded discovery providers; must not be
     *                  {@code null}
     * @return mutable, insertion-ordered map from source-file path to the
     *         methods found in that file
     * @throws IOException if directory traversal fails for any root
     */
    @SuppressWarnings({"PMD.AvoidInstantiatingObjectsInLoops",
            "PMD.CloseResource"}) // providers are owned by the caller
    public Map<Path, List<DiscoveredMethod>> collectMethodsByFile(
            List<Path> roots, List<TestDiscovery> providers) throws IOException {
        final Map<Path, List<DiscoveredMethod>> methodsPerFile = new LinkedHashMap<>();
        for (Path scanRoot : roots) {
            for (TestDiscovery discovery : providers) {
                discovery.discover(scanRoot).forEach(found -> {
                    final Path sourceFile = found.filePath();
                    if (sourceFile == null) {
                        return; // no file to attribute the method to
                    }
                    methodsPerFile.computeIfAbsent(sourceFile, unused -> new ArrayList<>()).add(found);
                });
            }
        }
        return methodsPerFile;
    }

    /**
     * Resolves AI security-classification suggestions for every class in
     * {@code byClass} and populates {@code tagsToApply} and
     * {@code displayNames} with the results for methods that are
     * security-relevant.
     *
     * <p>
     * For each class the source text and file stem are taken from the first
     * method of the class. A content hash is computed only when
     * {@code aiCache} is active and the source text is available; otherwise
     * the lookup runs with a {@code null} hash. Entries whose method list is
     * {@code null} or empty are skipped, because there is nothing to classify
     * and no method to read the class source from.
     * </p>
     *
     * <p>
     * A display-name suggestion is only placed into {@code displayNames}
     * when it is non-blank and the discovered method has no existing display
     * name (i.e. {@link DiscoveredMethod#displayName()} returns
     * {@code null}). This prevents AI-generated names from overwriting
     * manually authored ones.
     * </p>
     *
     * <p>
     * Both accumulators are keyed by the bare method name and filled with
     * {@code putIfAbsent}, so the first suggestion recorded for a given name
     * wins; later methods with the same name do not replace it. The tag list
     * is stored by reference, not copied.
     * </p>
     *
     * @param byClass      discovered methods grouped by FQCN for one source file
     * @param ai           AI runtime carrying the engine, override, and cache
     * @param aiCache      AI result cache; consulted here only to decide
     *                     whether a content hash must be computed
     * @param tagsToApply  output accumulator: method name to tag values to write
     * @param displayNames output accumulator: method name to display name to write
     */
    public void gatherAiSuggestionsForFile(Map<String, List<DiscoveredMethod>> byClass,
            AiRuntime ai, AiResultCache aiCache,
            Map<String, List<String>> tagsToApply, Map<String, String> displayNames) {
        for (Map.Entry<String, List<DiscoveredMethod>> classEntry : byClass.entrySet()) {
            String fqcn = classEntry.getKey();
            List<DiscoveredMethod> classMethods = classEntry.getValue();
            if (classMethods == null || classMethods.isEmpty()) {
                // Caller-supplied map: guard against get(0) failing on a class without methods.
                continue;
            }

            DiscoveredMethod representative = classMethods.get(0);
            String classSource = representative.sourceContent().get().orElse(null);
            // The hash is only a cache key here, so skip the work when the cache is inactive.
            String lookupHash = aiCache.isActive() && classSource != null
                    ? ContentHasher.hashClass(classSource) : null;
            String fileStem = representative.fileStem();
            List<String> methodNames = classMethods.stream().map(DiscoveredMethod::method).toList();
            List<PromptBuilder.TargetMethod> targetMethods = classMethods.stream()
                    .map(ScanOrchestrator::toTargetMethod).toList();

            SuggestionLookup suggestions = resolveSuggestionLookup(
                    fileStem, fqcn, classSource, methodNames, targetMethods, ai, lookupHash);

            for (DiscoveredMethod m : classMethods) {
                AiMethodSuggestion suggestion = suggestions.find(m.method()).orElse(null);
                if (suggestion == null || !suggestion.securityRelevant()) {
                    continue;
                }
                if (suggestion.displayName() != null && !suggestion.displayName().isBlank()
                        && m.displayName() == null) {
                    displayNames.putIfAbsent(m.method(), suggestion.displayName());
                }
                if (suggestion.tags() != null && !suggestion.tags().isEmpty()) {
                    tagsToApply.putIfAbsent(m.method(), suggestion.tags());
                }
            }
        }
    }

    /**
     * Decorates {@code delegate} with up to two record filters and returns
     * the resulting sink.
     *
     * <ul>
     *   <li><b>Security-only filter</b> — active when {@code securityOnly} is
     *       {@code true}. A record passes only if its
     *       {@link AiMethodSuggestion} is non-{@code null} and reports
     *       {@code securityRelevant()}.</li>
     *   <li><b>Confidence filter</b> — active when {@code confidenceEnabled}
     *       is {@code true} <em>and</em> {@code minConfidence > 0.0}. A record
     *       passes only if its {@link AiMethodSuggestion} is
     *       non-{@code null} and its {@link AiMethodSuggestion#confidence()}
     *       is at least {@code minConfidence}.</li>
     * </ul>
     *
     * <p>
     * When both filters are active a record must satisfy both. The
     * confidence filter is the outer wrapper, so it is evaluated first and
     * the security filter second. When neither filter is active the very
     * same {@code delegate} instance is returned, so no wrapper is
     * allocated.
     * </p>
     *
     * @param delegate          the underlying sink to forward matching
     *                          records to
     * @param securityOnly      whether to enable the security-relevance filter
     * @param minConfidence     minimum confidence score (inclusive) required
     *                          to pass the confidence filter; values that are
     *                          not greater than {@code 0.0} disable it
     * @param confidenceEnabled whether confidence scoring was requested;
     *                          must be {@code true} for the confidence
     *                          filter to activate
     * @return filtered sink, or {@code delegate} unchanged when all filters
     *         are off
     */
    public TestMethodSink filterSink(TestMethodSink delegate, boolean securityOnly,
            double minConfidence, boolean confidenceEnabled) {
        final TestMethodSink afterSecurity = securityOnly ? securityGate(delegate) : delegate;
        final boolean confidenceActive = confidenceEnabled && minConfidence > 0.0;
        return confidenceActive ? confidenceGate(afterSecurity, minConfidence) : afterSecurity;
    }

    /** Builds a sink that forwards to {@code next} only records flagged as security-relevant. */
    private static TestMethodSink securityGate(final TestMethodSink next) {
        return (fqcn, method, beginLine, loc, contentHash, tags, displayName, suggestion) -> {
            if (suggestion == null || !suggestion.securityRelevant()) {
                return;
            }
            next.record(fqcn, method, beginLine, loc, contentHash, tags, displayName, suggestion);
        };
    }

    /** Builds a sink that forwards to {@code next} only records whose confidence reaches {@code threshold}. */
    private static TestMethodSink confidenceGate(final TestMethodSink next, final double threshold) {
        return (fqcn, method, beginLine, loc, contentHash, tags, displayName, suggestion) -> {
            if (suggestion != null && suggestion.confidence() >= threshold) {
                next.record(fqcn, method, beginLine, loc, contentHash, tags, displayName, suggestion);
            }
        };
    }

    // -------------------------------------------------------------------------
    // Static utilities
    // -------------------------------------------------------------------------

    /**
     * Builds the prompt target descriptor for one discovered test method.
     *
     * <p>
     * The descriptor carries the method name together with its begin and end
     * line. A line number that is not strictly positive is passed on as
     * {@code null} instead of the raw value.
     * </p>
     *
     * <p>
     * The method is public because {@link ManualPrepareCommand} builds
     * prompt-target lists from discovered methods as well; sharing this
     * conversion keeps the prompt format identical at both call sites.
     * </p>
     *
     * @param m discovered test method; must not be {@code null}
     * @return corresponding prompt target descriptor; never {@code null}
     * @see PromptBuilder.TargetMethod
     */
    public static PromptBuilder.TargetMethod toTargetMethod(DiscoveredMethod m) {
        Integer firstLine = null;
        if (m.beginLine() > 0) {
            firstLine = m.beginLine();
        }
        Integer lastLine = null;
        if (m.endLine() > 0) {
            lastLine = m.endLine();
        }
        return new PromptBuilder.TargetMethod(m.method(), firstLine, lastLine);
    }

    // -------------------------------------------------------------------------
    // Private helpers
    // -------------------------------------------------------------------------

    /**
     * Resolves the per-method suggestion lookup for one class, trying the
     * cheapest source first.
     *
     * <p>
     * The decision ladder is:
     * </p>
     * <ol>
     *   <li>No method names: an empty lookup built from {@code null}; the
     *       override is not consulted.</li>
     *   <li>No AI engine: overrides only.</li>
     *   <li>Cache hit for {@code contentHash}: cached suggestion plus
     *       overrides.</li>
     *   <li>No class source: overrides only.</li>
     *   <li>AI options enabled and the source longer than the configured
     *       maximum: logged at {@code INFO}, overrides only.</li>
     *   <li>Otherwise the engine is queried; on
     *       {@link AiSuggestionException} the failure is logged at
     *       {@code WARNING} and only the overrides are applied.</li>
     * </ol>
     *
     * @param fileStem      file stem passed through to the AI engine
     * @param fqcn          fully qualified class name
     * @param classSource   class source text, or {@code null} if unavailable
     * @param methodNames   names of the methods discovered in the class
     * @param targetMethods prompt target descriptors for the same methods
     * @param ai            runtime bundle giving access to engine, options,
     *                      override, and cache
     * @param contentHash   cache lookup key; may be {@code null}
     * @return lookup over the resolved suggestions
     */
    private static SuggestionLookup resolveSuggestionLookup(String fileStem, String fqcn,
            String classSource, List<String> methodNames, List<PromptBuilder.TargetMethod> targetMethods,
            AiRuntime ai, String contentHash) {
        if (methodNames.isEmpty()) {
            return SuggestionLookup.from(null);
        }
        if (ai.engine() == null) {
            return withOverrides(ai, fqcn, null, methodNames);
        }

        // A cache hit spares the API call entirely.
        final AiClassSuggestion fromCache = ai.cache().lookup(contentHash).orElse(null);
        if (fromCache != null) {
            return withOverrides(ai, fqcn, fromCache, methodNames);
        }
        if (classSource == null) {
            return withOverrides(ai, fqcn, null, methodNames);
        }

        final boolean tooLarge = ai.options().enabled()
                && classSource.length() > ai.options().maxClassChars();
        if (tooLarge) {
            if (LOG.isLoggable(Level.INFO)) {
                LOG.log(Level.INFO, "Skipping AI for {0}: class source too large ({1} chars)",
                        new Object[] { fqcn, classSource.length() });
            }
            return withOverrides(ai, fqcn, null, methodNames);
        }

        if (LOG.isLoggable(Level.INFO)) {
            LOG.log(Level.INFO, "Querying AI for {0} ({1} methods)", new Object[] { fqcn, targetMethods.size() });
        }

        try {
            final AiClassSuggestion fresh =
                    ai.engine().suggestForClass(fileStem, fqcn, classSource, targetMethods);
            return withOverrides(ai, fqcn, fresh, methodNames);
        } catch (AiSuggestionException failure) {
            if (LOG.isLoggable(Level.WARNING)) {
                LOG.log(Level.WARNING, "AI suggestion failed for class " + fqcn, failure);
            }
            // Degrade to overrides only rather than failing the whole scan.
            return withOverrides(ai, fqcn, null, methodNames);
        }
    }

    /** Applies the human overrides to {@code classSuggestion} (may be {@code null}) and wraps the result. */
    private static SuggestionLookup withOverrides(AiRuntime ai, String fqcn,
            AiClassSuggestion classSuggestion, List<String> methodNames) {
        return SuggestionLookup.from(ai.override().apply(fqcn, classSuggestion, methodNames));
    }
}