// CredentialDetectionRunner.java
package org.egothor.methodatlas.command;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.egothor.methodatlas.CliConfig;
import org.egothor.methodatlas.ai.AiSuggestionEngine;
import org.egothor.methodatlas.ai.AiSuggestionException;
import org.egothor.methodatlas.ai.PromptBuilder;
import org.egothor.methodatlas.ai.CredentialTriageVerdict;
import org.egothor.methodatlas.api.DiscoveredMethod;
import org.egothor.methodatlas.api.CredentialDetector;
import org.egothor.methodatlas.api.CredentialDetectorConfig;
import org.egothor.methodatlas.api.CredentialScanUnit;
import org.egothor.methodatlas.api.TestDiscovery;
import org.egothor.methodatlas.api.TestDiscoveryConfig;
import org.egothor.methodatlas.emit.SarifEmitter;
import org.egothor.methodatlas.emit.CredentialCsvEmitter;
import org.egothor.methodatlas.emit.CredentialFinding;
import org.egothor.methodatlas.emit.CredentialMasker;
/**
* Shared orchestration for the {@code -detect-secrets} feature.
*
* <p>
* Two triage strategies are supported, both producing the same outputs (log,
* secrets CSV, and — in SARIF mode — secret results embedded in the document):
* </p>
* <ul>
* <li><b>Folded</b> (default scope with AI enabled): the command runs detection
* up-front via {@link #detect(List)}, hands the resulting
* {@link CredentialTriageContext} to the scan so each per-class classification
* call <em>also</em> triages that class's candidates — the class source is
* sent to the provider once. The command then calls
* {@link #applyFoldedVerdicts(List, Map)} and {@link #emitFindings}.</li>
* <li><b>Separate</b> ({@link #run(List, SarifEmitter)}): used with no AI, with
* {@code -secrets-separate-llm} set, or with a {@code -secrets-include} glob
* (which scans files outside the discovered classes). Detection is followed by
* an optional dedicated triage call per file.</li>
* </ul>
*
* <p>
* The deterministic detection itself never calls AI; a failed triage degrades to
* unverified candidates.
* </p>
*
* @since 4.1.0
*/
final class CredentialDetectionRunner {
/** Class logger: detection summary, per-finding lines, skipped files, and triage failures. */
private static final Logger LOG = Logger.getLogger(CredentialDetectionRunner.class.getName());
/** Default Shannon-entropy floor handed to the detectors. */
private static final double DEFAULT_ENTROPY = 4.0;
/** Extension appended when deriving a SARIF artifact URI from an FQCN. */
private static final String JAVA_EXTENSION = ".java";
/** Grouping key used for findings that carry no fully qualified class name. */
private static final String NO_FQCN = "";
/**
 * Parsed CLI configuration; this class reads the secrets include glob, the
 * separate-LLM flag, file suffixes, rules, min score, show-values flag, and the
 * CSV output path from it.
 */
private final CliConfig cfg;
/** Discovery configuration handed to the plugin loader when loading discovery providers. */
private final TestDiscoveryConfig discoveryConfig;
/** Loads and closes both the discovery providers and the credential detectors. */
private final PluginLoader pluginLoader;
/** Collects the discovered methods grouped by source file. */
private final ScanOrchestrator orchestrator;
/** AI engine used for triage; {@code null} when AI is disabled. */
private final AiSuggestionEngine aiEngine;
/** Whether triage is folded into the scan's classification call (set by {@link #prepare}). */
private boolean folded;
/** Up-front detection result captured by {@link #prepare} for the folded path. */
private DetectionResult preparedDetection;
/** Triage context handed to the scan in the folded path. */
private CredentialTriageContext preparedContext;
/**
 * Builds a runner around the collaborators it orchestrates. The arguments are
 * stored as given; no validation is performed here.
 *
 * @param cfg             parsed CLI configuration; never {@code null}
 * @param discoveryConfig configuration used when loading the discovery
 *                        providers that enumerate test classes; never
 *                        {@code null}
 * @param pluginLoader    resolves (and later closes) discovery providers and
 *                        credential detectors; never {@code null}
 * @param orchestrator    groups the discovered methods by file; never
 *                        {@code null}
 * @param aiEngine        engine used for triage, or {@code null} when AI is
 *                        disabled, in which case findings are emitted without
 *                        triage
 */
/* default */ CredentialDetectionRunner(
        CliConfig cfg,
        TestDiscoveryConfig discoveryConfig,
        PluginLoader pluginLoader,
        ScanOrchestrator orchestrator,
        AiSuggestionEngine aiEngine) {
    this.aiEngine = aiEngine;
    this.orchestrator = orchestrator;
    this.pluginLoader = pluginLoader;
    this.discoveryConfig = discoveryConfig;
    this.cfg = cfg;
}
/**
 * Outcome of the deterministic detection pass. Besides the findings themselves
 * it carries the two lookups the triage strategies need: candidate spans per
 * class (folded triage) and source text per file (separate-call triage).
 *
 * @param findings         every deterministic finding (triage fields
 *                         {@code null})
 * @param candidatesByFqcn candidate spans keyed by class, in finding order
 * @param sourceByFile     source text keyed by file, for separate-call triage
 * @since 4.1.0
 */
/* default */ record DetectionResult(
        List<CredentialFinding> findings,
        Map<String, List<PromptBuilder.CredentialCandidateRef>> candidatesByFqcn,
        Map<Path, String> sourceByFile) {
}
// ---------------------------------------------------------------------
// Command-facing lifecycle (folded or separate, decided here)
// ---------------------------------------------------------------------
/**
 * Prepares credential detection before the scan. When triage can be folded
 * into the per-class classification call (AI enabled, default test-class scope,
 * and {@code -secrets-separate-llm} not set), this runs deterministic detection
 * up-front and returns the {@link CredentialTriageContext} the scan must thread
 * through so the class source is sent to the provider once. Otherwise returns
 * {@code null} and {@link #finish} performs detection (and any separate triage)
 * after the scan.
 *
 * <p>
 * Any state left by an earlier call is cleared first, and the folded state is
 * only published once up-front detection has succeeded. If detection throws,
 * the runner is therefore left in the non-folded state rather than flagged as
 * folded with no prepared data.
 * </p>
 *
 * @param roots scan roots; never {@code null}
 * @return the triage context to pass to the scan, or {@code null} when not folding
 * @throws IOException if up-front detection fails
 */
/* default */ CredentialTriageContext prepare(List<Path> roots) throws IOException {
    // Reset before doing any work so a failed or non-folding call can never leave
    // finish() looking at half-initialised or stale folded state.
    this.folded = false;
    this.preparedDetection = null;
    this.preparedContext = null;

    boolean canFold = aiEngine != null && cfg.secretsInclude() == null && !cfg.secretsSeparateLlm();
    if (!canFold) {
        return null;
    }

    DetectionResult detection = detect(roots);
    CredentialTriageContext context = toContext(detection);

    // Publish only after both steps succeeded.
    this.preparedDetection = detection;
    this.preparedContext = context;
    this.folded = true;
    return context;
}
/**
 * Finishes credential detection once the scan is done. When triage was not
 * folded this simply delegates to {@link #run}; when it was, the verdicts the
 * scan stored in the prepared context are merged into the prepared findings and
 * the result is emitted.
 *
 * @param roots        scan roots; never {@code null}
 * @param sarifEmitter SARIF emitter to record findings into, or {@code null}
 * @throws IOException if collecting files or writing the CSV fails
 */
/* default */ void finish(List<Path> roots, SarifEmitter sarifEmitter) throws IOException {
    if (!folded) {
        run(roots, sarifEmitter);
        return;
    }
    List<CredentialFinding> detected = preparedDetection.findings();
    Map<String, List<CredentialTriageVerdict>> collected = preparedContext.verdictsByFqcn();
    List<CredentialFinding> merged = applyFoldedVerdicts(detected, collected);
    emitFindings(merged, sarifEmitter);
}
// ---------------------------------------------------------------------
// Separate-call path
// ---------------------------------------------------------------------
/**
 * Separate-call pipeline: detect, triage each file with its own AI call when an
 * engine is configured, then emit. Without an engine the deterministic findings
 * are emitted as they are.
 *
 * @param roots        scan roots; never {@code null}
 * @param sarifEmitter SARIF emitter to record findings into, or {@code null}
 * @throws IOException if collecting files or writing the CSV fails
 */
/* default */ void run(List<Path> roots, SarifEmitter sarifEmitter) throws IOException {
    DetectionResult detection = detect(roots);
    List<CredentialFinding> toEmit;
    if (aiEngine != null) {
        toEmit = triageSeparately(detection.findings(), detection.sourceByFile());
    } else {
        toEmit = detection.findings();
    }
    emitFindings(toEmit, sarifEmitter);
}
// ---------------------------------------------------------------------
// Folded path (used by the command together with the scan)
// ---------------------------------------------------------------------
/**
 * Performs the deterministic detection pass only (no triage). The result
 * bundles the findings with the per-class candidate spans and the per-file
 * source text that the triage strategies consume.
 *
 * @param roots scan roots; never {@code null}
 * @return the detection result; never {@code null}
 * @throws IOException if collecting files fails
 */
/* default */ DetectionResult detect(List<Path> roots) throws IOException {
    Map<Path, List<DiscoveredMethod>> methodsByFile = discoverByFile(roots);
    Map<Path, List<DetectCredentialsStage.MethodRange>> rangesByFile = toAttribution(methodsByFile);
    List<CredentialScanUnit> scanUnits = selectUnits(roots, methodsByFile);

    // Should two units share a path, the first unit's source is kept.
    Map<Path, String> sources = scanUnits.stream().collect(Collectors.toMap(
            CredentialScanUnit::filePath,
            CredentialScanUnit::source,
            (first, duplicate) -> first));

    List<CredentialFinding> detected = runDetectors(scanUnits, rangesByFile);
    Map<String, List<PromptBuilder.CredentialCandidateRef>> perClass = candidatesByFqcn(detected);
    return new DetectionResult(detected, perClass, sources);
}
/**
 * Creates the triage context for the scan from a detection result, seeding it
 * with the result's per-class candidates.
 *
 * @param detection the detection result; never {@code null}
 * @return a new context built from {@code detection.candidatesByFqcn()}
 */
/* default */ CredentialTriageContext toContext(DetectionResult detection) {
    Map<String, List<PromptBuilder.CredentialCandidateRef>> perClass = detection.candidatesByFqcn();
    return new CredentialTriageContext(perClass);
}
/**
 * Folds the verdicts gathered during the scan back into the findings. Findings
 * are grouped by class; within each group a verdict is matched to a finding
 * through its candidate index. When several verdicts share an index, the first
 * one is used. Classes without verdicts are merged against an empty verdict map.
 *
 * @param findings       the deterministic findings; never {@code null}
 * @param verdictsByFqcn verdicts collected during the folded scan, keyed by
 *                       class; never {@code null}
 * @return the merged findings, emitted group by group
 */
/* default */ List<CredentialFinding> applyFoldedVerdicts(List<CredentialFinding> findings,
        Map<String, List<CredentialTriageVerdict>> verdictsByFqcn) {
    List<CredentialFinding> merged = new ArrayList<>(findings.size());
    for (Map.Entry<String, List<CredentialFinding>> perClass : groupByFqcn(findings).entrySet()) {
        List<CredentialTriageVerdict> verdicts = verdictsByFqcn.getOrDefault(perClass.getKey(), List.of());
        Map<Integer, CredentialTriageVerdict> verdictByIndex = verdicts.stream()
                .collect(Collectors.toMap(
                        CredentialTriageVerdict::candidateIndex,
                        verdict -> verdict,
                        (first, duplicate) -> first));
        merged.addAll(DetectCredentialsStage.mergeVerdicts(perClass.getValue(), verdictByIndex));
    }
    return merged;
}
/**
 * Emits every output for the given findings. The min-score filter is applied
 * first; the surviving findings are then logged (summary line followed by one
 * line per finding), recorded into SARIF, and written to the CSV, in that order.
 *
 * @param findings     findings to emit; never {@code null}
 * @param sarifEmitter SARIF emitter to record findings into, or {@code null}
 * @throws IOException if writing the CSV fails
 */
/* default */ void emitFindings(List<CredentialFinding> findings, SarifEmitter sarifEmitter) throws IOException {
    final List<CredentialFinding> retained = filterByMinScore(findings);

    logSummary(retained);
    logFindings(retained);

    recordIntoSarif(retained, sarifEmitter);
    writeCsv(retained);
}
// ---------------------------------------------------------------------
// Internals
// ---------------------------------------------------------------------
/**
 * Loads the discovery providers, asks the orchestrator for the discovered
 * methods grouped by file, and closes the providers again whether or not the
 * collection succeeded.
 *
 * @param roots scan roots
 * @return discovered methods keyed by file
 * @throws IOException if collecting the methods fails
 */
private Map<Path, List<DiscoveredMethod>> discoverByFile(List<Path> roots) throws IOException {
    final List<TestDiscovery> discoveryProviders = pluginLoader.loadProviders(discoveryConfig);
    try {
        Map<Path, List<DiscoveredMethod>> methodsByFile =
                orchestrator.collectMethodsByFile(roots, discoveryProviders);
        return methodsByFile;
    } finally {
        pluginLoader.closeAll(discoveryProviders);
    }
}
/**
 * Builds the attribution index: for each file, keyed by its absolute path, the
 * list of method ranges (method, begin line, end line) taken from the
 * discovered methods. Iteration order of the input map is preserved.
 *
 * @param byFile discovered methods keyed by file
 * @return method ranges keyed by absolute file path
 */
private static Map<Path, List<DetectCredentialsStage.MethodRange>> toAttribution(
        Map<Path, List<DiscoveredMethod>> byFile) {
    Map<Path, List<DetectCredentialsStage.MethodRange>> rangesByFile = new LinkedHashMap<>();
    for (Map.Entry<Path, List<DiscoveredMethod>> entry : byFile.entrySet()) {
        Path absolute = entry.getKey().toAbsolutePath();
        List<DetectCredentialsStage.MethodRange> ranges = entry.getValue().stream()
                .map(discovered -> new DetectCredentialsStage.MethodRange(
                        discovered.method(), discovered.beginLine(), discovered.endLine()))
                .toList();
        rangesByFile.put(absolute, ranges);
    }
    return rangesByFile;
}
/**
 * Chooses the units to scan. Without a {@code -secrets-include} glob the units
 * are the discovered files (files that cannot be turned into a unit are
 * dropped); with a glob they are collected from the roots by a
 * {@link CredentialScanUnitSource}.
 *
 * @param roots  scan roots
 * @param byFile discovered methods keyed by file
 * @return the units to hand to the detectors
 */
private List<CredentialScanUnit> selectUnits(List<Path> roots,
        Map<Path, List<DiscoveredMethod>> byFile) {
    if (cfg.secretsInclude() == null) {
        return byFile.entrySet().stream()
                .map(discovered -> toUnit(discovered.getKey(), discovered.getValue()))
                .filter(unit -> unit != null)
                .toList();
    }
    CredentialScanUnitSource globSource =
            new CredentialScanUnitSource(cfg.fileSuffixes(), cfg.secretsInclude());
    return globSource.collect(roots);
}
/**
 * Turns one discovered file into a scan unit by reading it as UTF-8. The unit's
 * class name is taken from the first discovered method.
 *
 * @param file    the discovered file
 * @param methods the methods discovered in that file
 * @return the scan unit, or {@code null} when the file has no discovered
 *         methods or cannot be read (the latter is logged at {@code FINE})
 */
private static CredentialScanUnit toUnit(Path file, List<DiscoveredMethod> methods) {
    if (methods.isEmpty()) {
        return null;
    }
    final Path absolute = file.toAbsolutePath();
    CredentialScanUnit unit;
    try {
        String content = Files.readString(absolute, StandardCharsets.UTF_8);
        unit = new CredentialScanUnit(
                absolute,
                methods.get(0).fqcn(),
                content,
                CredentialScanUnitSource.languageOf(absolute));
    } catch (IOException unreadable) {
        LOG.log(Level.FINE, unreadable, () -> "Skipping unreadable discovered file: " + absolute);
        unit = null;
    }
    return unit;
}
/**
 * Loads the credential detectors (default entropy floor, optional rules from
 * the CLI configuration, empty extra-settings map), runs the detection stage
 * over the units, and closes the detectors afterwards in all cases.
 *
 * @param units       units to scan
 * @param attribution method ranges keyed by file, passed to the detection stage
 * @return the findings produced by the detection stage
 */
private List<CredentialFinding> runDetectors(List<CredentialScanUnit> units,
        Map<Path, List<DetectCredentialsStage.MethodRange>> attribution) {
    final CredentialDetectorConfig detectorConfig = new CredentialDetectorConfig(
            DEFAULT_ENTROPY,
            Optional.ofNullable(cfg.secretsRules()),
            Map.of());
    final List<CredentialDetector> loaded = pluginLoader.loadCredentialDetectors(detectorConfig);
    try {
        DetectCredentialsStage stage = new DetectCredentialsStage(loaded, attribution);
        return stage.run(units);
    } finally {
        pluginLoader.closeAllCredentialDetectors(loaded);
    }
}
/**
 * Groups findings by fully qualified class name, keeping first-seen order of
 * the classes and the original order inside each group. Findings without a
 * class name are collected under {@link #NO_FQCN}.
 *
 * @param findings findings to group
 * @return findings keyed by class name
 */
private static Map<String, List<CredentialFinding>> groupByFqcn(List<CredentialFinding> findings) {
    Map<String, List<CredentialFinding>> grouped = new LinkedHashMap<>();
    for (CredentialFinding finding : findings) {
        String key = finding.fqcn() == null ? NO_FQCN : finding.fqcn();
        grouped.computeIfAbsent(key, unused -> new ArrayList<>()).add(finding);
    }
    return grouped;
}
private static Map<String, List<PromptBuilder.CredentialCandidateRef>> candidatesByFqcn(
List<CredentialFinding> findings) {
Map<String, List<PromptBuilder.CredentialCandidateRef>> byFqcn = new LinkedHashMap<>();
groupByFqcn(findings).forEach((fqcn, group) -> {
if (!NO_FQCN.equals(fqcn)) {
byFqcn.put(fqcn, IntStream.range(0, group.size())
.mapToObj(i -> new PromptBuilder.CredentialCandidateRef(i,
group.get(i).candidate().beginLine(), group.get(i).candidate().matchedValue()))
.toList());
}
});
return byFqcn;
}
/**
 * Triages findings with one dedicated call per file. Findings are grouped by
 * file path in first-seen order; each group is triaged together with that
 * file's source text, or with an empty string when no source is recorded for
 * the file.
 *
 * @param findings     deterministic findings
 * @param sourceByFile source text keyed by file
 * @return the triaged findings, or {@code findings} itself when it is empty
 */
private List<CredentialFinding> triageSeparately(List<CredentialFinding> findings, Map<Path, String> sourceByFile) {
    if (findings.isEmpty()) {
        return findings;
    }
    Map<Path, List<CredentialFinding>> perFile = findings.stream().collect(
            Collectors.groupingBy(CredentialFinding::filePath, LinkedHashMap::new, Collectors.toList()));

    List<CredentialFinding> triaged = new ArrayList<>(findings.size());
    for (Map.Entry<Path, List<CredentialFinding>> entry : perFile.entrySet()) {
        Path file = entry.getKey();
        String source = sourceByFile.getOrDefault(file, "");
        triaged.addAll(triageGroup(file, entry.getValue(), source));
    }
    return triaged;
}
/**
 * Triages the findings of a single file with one AI call. The call is labelled
 * with the first finding's class name, or with the forward-slash form of the
 * file path when that finding has none. Verdicts are matched back by candidate
 * index (first verdict wins on duplicates).
 *
 * @param file   the file the findings belong to
 * @param group  the file's findings; must not be empty
 * @param source the file's source text
 * @return the findings merged with their verdicts, or {@code group} unchanged
 *         when the triage call fails (logged at {@code WARNING})
 */
private List<CredentialFinding> triageGroup(Path file, List<CredentialFinding> group, String source) {
    final String label;
    if (group.get(0).fqcn() == null) {
        label = file.toString().replace('\\', '/');
    } else {
        label = group.get(0).fqcn();
    }

    List<PromptBuilder.CredentialCandidateRef> candidateRefs = IntStream.range(0, group.size())
            .mapToObj(position -> new PromptBuilder.CredentialCandidateRef(
                    position,
                    group.get(position).candidate().beginLine(),
                    group.get(position).candidate().matchedValue()))
            .toList();

    try {
        List<CredentialTriageVerdict> verdicts = aiEngine.triageSecrets(label, source, candidateRefs);
        Map<Integer, CredentialTriageVerdict> verdictByIndex = verdicts.stream().collect(Collectors.toMap(
                CredentialTriageVerdict::candidateIndex,
                verdict -> verdict,
                (first, duplicate) -> first));
        return DetectCredentialsStage.mergeVerdicts(group, verdictByIndex);
    } catch (AiSuggestionException failure) {
        LOG.log(Level.WARNING, failure,
                () -> "Secret triage failed for " + label + "; emitting unverified candidates");
        return group;
    }
}
/**
 * Drops findings whose credibility score is below the configured minimum.
 * Findings without a score are always kept.
 *
 * @param findings findings to filter
 * @return a new list with the retained findings, in their original order
 */
private List<CredentialFinding> filterByMinScore(List<CredentialFinding> findings) {
    final double threshold = cfg.secretsMinScore();
    return findings.stream()
            .filter(finding -> finding.credibilityScore() == null
                    || finding.credibilityScore() >= threshold)
            .collect(Collectors.toCollection(ArrayList::new));
}
/**
 * Logs one {@code INFO} line with the number of findings and the number of
 * distinct files they were found in. Nothing is computed when {@code INFO} is
 * not loggable.
 *
 * @param kept the findings that will be emitted
 */
private static void logSummary(List<CredentialFinding> kept) {
    if (!LOG.isLoggable(Level.INFO)) {
        return;
    }
    long distinctFiles = kept.stream()
            .map(CredentialFinding::filePath)
            .distinct()
            .count();
    Object[] params = { kept.size(), distinctFiles };
    LOG.log(Level.INFO, "Credential detection: {0} finding(s) across {1} file(s)", params);
}
/**
 * Logs one {@code INFO} line per finding: forward-slash file path, begin line,
 * rule id, and the matched value. The value is masked unless the configuration
 * asks for values to be shown.
 *
 * <p>
 * The whole method is skipped when {@code INFO} is not loggable, so no value is
 * read or masked in that case.
 * </p>
 *
 * @param kept the findings that will be emitted
 */
private void logFindings(List<CredentialFinding> kept) {
    // Guard up-front (as logSummary does): masking every value only to have the
    // logger discard the line is wasted work.
    if (!LOG.isLoggable(Level.INFO)) {
        return;
    }
    // Loop-invariant: read the flag once rather than once per finding.
    final boolean showValues = cfg.secretsShowValues();
    for (CredentialFinding f : kept) {
        String raw = f.candidate().matchedValue();
        String snippet = showValues ? raw : CredentialMasker.mask(raw);
        LOG.info(" " + f.filePath().toString().replace('\\', '/')
                + ":" + f.candidate().beginLine()
                + " [" + f.candidate().ruleId() + "] " + snippet);
    }
}
/**
 * Records each finding into the SARIF emitter under the URI derived by
 * {@link #fileUri}. Does nothing when no emitter is supplied.
 *
 * @param kept         the findings that will be emitted
 * @param sarifEmitter SARIF emitter, or {@code null}
 */
private void recordIntoSarif(List<CredentialFinding> kept, SarifEmitter sarifEmitter) {
    if (sarifEmitter != null) {
        kept.forEach(finding -> sarifEmitter.recordSecret(fileUri(finding), finding));
    }
}
/**
 * Derives the SARIF artifact URI for a finding: the class name with dots turned
 * into slashes plus {@link #JAVA_EXTENSION} when the finding has a class name,
 * otherwise the file path with backslashes turned into forward slashes.
 *
 * @param f the finding
 * @return the artifact URI string
 */
private static String fileUri(CredentialFinding f) {
    String className = f.fqcn();
    return className != null
            ? className.replace('.', '/') + JAVA_EXTENSION
            : f.filePath().toString().replace('\\', '/');
}
/**
 * Writes the findings to the configured secrets CSV file as UTF-8.
 *
 * <p>
 * {@link PrintWriter} never throws on write; it only records an error flag. The
 * flag is therefore checked after the emitter has written its rows, so that a
 * failed write is reported as an {@link IOException} instead of being lost.
 * </p>
 *
 * @param kept the findings that will be emitted
 * @throws IOException if the file cannot be opened or an error occurred while
 *                     writing to it
 */
private void writeCsv(List<CredentialFinding> kept) throws IOException {
    Path target = cfg.secretsOut();
    try (PrintWriter w = new PrintWriter(Files.newBufferedWriter(target, StandardCharsets.UTF_8))) {
        new CredentialCsvEmitter(cfg.secretsShowValues()).flush(w, kept);
        // checkError() flushes the writer and reports any error swallowed so far.
        if (w.checkError()) {
            throw new IOException("Failed to write credential findings CSV: " + target);
        }
    }
}
}