// ReceiptBuilder.java

// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Egothor
// Copyright 2026 Accenture
package org.egothor.methodatlas.receipt;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.HexFormat;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.egothor.methodatlas.CliConfig;
import org.egothor.methodatlas.ai.AiOptions;
import org.egothor.methodatlas.ai.PromptTemplateKind;
import org.egothor.methodatlas.ai.PromptTemplateSet;
import org.egothor.methodatlas.command.ContentHasher;

/**
 * Assembles a {@link ReproducibilityReceipt} from a parsed
 * {@link CliConfig} and the resolved tool version.
 *
 * <p>
 * The class is a stateless utility: it cannot be instantiated and its only
 * entry point is {@link #build(CliConfig, String, String)}. Both the class and
 * that method are package-private, so only code inside the {@code receipt}
 * package can call them directly.
 * </p>
 *
 * <p>
 * NOTE(review): earlier documentation named {@code MethodAtlasApp} as the sole
 * external caller. That type is not visible from this file; confirm through
 * which type of this package it reaches {@code build}.
 * </p>
 */
final class ReceiptBuilder {

    /**
     * Schema version of the JSON payload; bumped on any breaking change.
     *
     * <p>
     * v2 (4.1.0) replaced the single {@code inputs.promptTemplateHash} of v1 with
     * three per-template hashes ({@code classificationPromptHash},
     * {@code triageAppendixPromptHash}, {@code dedicatedTriagePromptHash}) and folds
     * all three into {@code configHash}. See {@code docs/usage-modes/reproducibility-receipts.md}.
     * </p>
     */
    private static final String SCHEMA_VERSION = "2";

    /** Algorithm string passed to {@link MessageDigest#getInstance(String)}. */
    private static final String SHA256_ALGO = "SHA-256";

    /** Sentinel for "no value" in the canonical key=value serialisation. */
    @SuppressWarnings("InlineTrivialConstant")
    private static final String EMPTY = "";

    // Canonical key=value separator characters: kept as constants so the
    // algorithm is unambiguous to an auditor re-deriving the hash from
    // standard tooling (e.g. sha256sum + printf).

    /** Separator between key and value in the canonical serialisation. */
    private static final String CONFIG_HASH_KV_SEP = "=";

    /** Terminator written after every key=value pair in the canonical serialisation. */
    private static final String CONFIG_HASH_SEPARATOR = "\n";

    // Canonical keys, declared in the order in which the TreeMap iterates
    // them. The hash depends on the TreeMap's ordering only; the declaration
    // order is a reading aid and has no effect on the result.

    /** TreeMap key for the AI cache file fingerprint. */
    private static final String KEY_AI_CACHE_FILE_SHA = "aiCacheFileSha256";
    /** TreeMap key for the configured AI model name. */
    private static final String KEY_AI_MODEL = "aiModel";
    /** TreeMap key for the configured AI provider. */
    private static final String KEY_AI_PROVIDER = "aiProvider";
    /** TreeMap key for the built-in taxonomy mode name. */
    private static final String KEY_BUILT_IN_TAXONOMY = "builtInTaxonomy";
    /** TreeMap key for the effective method-classification prompt template hash. */
    private static final String KEY_CLASSIFICATION_PROMPT = "classificationPromptHash";
    /** TreeMap key for the effective standalone credential-triage template hash. */
    private static final String KEY_DEDICATED_TRIAGE_PROMPT = "dedicatedTriagePromptHash";
    /** TreeMap key for the MethodAtlas tool version. */
    private static final String KEY_METHOD_ATLAS_VERSION = "methodAtlasVersion";
    /** TreeMap key for the override file fingerprint. */
    private static final String KEY_OVERRIDE_FILE_SHA = "overrideFileSha256";
    /** TreeMap key for the taxonomy file fingerprint. */
    private static final String KEY_TAXONOMY_FILE_SHA = "taxonomyFileSha256";
    /** TreeMap key for the effective folded credential-triage appendix template hash. */
    private static final String KEY_TRIAGE_APPENDIX_PROMPT = "triageAppendixPromptHash";

    /**
     * Initial capacity of the canonical key=value buffer. The ten keys total
     * roughly 200 characters; together with their values (the longest being
     * 64-character SHA-256 hex strings) and twenty separator characters the
     * text normally stays below this size, so the {@link StringBuilder} does
     * not have to grow.
     */
    private static final int CANONICAL_BUFFER_CAPACITY = 1024;

    private ReceiptBuilder() {
        // Utility class.
    }

    /**
     * Builds a reproducibility receipt for the supplied configuration.
     *
     * <p>
     * The {@code configHash} field is computed as follows:
     * </p>
     * <ol>
     *   <li>A {@link TreeMap} is populated with the ten canonical keys held in
     *       the {@code KEY_*} constants of this class. Absent inputs,
     *       including a {@code null} tool version, map to the empty
     *       string.</li>
     *   <li>The map is serialised as {@code key1=value1\n…keyN=valueN\n}
     *       and encoded with {@link StandardCharsets#UTF_8}; the
     *       {@link TreeMap} iterates its keys in their natural
     *       {@link String} order.</li>
     *   <li>SHA-256 is applied to those bytes and the result is emitted as
     *       lowercase hex via {@link HexFormat#of()}.</li>
     * </ol>
     *
     * <p>
     * The AI provider, model and prompt-template hashes are recorded only when
     * AI is enabled; otherwise they are {@code null} in the receipt inputs and
     * empty in the hash. The hash is derived from the very same values that
     * are stored in the receipt inputs.
     * </p>
     *
     * @param config         parsed CLI configuration; must not be {@code null}
     * @param toolVersion    resolved tool version string (use {@code "dev"} when
     *                       no implementation version is available); stored in
     *                       the receipt as given
     * @param outputModeName textual name of the chosen output mode (e.g.
     *                       {@code "SARIF"}); included in the receipt's
     *                       {@code outputMode} field
     * @return a populated receipt with a stable {@code configHash} that an
     *         auditor can recompute from the other fields
     * @throws IOException if any input file referenced by {@code config}
     *                     (override file, taxonomy file, AI cache) cannot be
     *                     read for hashing
     */
    /* default */ static ReproducibilityReceipt build(CliConfig config, String toolVersion,
            String outputModeName) throws IOException {
        AiOptions ai = config.aiOptions();
        FileArtifact overrideArtifact = hashIfPresent(config.overrideFile());
        FileArtifact aiCacheArtifact = hashIfPresent(config.aiCacheFile());

        // Exactly one of the two taxonomy descriptors is recorded: the
        // fingerprint of a custom taxonomy file, or the built-in mode name.
        Path taxonomyFile = ai.taxonomyFile();
        FileArtifact taxonomyArtifact = null;
        String builtInTaxonomy = null;
        if (taxonomyFile != null) {
            taxonomyArtifact = hashIfPresent(taxonomyFile);
        } else {
            builtInTaxonomy = ai.taxonomyMode().name();
        }

        // AI-related inputs stay null (absent) unless AI is enabled.
        String aiProvider = null;
        String aiModel = null;
        String classificationPromptHash = null;
        String triageAppendixPromptHash = null;
        String dedicatedTriagePromptHash = null;
        if (ai.enabled()) {
            aiProvider = ai.provider().name();
            aiModel = ai.modelName();
            PromptTemplateSet templates = ai.promptTemplates();
            classificationPromptHash = templates.hash(PromptTemplateKind.CLASSIFICATION);
            triageAppendixPromptHash = templates.hash(PromptTemplateKind.TRIAGE_APPENDIX);
            dedicatedTriagePromptHash = templates.hash(PromptTemplateKind.DEDICATED_TRIAGE);
        }

        ReceiptInputs inputs = new ReceiptInputs(taxonomyArtifact, builtInTaxonomy,
                overrideArtifact, aiCacheArtifact, aiProvider, aiModel,
                classificationPromptHash, triageAppendixPromptHash, dedicatedTriagePromptHash);

        // The hash is fed the same values that were just stored in the
        // receipt inputs, so the two cannot drift apart.
        String configHash = computeConfigHash(toolVersion, aiProvider, aiModel,
                overrideArtifact, aiCacheArtifact, taxonomyArtifact, builtInTaxonomy,
                classificationPromptHash, triageAppendixPromptHash, dedicatedTriagePromptHash);

        return new ReproducibilityReceipt(
                SCHEMA_VERSION,
                Instant.now().toString(),
                toolVersion,
                javaVersion(),
                outputModeName,
                resolveScanRoots(config),
                computeDeterministicReplay(config),
                inputs,
                configHash);
    }

    /**
     * Returns a {@link FileArtifact} for {@code file} or {@code null} when
     * {@code file} is itself {@code null}.
     *
     * <p>
     * The artefact records the absolute form of the path together with the
     * hash returned by {@link ContentHasher#hashFile}.
     * </p>
     *
     * @param file path to fingerprint; may be {@code null}
     * @return populated artefact or {@code null}
     * @throws IOException if the file cannot be read
     */
    private static FileArtifact hashIfPresent(Path file) throws IOException {
        if (file == null) {
            return null;
        }
        // Hash first so an unreadable file fails before anything is built.
        String sha = ContentHasher.hashFile(file);
        return new FileArtifact(file.toAbsolutePath().toString(), sha);
    }

    /**
     * Resolves the JVM's reported Java version to a non-null string.
     *
     * @return Java version, or the literal {@code "unknown"} when the
     *         {@code java.version} property is unset
     */
    private static String javaVersion() {
        return System.getProperty("java.version", "unknown");
    }

    /**
     * Maps scan roots to absolute path strings. An empty input list means
     * "scan the current directory"; the absolute resolution captures the
     * actual directory that was scanned.
     *
     * @param config parsed CLI configuration
     * @return absolute path strings for every scan root, in the order
     *         returned by {@code config.paths()}
     */
    private static List<String> resolveScanRoots(CliConfig config) {
        List<Path> roots = config.paths();
        if (roots.isEmpty()) {
            // The empty path resolves against the current working directory.
            return List.of(Path.of("").toAbsolutePath().toString());
        }
        return roots.stream()
                .map(Path::toAbsolutePath)
                .map(Path::toString)
                .toList();
    }

    /**
     * Computes the {@code deterministicReplay} flag.
     *
     * @param config parsed CLI configuration
     * @return {@code true} when AI is disabled or an AI cache is configured
     */
    private static boolean computeDeterministicReplay(CliConfig config) {
        if (!config.aiOptions().enabled()) {
            return true;
        }
        return config.aiCacheFile() != null;
    }

    /**
     * Produces the SHA-256 hex of the canonical {@code key=value} buffer.
     *
     * <p>
     * Every argument may be {@code null}; a {@code null} argument (or a
     * {@code null} artefact hash) contributes the empty string rather than
     * the text {@code "null"}.
     * </p>
     *
     * @param toolVersion        MethodAtlas tool version string, or {@code null}
     * @param aiProvider         AI provider name as stored in the receipt, or {@code null}
     * @param aiModel            AI model name as stored in the receipt, or {@code null}
     * @param overrideArtifact   override file artefact, or {@code null}
     * @param aiCacheArtifact    AI cache file artefact, or {@code null}
     * @param taxonomyArtifact   taxonomy file artefact, or {@code null}
     * @param builtInTaxonomy    built-in taxonomy mode name, or {@code null}
     * @param classificationPromptHash effective classification template hash, or {@code null}
     * @param triageAppendixPromptHash effective folded triage-appendix template hash, or {@code null}
     * @param dedicatedTriagePromptHash effective standalone triage template hash, or {@code null}
     * @return 64-character lowercase hex SHA-256
     */
    private static String computeConfigHash(String toolVersion, String aiProvider, String aiModel,
            FileArtifact overrideArtifact, FileArtifact aiCacheArtifact,
            FileArtifact taxonomyArtifact, String builtInTaxonomy,
            String classificationPromptHash, String triageAppendixPromptHash,
            String dedicatedTriagePromptHash) {
        Map<String, String> entries = new TreeMap<>();
        entries.put(KEY_AI_CACHE_FILE_SHA, shaOrEmpty(aiCacheArtifact));
        entries.put(KEY_AI_MODEL, orEmpty(aiModel));
        entries.put(KEY_AI_PROVIDER, orEmpty(aiProvider));
        entries.put(KEY_BUILT_IN_TAXONOMY, orEmpty(builtInTaxonomy));
        entries.put(KEY_CLASSIFICATION_PROMPT, orEmpty(classificationPromptHash));
        entries.put(KEY_DEDICATED_TRIAGE_PROMPT, orEmpty(dedicatedTriagePromptHash));
        entries.put(KEY_METHOD_ATLAS_VERSION, orEmpty(toolVersion));
        entries.put(KEY_OVERRIDE_FILE_SHA, shaOrEmpty(overrideArtifact));
        entries.put(KEY_TAXONOMY_FILE_SHA, shaOrEmpty(taxonomyArtifact));
        entries.put(KEY_TRIAGE_APPENDIX_PROMPT, orEmpty(triageAppendixPromptHash));
        return sha256Hex(canonicalise(entries));
    }

    /**
     * Serialises the entries as {@code key=value} lines, each terminated by a
     * line feed, in the iteration order of {@code entries}.
     *
     * <p>
     * Values are written verbatim, without escaping. NOTE(review): a value
     * that itself contains a line feed would make the serialisation
     * ambiguous; presumably none of the inputs can, but confirm for free-form
     * values such as the model name.
     * </p>
     *
     * @param entries canonical keys and their non-null values
     * @return canonical text to be hashed
     */
    private static String canonicalise(Map<String, String> entries) {
        StringBuilder canonical = new StringBuilder(CANONICAL_BUFFER_CAPACITY);
        for (Map.Entry<String, String> entry : entries.entrySet()) {
            canonical.append(entry.getKey())
                    .append(CONFIG_HASH_KV_SEP)
                    .append(entry.getValue())
                    .append(CONFIG_HASH_SEPARATOR);
        }
        return canonical.toString();
    }

    /**
     * Hashes the UTF-8 encoding of {@code text} with SHA-256.
     *
     * @param text text to hash; must not be {@code null}
     * @return lowercase hex digest
     * @throws IllegalStateException if the runtime provides no SHA-256
     *                               implementation
     */
    private static String sha256Hex(String text) {
        try {
            MessageDigest digest = MessageDigest.getInstance(SHA256_ALGO);
            byte[] hashed = digest.digest(text.getBytes(StandardCharsets.UTF_8));
            return HexFormat.of().formatHex(hashed);
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("SHA-256 not available", e);
        }
    }

    /**
     * Extracts the SHA-256 string from a {@link FileArtifact}, or returns the
     * empty-string sentinel when the artefact (or its hash) is absent.
     *
     * @param artifact artefact to inspect; may be {@code null}
     * @return SHA-256 hex or {@code ""}
     */
    private static String shaOrEmpty(FileArtifact artifact) {
        return artifact == null ? EMPTY : orEmpty(artifact.sha256());
    }

    /**
     * Maps an absent value to the empty-string sentinel so that it is never
     * serialised as the text {@code "null"}.
     *
     * @param value value to normalise; may be {@code null}
     * @return {@code value}, or {@code ""} when {@code value} is {@code null}
     */
    private static String orEmpty(String value) {
        return value == null ? EMPTY : value;
    }
}