// ContentHasher.java
// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Egothor
// Copyright 2026 Accenture
package org.egothor.methodatlas.command;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;
import java.util.List;
/**
 * Stateless utility helpers for content fingerprints and scan-root path
 * prefixes.
 *
 * <p>
 * Two small, unrelated concerns live here because both are stateless and
 * free of project dependencies:
 * </p>
 * <ul>
 * <li>{@link #hashClass(String)} — computes a SHA-256 fingerprint of the
 * canonical pretty-printed AST text of a class. This is the value
 * exposed as {@code content_hash} in CSV / SARIF output and used as the
 * cache key by {@link org.egothor.methodatlas.AiResultCache}.</li>
 * <li>{@link #filePrefix(List)} — derives the forward-slashed path prefix
 * used in GitHub Actions workflow annotations and SARIF location URIs,
 * relativised to the current working directory so that paths resolve
 * to inline positions in PR diffs.</li>
 * </ul>
 *
 * <p>
 * Both methods are exposed as {@code static}. Test code calls them directly
 * with handcrafted inputs; no dependency injection is needed because there
 * is nothing to substitute — SHA-256 is mandated by the Java SE
 * specification and the prefix is derived purely from the supplied path and
 * the working directory.
 * </p>
 *
 * @see ScanOrchestrator
 * @since 1.0.0
 */
public final class ContentHasher {

    /** Separator required in annotation paths and SARIF location URIs. */
    private static final String FORWARD_SLASH = "/";

    private ContentHasher() {
        // Utility class; instantiation is prevented to make the static
        // intent obvious to callers and to satisfy PMD.
    }

    /**
     * Computes a SHA-256 content fingerprint of a class source string.
     *
     * <p>
     * The input is expected to be the canonical AST text of the class — for
     * Java this is the JavaParser pretty-printed form, which normalises
     * whitespace and comments so that semantically equivalent classes that
     * differ only in formatting produce identical hashes. The output is
     * suitable for incremental scanning, AI-cache lookups, and audit
     * traceability across two pipeline stages.
     * </p>
     *
     * <p>
     * Algorithm: SHA-256 (FIPS 180-4) applied to the UTF-8 bytes of
     * {@code classSource}. Time complexity is {@code O(n)} in the source
     * size. The result is a 64-character lowercase hexadecimal string.
     * </p>
     *
     * @param classSource canonical pretty-printed form of the class
     *                    declaration; must not be {@code null}
     * @return 64-character lowercase hexadecimal SHA-256 digest; never
     *         {@code null}, never empty
     * @throws NullPointerException  if {@code classSource} is {@code null}
     * @throws IllegalStateException if SHA-256 is unavailable — never in
     *                               practice, because SHA-256 is mandated by
     *                               the Java SE specification
     */
    public static String hashClass(String classSource) {
        try {
            MessageDigest digest = MessageDigest.getInstance("SHA-256");
            byte[] bytes = digest.digest(classSource.getBytes(StandardCharsets.UTF_8));
            return HexFormat.of().formatHex(bytes);
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("SHA-256 not available", e);
        }
    }

    /**
     * Derives the forward-slashed path prefix used in GitHub Actions
     * workflow annotations and SARIF location URIs.
     *
     * <p>
     * The first configured scan root is made absolute, normalised, and
     * relativised against the (equally normalised) current working
     * directory. The file system's name separator is then converted to
     * forward slashes and a trailing slash is appended unless the prefix is
     * empty. The resulting string is concatenated with the per-method
     * relative path to produce annotation paths that GitHub resolves to
     * inline positions in the PR diff (for example
     * {@code src/test/java/com/acme/AuthTest.java}).
     * </p>
     *
     * <p>
     * Only the file system's own separator is translated. On POSIX file
     * systems, where a backslash is an ordinary file-name character, a
     * directory such as {@code we\ird} is therefore preserved verbatim
     * instead of being rewritten to a different path.
     * </p>
     *
     * <p>
     * The returned prefix is the empty string when {@code roots} is empty
     * or when the first root is the current working directory itself; both
     * cases produce unprefixed annotation paths, which is appropriate
     * because the caller is operating on the current directory directly.
     * </p>
     *
     * <p>
     * On Windows, scan roots that resolve to a different drive than the
     * current working directory cannot be relativised. The method falls
     * back to the absolute path of the root in that case rather than
     * throwing.
     * </p>
     *
     * @param roots configured scan roots; must not be {@code null}; may be
     *              empty; only the first element is consulted
     * @return forward-slash path ending with {@code /}, or the empty string
     *         when {@code roots} is empty or its first element denotes the
     *         current working directory
     * @throws NullPointerException if {@code roots} or its first element is
     *                              {@code null}
     */
    public static String filePrefix(List<Path> roots) {
        if (roots.isEmpty()) {
            return "";
        }
        Path root = roots.get(0).toAbsolutePath().normalize();
        Path target;
        try {
            // Normalise the working directory as well so that both sides of
            // relativize() are free of "." and ".." elements.
            Path cwd = Paths.get("").toAbsolutePath().normalize();
            target = cwd.relativize(root);
        } catch (IllegalArgumentException ignored) {
            // Different drive on Windows (or a path from another provider,
            // whose ProviderMismatchException is an IllegalArgumentException)
            // — fall back to the absolute path.
            target = root;
        }
        String prefix = toForwardSlashes(target);
        if (!prefix.isEmpty() && !prefix.endsWith(FORWARD_SLASH)) {
            prefix += FORWARD_SLASH;
        }
        return prefix;
    }

    /**
     * Renders {@code path} with its file system's name separator replaced
     * by {@code /}; characters that are not separators are left untouched.
     */
    private static String toForwardSlashes(Path path) {
        String separator = path.getFileSystem().getSeparator();
        String text = path.toString();
        return FORWARD_SLASH.equals(separator) ? text : text.replace(separator, FORWARD_SLASH);
    }
}