ContentHasher.java

1
// SPDX-License-Identifier: Apache-2.0
2
// Copyright 2026 Egothor
3
// Copyright 2026 Accenture
4
package org.egothor.methodatlas.command;
5
6
import java.io.IOException;
7
import java.nio.charset.StandardCharsets;
8
import java.nio.file.Files;
9
import java.nio.file.Path;
10
import java.nio.file.Paths;
11
import java.security.MessageDigest;
12
import java.security.NoSuchAlgorithmException;
13
import java.util.HexFormat;
14
import java.util.List;
15
16
/**
 * Pure utility helpers for content fingerprints and scan-root path prefixes.
 *
 * <p>
 * Two small, unrelated concerns live here because both are stateless and
 * dependency-free. Keeping them in one static utility class keeps each
 * concern visible without the cost of constructor injection:
 * </p>
 * <ul>
 *   <li>{@link #hashClass(String)} and {@link #hashFile(Path)} compute
 *       SHA-256 fingerprints. {@code hashClass} fingerprints the canonical
 *       pretty-printed AST text of a class; this is the value exposed as
 *       {@code content_hash} in CSV / SARIF output and used as the cache key
 *       by {@link org.egothor.methodatlas.AiResultCache}. {@code hashFile}
 *       fingerprints the raw bytes of a file on disk.</li>
 *   <li>{@link #filePrefix(List)} derives the forward-slashed path prefix
 *       used in GitHub Actions workflow annotations and SARIF location URIs,
 *       relativised to the current working directory so that paths resolve
 *       to inline positions in PR diffs.</li>
 * </ul>
 *
 * <p>
 * All methods are exposed as {@code static}. Test code calls them directly
 * with handcrafted inputs; no dependency injection is needed because there
 * is nothing to substitute: SHA-256 is mandated by the Java SE specification
 * and the prefix is fully determined by the scan root and the working
 * directory. Note that {@code hashFile} and {@code filePrefix} read the
 * environment (file content, current working directory), so their results
 * depend on it.
 * </p>
 *
 * @see ScanOrchestrator
 * @since 1.0.0
 */
public final class ContentHasher {

    /** JCA name of the digest algorithm behind every fingerprint. */
    private static final String DIGEST_ALGORITHM = "SHA-256";

    /** Chunk size, in bytes, used when streaming file content into the digest. */
    private static final int BUFFER_SIZE = 8192;

    private ContentHasher() {
        // Utility class; instantiation is prevented to make the static
        // intent obvious to callers and to satisfy PMD.
    }

    /**
     * Computes a SHA-256 content fingerprint of a class source string.
     *
     * <p>
     * The input is expected to be the canonical AST text of the class (for
     * Java, the JavaParser pretty-printed form) so that classes differing
     * only in formatting produce identical hashes. This method does not
     * normalise anything itself: it hashes exactly the characters it is
     * given.
     * </p>
     *
     * <p>
     * Algorithm: SHA-256 (FIPS 180-4) applied to the UTF-8 bytes of
     * {@code classSource}. Time complexity is {@code O(n)} in the source
     * size. The result is a 64-character lowercase hexadecimal string.
     * </p>
     *
     * @param classSource canonical pretty-printed form of the class
     *                    declaration; must not be {@code null}
     * @return 64-character lowercase hexadecimal SHA-256 digest; never
     *         {@code null}, never empty
     * @throws NullPointerException  if {@code classSource} is {@code null}
     * @throws IllegalStateException if SHA-256 is unavailable; never in
     *                               practice, because SHA-256 is mandated by
     *                               the Java SE specification
     */
    public static String hashClass(String classSource) {
        byte[] utf8 = classSource.getBytes(StandardCharsets.UTF_8);
        return HexFormat.of().formatHex(newDigest().digest(utf8));
    }

    /**
     * Computes a SHA-256 content fingerprint of the raw bytes of a file.
     *
     * <p>
     * The algorithm and output format are identical to
     * {@link #hashClass(String)}: SHA-256 (FIPS 180-4) followed by lowercase
     * hexadecimal encoding via {@link HexFormat#of()}. The only difference is
     * the input source: the bytes are read from disk verbatim rather than
     * taken from an in-memory string. Use it to fingerprint artefacts whose
     * identity is the entire file contents, such as override YAML files,
     * taxonomy files, and AI cache CSVs.
     * </p>
     *
     * <p>
     * The file is streamed into the digest in fixed-size chunks, so memory
     * use does not grow with the file size.
     * </p>
     *
     * @param file path to the file to fingerprint; must not be {@code null}
     *             and must point to a readable regular file
     * @return 64-character lowercase hexadecimal SHA-256 digest; never
     *         {@code null}, never empty
     * @throws IOException           if {@code file} cannot be opened or read
     * @throws NullPointerException  if {@code file} is {@code null}
     * @throws IllegalStateException if SHA-256 is unavailable; never in
     *                               practice, because SHA-256 is mandated by
     *                               the Java SE specification
     */
    public static String hashFile(Path file) throws IOException {
        MessageDigest digest = newDigest();
        try (var in = Files.newInputStream(file)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                digest.update(buffer, 0, read);
            }
        }
        return HexFormat.of().formatHex(digest.digest());
    }

    /**
     * Derives the forward-slashed path prefix used in GitHub Actions
     * workflow annotations and SARIF location URIs.
     *
     * <p>
     * The first configured scan root is made absolute, normalised, and
     * relativised against the normalised current working directory. The
     * file-system separator is then converted to {@code /} and a trailing
     * slash is appended unless the prefix is empty. Callers concatenate the
     * result with a per-method relative path to produce annotation paths
     * such as {@code src/test/java/com/acme/AuthTest.java}.
     * </p>
     *
     * <p>
     * The returned prefix is the empty string when {@code roots} is empty,
     * and also when the first root is the current working directory itself;
     * both cases yield unprefixed annotation paths.
     * </p>
     *
     * <p>
     * When the root cannot be relativised (for example, on Windows, a root
     * on a different drive than the working directory) the method falls back
     * to the absolute path of the root rather than throwing.
     * </p>
     *
     * <p>
     * Only the separator of the path's own file system is rewritten. On
     * POSIX systems a backslash is an ordinary file-name character and is
     * therefore preserved.
     * </p>
     *
     * @param roots configured scan roots; must not be {@code null}; may be
     *              empty; only the first element is used
     * @return forward-slash path ending with {@code /}, or the empty string
     *         when {@code roots} is empty or the first root is the current
     *         working directory
     * @throws NullPointerException if {@code roots} or its first element is
     *                              {@code null}
     */
    public static String filePrefix(List<Path> roots) {
        if (roots.isEmpty()) {
            return "";
        }
        Path root = roots.get(0).toAbsolutePath().normalize();
        Path target;
        try {
            // Normalise both sides so relativize never sees redundant "." / ".." elements.
            Path cwd = Paths.get("").toAbsolutePath().normalize();
            target = cwd.relativize(root);
        } catch (IllegalArgumentException ignored) {
            // Not relativisable (e.g. a different drive on Windows): fall back
            // to the absolute path of the root.
            target = root;
        }
        String prefix = toForwardSlashes(target);
        if (!prefix.isEmpty() && !prefix.endsWith("/")) {
            prefix += "/";
        }
        return prefix;
    }

    /** Creates a fresh SHA-256 digest, translating the checked "unavailable" failure. */
    private static MessageDigest newDigest() {
        try {
            return MessageDigest.getInstance(DIGEST_ALGORITHM);
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("SHA-256 not available", e);
        }
    }

    /** Renders a path as text using {@code /} in place of its file system's separator. */
    private static String toForwardSlashes(Path path) {
        String separator = path.getFileSystem().getSeparator();
        String text = path.toString();
        return "/".equals(separator) ? text : text.replace(separator, "/");
    }
}

Mutations

86

1.1
Location : hashClass
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:hashClass_returnsLowercaseHex64Characters()]
replaced return value with "" for org/egothor/methodatlas/command/ContentHasher::hashClass → KILLED

119

1.1
Location : hashFile
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:hashFile_emptyFileMatchesPublishedShaOfEmptyInput(java.nio.file.Path)]
replaced return value with "" for org/egothor/methodatlas/command/ContentHasher::hashFile → KILLED

158

1.1
Location : filePrefix
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:filePrefix_emptyList_returnsEmptyString()]
removed conditional - replaced equality check with false → KILLED

2.2
Location : filePrefix
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:filePrefix_relativeRoot_endsWithSlash(java.nio.file.Path)]
removed conditional - replaced equality check with true → KILLED

170

1.1
Location : filePrefix
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:filePrefix_relativeRoot_endsWithSlash(java.nio.file.Path)]
removed conditional - replaced equality check with false → KILLED

2.2
Location : filePrefix
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:filePrefix_relativeRoot_endsWithSlash(java.nio.file.Path)]
removed conditional - replaced equality check with false → KILLED

3.3
Location : filePrefix
Killed by : none
removed conditional - replaced equality check with true → SURVIVED
Covering tests

4.4
Location : filePrefix
Killed by : none
removed conditional - replaced equality check with true → SURVIVED Covering tests

173

1.1
Location : filePrefix
Killed by : org.egothor.methodatlas.command.ContentHasherTest.[engine:junit-jupiter]/[class:org.egothor.methodatlas.command.ContentHasherTest]/[method:filePrefix_relativeRoot_endsWithSlash(java.nio.file.Path)]
replaced return value with "" for org/egothor/methodatlas/command/ContentHasher::filePrefix → KILLED

Active mutators

Tests examined


Report generated by PIT 1.22.1